aboutsummaryrefslogtreecommitdiffstats
path: root/ipc
diff options
context:
space:
mode:
Diffstat (limited to 'ipc')
-rw-r--r--ipc/Makefile1
-rw-r--r--ipc/ipc_sysctl.c2
-rw-r--r--ipc/mq_sysctl.c116
-rw-r--r--ipc/mqueue.c262
-rw-r--r--ipc/msgutil.c19
-rw-r--r--ipc/namespace.c56
-rw-r--r--ipc/sem.c4
-rw-r--r--ipc/shm.c23
-rw-r--r--ipc/util.c9
-rw-r--r--ipc/util.h24
10 files changed, 350 insertions, 166 deletions
diff --git a/ipc/Makefile b/ipc/Makefile
index 65c384395801..4e1955ea815d 100644
--- a/ipc/Makefile
+++ b/ipc/Makefile
@@ -8,4 +8,5 @@ obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o
obj_mq-$(CONFIG_COMPAT) += compat_mq.o
obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
obj-$(CONFIG_IPC_NS) += namespace.o
+obj-$(CONFIG_POSIX_MQUEUE_SYSCTL) += mq_sysctl.o
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 4a7a12c95abe..40eab7314aeb 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -26,7 +26,7 @@ static void *get_ipc(ctl_table *table)
return which;
}
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_PROC_SYSCTL
static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
new file mode 100644
index 000000000000..24ae46dfe45d
--- /dev/null
+++ b/ipc/mq_sysctl.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2007 IBM Corporation
+ *
+ * Author: Cedric Le Goater <clg@fr.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/nsproxy.h>
+#include <linux/ipc_namespace.h>
+#include <linux/sysctl.h>
+
+/*
+ * Define the ranges various user-specified maximum values can
+ * be set to.
+ */
+#define MIN_MSGMAX 1 /* min value for msg_max */
+#define MAX_MSGMAX HARD_MSGMAX /* max value for msg_max */
+#define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */
+#define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */
+
+#ifdef CONFIG_PROC_SYSCTL
+static void *get_mq(ctl_table *table)
+{
+ char *which = table->data;
+ struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
+ which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns;
+ return which;
+}
+
+static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table mq_table;
+ memcpy(&mq_table, table, sizeof(mq_table));
+ mq_table.data = get_mq(table);
+
+ return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos);
+}
+
+static int proc_mq_dointvec_minmax(ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table mq_table;
+ memcpy(&mq_table, table, sizeof(mq_table));
+ mq_table.data = get_mq(table);
+
+ return proc_dointvec_minmax(&mq_table, write, filp, buffer,
+ lenp, ppos);
+}
+#else
+#define proc_mq_dointvec NULL
+#define proc_mq_dointvec_minmax NULL
+#endif
+
+static int msg_max_limit_min = MIN_MSGMAX;
+static int msg_max_limit_max = MAX_MSGMAX;
+
+static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
+static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
+
+static ctl_table mq_sysctls[] = {
+ {
+ .procname = "queues_max",
+ .data = &init_ipc_ns.mq_queues_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_mq_dointvec,
+ },
+ {
+ .procname = "msg_max",
+ .data = &init_ipc_ns.mq_msg_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_mq_dointvec_minmax,
+ .extra1 = &msg_max_limit_min,
+ .extra2 = &msg_max_limit_max,
+ },
+ {
+ .procname = "msgsize_max",
+ .data = &init_ipc_ns.mq_msgsize_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_mq_dointvec_minmax,
+ .extra1 = &msg_maxsize_limit_min,
+ .extra2 = &msg_maxsize_limit_max,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table mq_sysctl_dir[] = {
+ {
+ .procname = "mqueue",
+ .mode = 0555,
+ .child = mq_sysctls,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table mq_sysctl_root[] = {
+ {
+ .ctl_name = CTL_FS,
+ .procname = "fs",
+ .mode = 0555,
+ .child = mq_sysctl_dir,
+ },
+ { .ctl_name = 0 }
+};
+
+struct ctl_table_header *mq_register_sysctl_table(void)
+{
+ return register_sysctl_table(mq_sysctl_root);
+}
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 54b4077fed79..c5e68adc6732 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -31,6 +31,8 @@
#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <linux/pid.h>
+#include <linux/ipc_namespace.h>
+#include <linux/ima.h>
#include <net/sock.h>
#include "util.h"
@@ -46,21 +48,6 @@
#define STATE_PENDING 1
#define STATE_READY 2
-/* default values */
-#define DFLT_QUEUESMAX 256 /* max number of message queues */
-#define DFLT_MSGMAX 10 /* max number of messages in each queue */
-#define HARD_MSGMAX (131072/sizeof(void*))
-#define DFLT_MSGSIZEMAX 8192 /* max message size */
-
-/*
- * Define the ranges various user-specified maximum values can
- * be set to.
- */
-#define MIN_MSGMAX 1 /* min value for msg_max */
-#define MAX_MSGMAX HARD_MSGMAX /* max value for msg_max */
-#define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */
-#define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */
-
struct ext_wait_queue { /* queue of sleeping tasks */
struct task_struct *task;
struct list_head list;
@@ -93,14 +80,7 @@ static const struct file_operations mqueue_file_operations;
static struct super_operations mqueue_super_ops;
static void remove_notification(struct mqueue_inode_info *info);
-static spinlock_t mq_lock;
static struct kmem_cache *mqueue_inode_cachep;
-static struct vfsmount *mqueue_mnt;
-
-static unsigned int queues_count;
-static unsigned int queues_max = DFLT_QUEUESMAX;
-static unsigned int msg_max = DFLT_MSGMAX;
-static unsigned int msgsize_max = DFLT_MSGSIZEMAX;
static struct ctl_table_header * mq_sysctl_table;
@@ -109,8 +89,27 @@ static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
return container_of(inode, struct mqueue_inode_info, vfs_inode);
}
-static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
- struct mq_attr *attr)
+/*
+ * This routine should be called with the mq_lock held.
+ */
+static inline struct ipc_namespace *__get_ns_from_inode(struct inode *inode)
+{
+ return get_ipc_ns(inode->i_sb->s_fs_info);
+}
+
+static struct ipc_namespace *get_ns_from_inode(struct inode *inode)
+{
+ struct ipc_namespace *ns;
+
+ spin_lock(&mq_lock);
+ ns = __get_ns_from_inode(inode);
+ spin_unlock(&mq_lock);
+ return ns;
+}
+
+static struct inode *mqueue_get_inode(struct super_block *sb,
+ struct ipc_namespace *ipc_ns, int mode,
+ struct mq_attr *attr)
{
struct user_struct *u = current_user();
struct inode *inode;
@@ -141,8 +140,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
info->qsize = 0;
info->user = NULL; /* set when all is ok */
memset(&info->attr, 0, sizeof(info->attr));
- info->attr.mq_maxmsg = msg_max;
- info->attr.mq_msgsize = msgsize_max;
+ info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
+ info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
if (attr) {
info->attr.mq_maxmsg = attr->mq_maxmsg;
info->attr.mq_msgsize = attr->mq_msgsize;
@@ -188,30 +187,38 @@ out_inode:
static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
+ struct ipc_namespace *ns = data;
+ int error = 0;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = MQUEUE_MAGIC;
sb->s_op = &mqueue_super_ops;
- inode = mqueue_get_inode(sb, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
- if (!inode)
- return -ENOMEM;
+ inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO,
+ NULL);
+ if (!inode) {
+ error = -ENOMEM;
+ goto out;
+ }
sb->s_root = d_alloc_root(inode);
if (!sb->s_root) {
iput(inode);
- return -ENOMEM;
+ error = -ENOMEM;
}
- return 0;
+out:
+ return error;
}
static int mqueue_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data, struct vfsmount *mnt)
{
- return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt);
+ if (!(flags & MS_KERNMOUNT))
+ data = current->nsproxy->ipc_ns;
+ return get_sb_ns(fs_type, flags, data, mqueue_fill_super, mnt);
}
static void init_once(void *foo)
@@ -242,11 +249,13 @@ static void mqueue_delete_inode(struct inode *inode)
struct user_struct *user;
unsigned long mq_bytes;
int i;
+ struct ipc_namespace *ipc_ns;
if (S_ISDIR(inode->i_mode)) {
clear_inode(inode);
return;
}
+ ipc_ns = get_ns_from_inode(inode);
info = MQUEUE_I(inode);
spin_lock(&info->lock);
for (i = 0; i < info->attr.mq_curmsgs; i++)
@@ -262,10 +271,19 @@ static void mqueue_delete_inode(struct inode *inode)
if (user) {
spin_lock(&mq_lock);
user->mq_bytes -= mq_bytes;
- queues_count--;
+ /*
+ * get_ns_from_inode() ensures that the
+ * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
+ * to which we now hold a reference, or it is NULL.
+ * We can't put it here under mq_lock, though.
+ */
+ if (ipc_ns)
+ ipc_ns->mq_queues_count--;
spin_unlock(&mq_lock);
free_uid(user);
}
+ if (ipc_ns)
+ put_ipc_ns(ipc_ns);
}
static int mqueue_create(struct inode *dir, struct dentry *dentry,
@@ -274,31 +292,41 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
struct inode *inode;
struct mq_attr *attr = dentry->d_fsdata;
int error;
+ struct ipc_namespace *ipc_ns;
spin_lock(&mq_lock);
- if (queues_count >= queues_max && !capable(CAP_SYS_RESOURCE)) {
+ ipc_ns = __get_ns_from_inode(dir);
+ if (!ipc_ns) {
+ error = -EACCES;
+ goto out_unlock;
+ }
+ if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
+ !capable(CAP_SYS_RESOURCE)) {
error = -ENOSPC;
- goto out_lock;
+ goto out_unlock;
}
- queues_count++;
+ ipc_ns->mq_queues_count++;
spin_unlock(&mq_lock);
- inode = mqueue_get_inode(dir->i_sb, mode, attr);
+ inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
if (!inode) {
error = -ENOMEM;
spin_lock(&mq_lock);
- queues_count--;
- goto out_lock;
+ ipc_ns->mq_queues_count--;
+ goto out_unlock;
}
+ put_ipc_ns(ipc_ns);
dir->i_size += DIRENT_SIZE;
dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
d_instantiate(dentry, inode);
dget(dentry);
return 0;
-out_lock:
+out_unlock:
spin_unlock(&mq_lock);
+ if (ipc_ns)
+ put_ipc_ns(ipc_ns);
return error;
}
@@ -562,7 +590,7 @@ static void remove_notification(struct mqueue_inode_info *info)
info->notify_owner = NULL;
}
-static int mq_attr_ok(struct mq_attr *attr)
+static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
{
if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
return 0;
@@ -570,8 +598,8 @@ static int mq_attr_ok(struct mq_attr *attr)
if (attr->mq_maxmsg > HARD_MSGMAX)
return 0;
} else {
- if (attr->mq_maxmsg > msg_max ||
- attr->mq_msgsize > msgsize_max)
+ if (attr->mq_maxmsg > ipc_ns->mq_msg_max ||
+ attr->mq_msgsize > ipc_ns->mq_msgsize_max)
return 0;
}
/* check for overflow */
@@ -587,8 +615,9 @@ static int mq_attr_ok(struct mq_attr *attr)
/*
* Invoked when creating a new queue via sys_mq_open
*/
-static struct file *do_create(struct dentry *dir, struct dentry *dentry,
- int oflag, mode_t mode, struct mq_attr *attr)
+static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
+ struct dentry *dentry, int oflag, mode_t mode,
+ struct mq_attr *attr)
{
const struct cred *cred = current_cred();
struct file *result;
@@ -596,14 +625,14 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
if (attr) {
ret = -EINVAL;
- if (!mq_attr_ok(attr))
+ if (!mq_attr_ok(ipc_ns, attr))
goto out;
/* store for use during create */
dentry->d_fsdata = attr;
}
- mode &= ~current->fs->umask;
- ret = mnt_want_write(mqueue_mnt);
+ mode &= ~current_umask();
+ ret = mnt_want_write(ipc_ns->mq_mnt);
if (ret)
goto out;
ret = vfs_create(dir->d_inode, dentry, mode, NULL);
@@ -611,24 +640,25 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
if (ret)
goto out_drop_write;
- result = dentry_open(dentry, mqueue_mnt, oflag, cred);
+ result = dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
/*
* dentry_open() took a persistent mnt_want_write(),
* so we can now drop this one.
*/
- mnt_drop_write(mqueue_mnt);
+ mnt_drop_write(ipc_ns->mq_mnt);
return result;
out_drop_write:
- mnt_drop_write(mqueue_mnt);
+ mnt_drop_write(ipc_ns->mq_mnt);
out:
dput(dentry);
- mntput(mqueue_mnt);
+ mntput(ipc_ns->mq_mnt);
return ERR_PTR(ret);
}
/* Opens existing queue */
-static struct file *do_open(struct dentry *dentry, int oflag)
+static struct file *do_open(struct ipc_namespace *ipc_ns,
+ struct dentry *dentry, int oflag)
{
const struct cred *cred = current_cred();
@@ -637,17 +667,17 @@ static struct file *do_open(struct dentry *dentry, int oflag)
if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
dput(dentry);
- mntput(mqueue_mnt);
+ mntput(ipc_ns->mq_mnt);
return ERR_PTR(-EINVAL);
}
if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
dput(dentry);
- mntput(mqueue_mnt);
+ mntput(ipc_ns->mq_mnt);
return ERR_PTR(-EACCES);
}
- return dentry_open(dentry, mqueue_mnt, oflag, cred);
+ return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
}
SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
@@ -658,6 +688,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
char *name;
struct mq_attr attr;
int fd, error;
+ struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
return -EFAULT;
@@ -671,13 +702,13 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
if (fd < 0)
goto out_putname;
- mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
- dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
+ mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
+ dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
goto out_err;
}
- mntget(mqueue_mnt);
+ mntget(ipc_ns->mq_mnt);
if (oflag & O_CREAT) {
if (dentry->d_inode) { /* entry already exists */
@@ -685,10 +716,10 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
error = -EEXIST;
if (oflag & O_EXCL)
goto out;
- filp = do_open(dentry, oflag);
+ filp = do_open(ipc_ns, dentry, oflag);
} else {
- filp = do_create(mqueue_mnt->mnt_root, dentry,
- oflag, mode,
+ filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root,
+ dentry, oflag, mode,
u_attr ? &attr : NULL);
}
} else {
@@ -696,26 +727,27 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
if (!dentry->d_inode)
goto out;
audit_inode(name, dentry);
- filp = do_open(dentry, oflag);
+ filp = do_open(ipc_ns, dentry, oflag);
}
if (IS_ERR(filp)) {
error = PTR_ERR(filp);
goto out_putfd;
}
+ ima_counts_get(filp);
fd_install(fd, filp);
goto out_upsem;
out:
dput(dentry);
- mntput(mqueue_mnt);
+ mntput(ipc_ns->mq_mnt);
out_putfd:
put_unused_fd(fd);
out_err:
fd = error;
out_upsem:
- mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
+ mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
out_putname:
putname(name);
return fd;
@@ -727,14 +759,15 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
char *name;
struct dentry *dentry;
struct inode *inode = NULL;
+ struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
name = getname(u_name);
if (IS_ERR(name))
return PTR_ERR(name);
- mutex_lock_nested(&mqueue_mnt->mnt_root->d_inode->i_mutex,
+ mutex_lock_nested(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex,
I_MUTEX_PARENT);
- dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
+ dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
goto out_unlock;
@@ -748,16 +781,16 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
inode = dentry->d_inode;
if (inode)
atomic_inc(&inode->i_count);
- err = mnt_want_write(mqueue_mnt);
+ err = mnt_want_write(ipc_ns->mq_mnt);
if (err)
goto out_err;
err = vfs_unlink(dentry->d_parent->d_inode, dentry);
- mnt_drop_write(mqueue_mnt);
+ mnt_drop_write(ipc_ns->mq_mnt);
out_err:
dput(dentry);
out_unlock:
- mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
+ mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
putname(name);
if (inode)
iput(inode);
@@ -1156,10 +1189,12 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
omqstat.mq_flags = filp->f_flags & O_NONBLOCK;
if (u_mqstat) {
audit_mq_getsetattr(mqdes, &mqstat);
+ spin_lock(&filp->f_lock);
if (mqstat.mq_flags & O_NONBLOCK)
filp->f_flags |= O_NONBLOCK;
else
filp->f_flags &= ~O_NONBLOCK;
+ spin_unlock(&filp->f_lock);
inode->i_atime = inode->i_ctime = CURRENT_TIME;
}
@@ -1203,59 +1238,31 @@ static struct file_system_type mqueue_fs_type = {
.kill_sb = kill_litter_super,
};
-static int msg_max_limit_min = MIN_MSGMAX;
-static int msg_max_limit_max = MAX_MSGMAX;
-
-static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
-static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
-
-static ctl_table mq_sysctls[] = {
- {
- .procname = "queues_max",
- .data = &queues_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .procname = "msg_max",
- .data = &msg_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .extra1 = &msg_max_limit_min,
- .extra2 = &msg_max_limit_max,
- },
- {
- .procname = "msgsize_max",
- .data = &msgsize_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .extra1 = &msg_maxsize_limit_min,
- .extra2 = &msg_maxsize_limit_max,
- },
- { .ctl_name = 0 }
-};
+int mq_init_ns(struct ipc_namespace *ns)
+{
+ ns->mq_queues_count = 0;
+ ns->mq_queues_max = DFLT_QUEUESMAX;
+ ns->mq_msg_max = DFLT_MSGMAX;
+ ns->mq_msgsize_max = DFLT_MSGSIZEMAX;
+
+ ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
+ if (IS_ERR(ns->mq_mnt)) {
+ int err = PTR_ERR(ns->mq_mnt);
+ ns->mq_mnt = NULL;
+ return err;
+ }
+ return 0;
+}
-static ctl_table mq_sysctl_dir[] = {
- {
- .procname = "mqueue",
- .mode = 0555,
- .child = mq_sysctls,
- },
- { .ctl_name = 0 }
-};
+void mq_clear_sbinfo(struct ipc_namespace *ns)
+{
+ ns->mq_mnt->mnt_sb->s_fs_info = NULL;
+}
-static ctl_table mq_sysctl_root[] = {
- {
- .ctl_name = CTL_FS,
- .procname = "fs",
- .mode = 0555,
- .child = mq_sysctl_dir,
- },
- { .ctl_name = 0 }
-};
+void mq_put_mnt(struct ipc_namespace *ns)
+{
+ mntput(ns->mq_mnt);
+}
static int __init init_mqueue_fs(void)
{
@@ -1268,21 +1275,20 @@ static int __init init_mqueue_fs(void)
return -ENOMEM;
/* ignore failues - they are not fatal */
- mq_sysctl_table = register_sysctl_table(mq_sysctl_root);
+ mq_sysctl_table = mq_register_sysctl_table();
error = register_filesystem(&mqueue_fs_type);
if (error)
goto out_sysctl;
- if (IS_ERR(mqueue_mnt = kern_mount(&mqueue_fs_type))) {
- error = PTR_ERR(mqueue_mnt);
+ spin_lock_init(&mq_lock);
+
+ init_ipc_ns.mq_mnt = kern_mount_data(&mqueue_fs_type, &init_ipc_ns);
+ if (IS_ERR(init_ipc_ns.mq_mnt)) {
+ error = PTR_ERR(init_ipc_ns.mq_mnt);
goto out_filesystem;
}
- /* internal initialization - not common for vfs */
- queues_count = 0;
- spin_lock_init(&mq_lock);
-
return 0;
out_filesystem:
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index c82c215693d7..f095ee268833 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -13,10 +13,29 @@
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/ipc.h>
+#include <linux/ipc_namespace.h>
#include <asm/uaccess.h>
#include "util.h"
+DEFINE_SPINLOCK(mq_lock);
+
+/*
+ * The next 2 defines are here bc this is the only file
+ * compiled when either CONFIG_SYSVIPC and CONFIG_POSIX_MQUEUE
+ * and not CONFIG_IPC_NS.
+ */
+struct ipc_namespace init_ipc_ns = {
+ .count = ATOMIC_INIT(1),
+#ifdef CONFIG_POSIX_MQUEUE
+ .mq_queues_max = DFLT_QUEUESMAX,
+ .mq_msg_max = DFLT_MSGMAX,
+ .mq_msgsize_max = DFLT_MSGSIZEMAX,
+#endif
+};
+
+atomic_t nr_ipc_ns = ATOMIC_INIT(1);
+
struct msg_msgseg {
struct msg_msgseg* next;
/* the next part of the message follows immediately */
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 9171d948751e..a1094ff0befa 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -9,17 +9,26 @@
#include <linux/rcupdate.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
#include "util.h"
-static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
+static struct ipc_namespace *create_ipc_ns(void)
{
struct ipc_namespace *ns;
+ int err;
ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
if (ns == NULL)
return ERR_PTR(-ENOMEM);
+ atomic_set(&ns->count, 1);
+ err = mq_init_ns(ns);
+ if (err) {
+ kfree(ns);
+ return ERR_PTR(err);
+ }
atomic_inc(&nr_ipc_ns);
sem_init_ns(ns);
@@ -34,24 +43,14 @@ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
ipcns_notify(IPCNS_CREATED);
register_ipcns_notifier(ns);
- kref_init(&ns->kref);
return ns;
}
struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
{
- struct ipc_namespace *new_ns;
-
- BUG_ON(!ns);
- get_ipc_ns(ns);
-
if (!(flags & CLONE_NEWIPC))
- return ns;
-
- new_ns = clone_ipc_ns(ns);
-
- put_ipc_ns(ns);
- return new_ns;
+ return get_ipc_ns(ns);
+ return create_ipc_ns();
}
/*
@@ -84,11 +83,8 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
up_write(&ids->rw_mutex);
}
-void free_ipc_ns(struct kref *kref)
+static void free_ipc_ns(struct ipc_namespace *ns)
{
- struct ipc_namespace *ns;
-
- ns = container_of(kref, struct ipc_namespace, kref);
/*
* Unregistering the hotplug notifier at the beginning guarantees
* that the ipc namespace won't be freed while we are inside the
@@ -110,3 +106,29 @@ void free_ipc_ns(struct kref *kref)
*/
ipcns_notify(IPCNS_REMOVED);
}
+
+/*
+ * put_ipc_ns - drop a reference to an ipc namespace.
+ * @ns: the namespace to put
+ *
+ * If this is the last task in the namespace exiting, and
+ * it is dropping the refcount to 0, then it can race with
+ * a task in another ipc namespace but in a mounts namespace
+ * which has this ipcns's mqueuefs mounted, doing some action
+ * with one of the mqueuefs files. That can raise the refcount.
+ * So dropping the refcount, and raising the refcount when
+ * accessing it through the VFS, are protected with mq_lock.
+ *
+ * (Clearly, a task raising the refcount on its own ipc_ns
+ * needn't take mq_lock since it can't race with the last task
+ * in the ipcns exiting).
+ */
+void put_ipc_ns(struct ipc_namespace *ns)
+{
+ if (atomic_dec_and_lock(&ns->count, &mq_lock)) {
+ mq_clear_sbinfo(ns);
+ spin_unlock(&mq_lock);
+ mq_put_mnt(ns);
+ free_ipc_ns(ns);
+ }
+}
diff --git a/ipc/sem.c b/ipc/sem.c
index 16a2189e96f9..87c2b641fd7b 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1290,8 +1290,8 @@ void exit_sem(struct task_struct *tsk)
int i;
rcu_read_lock();
- un = list_entry(rcu_dereference(ulp->list_proc.next),
- struct sem_undo, list_proc);
+ un = list_entry_rcu(ulp->list_proc.next,
+ struct sem_undo, list_proc);
if (&un->list_proc == &ulp->list_proc)
semid = -1;
else
diff --git a/ipc/shm.c b/ipc/shm.c
index 05d51d2a792c..1bc4701ef4f0 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -39,6 +39,7 @@
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>
+#include <linux/ima.h>
#include <asm/uaccess.h>
@@ -173,7 +174,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
shm_unlock(shp);
if (!is_file_hugepages(shp->shm_file))
shmem_lock(shp->shm_file, 0, shp->mlock_user);
- else
+ else if (shp->mlock_user)
user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size,
shp->mlock_user);
fput (shp->shm_file);
@@ -368,8 +369,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
/* hugetlb_file_setup applies strict accounting */
if (shmflg & SHM_NORESERVE)
acctflag = VM_NORESERVE;
- file = hugetlb_file_setup(name, size, acctflag);
- shp->mlock_user = current_user();
+ file = hugetlb_file_setup(name, size, acctflag,
+ &shp->mlock_user);
} else {
/*
* Do not allow no accounting for OVERCOMMIT_NEVER, even
@@ -409,6 +410,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
return error;
no_id:
+ if (shp->mlock_user) /* shmflg & SHM_HUGETLB case */
+ user_shm_unlock(size, shp->mlock_user);
fput(file);
no_file:
security_shm_free(shp);
@@ -553,12 +556,14 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
in_use = shm_ids(ns).in_use;
for (total = 0, next_id = 0; total < in_use; next_id++) {
+ struct kern_ipc_perm *ipc;
struct shmid_kernel *shp;
struct inode *inode;
- shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
- if (shp == NULL)
+ ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
+ if (ipc == NULL)
continue;
+ shp = container_of(ipc, struct shmid_kernel, shm_perm);
inode = shp->shm_file->f_path.dentry->d_inode;
@@ -887,6 +892,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations);
if (!file)
goto out_free;
+ ima_counts_get(file);
file->private_data = sfd;
file->f_mapping = shp->shm_file->f_mapping;
@@ -964,10 +970,13 @@ SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma, *next;
+ struct vm_area_struct *vma;
unsigned long addr = (unsigned long)shmaddr;
- loff_t size = 0;
int retval = -EINVAL;
+#ifdef CONFIG_MMU
+ loff_t size = 0;
+ struct vm_area_struct *next;
+#endif
if (addr & ~PAGE_MASK)
return retval;
diff --git a/ipc/util.c b/ipc/util.c
index 7585a72e259b..b8e4ba92f6d1 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -47,15 +47,6 @@ struct ipc_proc_iface {
int (*show)(struct seq_file *, void *);
};
-struct ipc_namespace init_ipc_ns = {
- .kref = {
- .refcount = ATOMIC_INIT(2),
- },
-};
-
-atomic_t nr_ipc_ns = ATOMIC_INIT(1);
-
-
#ifdef CONFIG_MEMORY_HOTPLUG
static void ipc_memory_notifier(struct work_struct *work)
diff --git a/ipc/util.h b/ipc/util.h
index 3646b45a03c9..764b51a37a6a 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -10,6 +10,7 @@
#ifndef _IPC_UTIL_H
#define _IPC_UTIL_H
+#include <linux/unistd.h>
#include <linux/err.h>
#define SEQ_MULTIPLIER (IPCMNI)
@@ -20,6 +21,15 @@ void shm_init (void);
struct ipc_namespace;
+#ifdef CONFIG_POSIX_MQUEUE
+extern void mq_clear_sbinfo(struct ipc_namespace *ns);
+extern void mq_put_mnt(struct ipc_namespace *ns);
+#else
+static inline void mq_clear_sbinfo(struct ipc_namespace *ns) { }
+static inline void mq_put_mnt(struct ipc_namespace *ns) { }
+#endif
+
+#ifdef CONFIG_SYSVIPC
void sem_init_ns(struct ipc_namespace *ns);
void msg_init_ns(struct ipc_namespace *ns);
void shm_init_ns(struct ipc_namespace *ns);
@@ -27,6 +37,15 @@ void shm_init_ns(struct ipc_namespace *ns);
void sem_exit_ns(struct ipc_namespace *ns);
void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
+#else
+static inline void sem_init_ns(struct ipc_namespace *ns) { }
+static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline void shm_init_ns(struct ipc_namespace *ns) { }
+
+static inline void sem_exit_ns(struct ipc_namespace *ns) { }
+static inline void msg_exit_ns(struct ipc_namespace *ns) { }
+static inline void shm_exit_ns(struct ipc_namespace *ns) { }
+#endif
/*
* Structure that holds the parameters needed by the ipc operations
@@ -110,7 +129,7 @@ void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm);
-#if defined(__ia64__) || defined(__x86_64__) || defined(__hppa__) || defined(__XTENSA__)
+#ifndef __ARCH_WANT_IPC_PARSE_VERSION
/* On IA-64, we always use the "64-bit version" of the IPC structures. */
# define ipc_parse_version(cmd) IPC_64
#else
@@ -153,5 +172,6 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
struct ipc_ops *ops, struct ipc_params *params);
-
+void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
+ void (*free)(struct ipc_namespace *, struct kern_ipc_perm *));
#endif