aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 15:44:47 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 15:44:47 -0800
commit6a2b60b17b3e48a418695a94bd2420f6ab32e519 (patch)
tree54b7792fa68b8890f710fa6398b6ba8626a039a8 /fs/proc
parentMerge branch 'for-3.8/drivers' of git://git.kernel.dk/linux-block (diff)
parentproc: Usable inode numbers for the namespace file descriptors. (diff)
downloadlinux-dev-6a2b60b17b3e48a418695a94bd2420f6ab32e519.tar.xz
linux-dev-6a2b60b17b3e48a418695a94bd2420f6ab32e519.zip
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace changes from Eric Biederman: "While small this set of changes is very significant with respect to containers in general and user namespaces in particular. The user space interface is now complete. This set of changes adds support for unprivileged users to create user namespaces and as a user namespace root to create other namespaces. The tyranny of supporting suid root preventing unprivileged users from using cool new kernel features is broken. This set of changes completes the work on setns, adding support for the pid, user, mount namespaces. This set of changes includes a bunch of basic pid namespace cleanups/simplifications. Of particular significance is the rework of the pid namespace cleanup so it no longer requires sending out tendrils into all kinds of unexpected cleanup paths for operation. At least one case of broken error handling is fixed by this cleanup. The files under /proc/<pid>/ns/ have been converted from regular files to magic symlinks which prevents incorrect caching by the VFS, ensuring the files always refer to the namespace the process is currently using and ensuring that the ptrace_mayaccess permission checks are always applied. The files under /proc/<pid>/ns/ have been given stable inode numbers so it is now possible to see if different processes share the same namespaces. Through the David Miller's net tree are changes to relax many of the permission checks in the networking stack to allowing the user namespace root to usefully use the networking stack. Similar changes for the mount namespace and the pid namespace are coming through my tree. Two small changes to add user namespace support were commited here adn in David Miller's -net tree so that I could complete the work on the /proc/<pid>/ns/ files in this tree. Work remains to make it safe to build user namespaces and 9p, afs, ceph, cifs, coda, gfs2, ncpfs, nfs, nfsd, ocfs2, and xfs so the Kconfig guard remains in place preventing that user namespaces from being built when any of those filesystems are enabled. Future design work remains to allow root users outside of the initial user namespace to mount more than just /proc and /sys." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (38 commits) proc: Usable inode numbers for the namespace file descriptors. proc: Fix the namespace inode permission checks. proc: Generalize proc inode allocation userns: Allow unprivilged mounts of proc and sysfs userns: For /proc/self/{uid,gid}_map derive the lower userns from the struct file procfs: Print task uids and gids in the userns that opened the proc file userns: Implement unshare of the user namespace userns: Implent proc namespace operations userns: Kill task_user_ns userns: Make create_new_namespaces take a user_ns parameter userns: Allow unprivileged use of setns. userns: Allow unprivileged users to create new namespaces userns: Allow setting a userns mapping to your current uid. userns: Allow chown and setgid preservation userns: Allow unprivileged users to create user namespaces. userns: Ignore suid and sgid on binaries if the uid or gid can not be mapped userns: fix return value on mntns_install() failure vfs: Allow unprivileged manipulation of the mount namespace. vfs: Only support slave subtrees across different user namespaces vfs: Add a user namespace reference from struct mnt_namespace ...
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c169
-rw-r--r--fs/proc/generic.c26
-rw-r--r--fs/proc/inode.c6
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/namespaces.c185
-rw-r--r--fs/proc/root.c17
-rw-r--r--fs/proc/self.c59
9 files changed, 249 insertions, 217 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 99349efbbc2b..981b05601931 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -21,6 +21,7 @@ proc-y += uptime.o
proc-y += version.o
proc-y += softirqs.o
proc-y += namespaces.o
+proc-y += self.o
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d3696708fc1a..d66248a1919b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -162,7 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *p)
{
- struct user_namespace *user_ns = current_user_ns();
+ struct user_namespace *user_ns = seq_user_ns(m);
struct group_info *group_info;
int g;
struct fdtable *fdt = NULL;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index aa63d25157b8..5a5a0be40e40 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2345,146 +2345,6 @@ static const struct file_operations proc_coredump_filter_operations = {
};
#endif
-/*
- * /proc/self:
- */
-static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
- int buflen)
-{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
- pid_t tgid = task_tgid_nr_ns(current, ns);
- char tmp[PROC_NUMBUF];
- if (!tgid)
- return -ENOENT;
- sprintf(tmp, "%d", tgid);
- return vfs_readlink(dentry,buffer,buflen,tmp);
-}
-
-static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
- pid_t tgid = task_tgid_nr_ns(current, ns);
- char *name = ERR_PTR(-ENOENT);
- if (tgid) {
- /* 11 for max length of signed int in decimal + NULL term */
- name = kmalloc(12, GFP_KERNEL);
- if (!name)
- name = ERR_PTR(-ENOMEM);
- else
- sprintf(name, "%d", tgid);
- }
- nd_set_link(nd, name);
- return NULL;
-}
-
-static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
- void *cookie)
-{
- char *s = nd_get_link(nd);
- if (!IS_ERR(s))
- kfree(s);
-}
-
-static const struct inode_operations proc_self_inode_operations = {
- .readlink = proc_self_readlink,
- .follow_link = proc_self_follow_link,
- .put_link = proc_self_put_link,
-};
-
-/*
- * proc base
- *
- * These are the directory entries in the root directory of /proc
- * that properly belong to the /proc filesystem, as they describe
- * describe something that is process related.
- */
-static const struct pid_entry proc_base_stuff[] = {
- NOD("self", S_IFLNK|S_IRWXUGO,
- &proc_self_inode_operations, NULL, {}),
-};
-
-static struct dentry *proc_base_instantiate(struct inode *dir,
- struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
- const struct pid_entry *p = ptr;
- struct inode *inode;
- struct proc_inode *ei;
- struct dentry *error;
-
- /* Allocate the inode */
- error = ERR_PTR(-ENOMEM);
- inode = new_inode(dir->i_sb);
- if (!inode)
- goto out;
-
- /* Initialize the inode */
- ei = PROC_I(inode);
- inode->i_ino = get_next_ino();
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-
- /*
- * grab the reference to the task.
- */
- ei->pid = get_task_pid(task, PIDTYPE_PID);
- if (!ei->pid)
- goto out_iput;
-
- inode->i_mode = p->mode;
- if (S_ISDIR(inode->i_mode))
- set_nlink(inode, 2);
- if (S_ISLNK(inode->i_mode))
- inode->i_size = 64;
- if (p->iop)
- inode->i_op = p->iop;
- if (p->fop)
- inode->i_fop = p->fop;
- ei->op = p->op;
- d_add(dentry, inode);
- error = NULL;
-out:
- return error;
-out_iput:
- iput(inode);
- goto out;
-}
-
-static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
-{
- struct dentry *error;
- struct task_struct *task = get_proc_task(dir);
- const struct pid_entry *p, *last;
-
- error = ERR_PTR(-ENOENT);
-
- if (!task)
- goto out_no_task;
-
- /* Lookup the directory entry */
- last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
- for (p = proc_base_stuff; p <= last; p++) {
- if (p->len != dentry->d_name.len)
- continue;
- if (!memcmp(dentry->d_name.name, p->name, p->len))
- break;
- }
- if (p > last)
- goto out;
-
- error = proc_base_instantiate(dir, dentry, task, p);
-
-out:
- put_task_struct(task);
-out_no_task:
- return error;
-}
-
-static int proc_base_fill_cache(struct file *filp, void *dirent,
- filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
-{
- return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
- proc_base_instantiate, task, p);
-}
-
#ifdef CONFIG_TASK_IO_ACCOUNTING
static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
{
@@ -2839,10 +2699,6 @@ void proc_flush_task(struct task_struct *task)
proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
tgid->numbers[i].nr);
}
-
- upid = &pid->numbers[pid->level];
- if (upid->nr == 1)
- pid_ns_release_proc(upid->ns);
}
static struct dentry *proc_pid_instantiate(struct inode *dir,
@@ -2876,15 +2732,11 @@ out:
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- struct dentry *result;
+ struct dentry *result = NULL;
struct task_struct *task;
unsigned tgid;
struct pid_namespace *ns;
- result = proc_base_lookup(dir, dentry);
- if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
- goto out;
-
tgid = name_to_int(dentry);
if (tgid == ~0U)
goto out;
@@ -2947,7 +2799,7 @@ retry:
return iter;
}
-#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
+#define TGID_OFFSET (FIRST_PROCESS_ENTRY)
static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
struct tgid_iter iter)
@@ -2967,25 +2819,12 @@ static int fake_filldir(void *buf, const char *name, int namelen,
/* for the /proc/ directory itself, after non-process stuff has been done */
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
- unsigned int nr;
- struct task_struct *reaper;
struct tgid_iter iter;
struct pid_namespace *ns;
filldir_t __filldir;
if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
- goto out_no_task;
- nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-
- reaper = get_proc_task(filp->f_path.dentry->d_inode);
- if (!reaper)
- goto out_no_task;
-
- for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
- const struct pid_entry *p = &proc_base_stuff[nr];
- if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
- goto out;
- }
+ goto out;
ns = filp->f_dentry->d_sb->s_fs_info;
iter.task = NULL;
@@ -3006,8 +2845,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
}
filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
out:
- put_task_struct(reaper);
-out_no_task:
return 0;
}
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 0d80cef4cfb9..7b3ae3cc0ef9 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
* Return an inode number between PROC_DYNAMIC_FIRST and
* 0xffffffff, or zero on failure.
*/
-static unsigned int get_inode_number(void)
+int proc_alloc_inum(unsigned int *inum)
{
unsigned int i;
int error;
retry:
- if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
- return 0;
+ if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
+ return -ENOMEM;
spin_lock(&proc_inum_lock);
error = ida_get_new(&proc_inum_ida, &i);
@@ -365,18 +365,19 @@ retry:
if (error == -EAGAIN)
goto retry;
else if (error)
- return 0;
+ return error;
if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
spin_lock(&proc_inum_lock);
ida_remove(&proc_inum_ida, i);
spin_unlock(&proc_inum_lock);
- return 0;
+ return -ENOSPC;
}
- return PROC_DYNAMIC_FIRST + i;
+ *inum = PROC_DYNAMIC_FIRST + i;
+ return 0;
}
-static void release_inode_number(unsigned int inum)
+void proc_free_inum(unsigned int inum)
{
spin_lock(&proc_inum_lock);
ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
@@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = {
static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
{
- unsigned int i;
struct proc_dir_entry *tmp;
+ int ret;
- i = get_inode_number();
- if (i == 0)
- return -EAGAIN;
- dp->low_ino = i;
+ ret = proc_alloc_inum(&dp->low_ino);
+ if (ret)
+ return ret;
if (S_ISDIR(dp->mode)) {
if (dp->proc_iops == NULL) {
@@ -764,7 +764,7 @@ EXPORT_SYMBOL(proc_create_data);
static void free_proc_entry(struct proc_dir_entry *de)
{
- release_inode_number(de->low_ino);
+ proc_free_inum(de->low_ino);
if (S_ISLNK(de->mode))
kfree(de->data);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3b22bbdee9ec..439ae6886507 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -31,6 +31,7 @@ static void proc_evict_inode(struct inode *inode)
struct proc_dir_entry *de;
struct ctl_table_header *head;
const struct proc_ns_operations *ns_ops;
+ void *ns;
truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
@@ -49,8 +50,9 @@ static void proc_evict_inode(struct inode *inode)
}
/* Release any associated namespace */
ns_ops = PROC_I(inode)->ns_ops;
- if (ns_ops && ns_ops->put)
- ns_ops->put(PROC_I(inode)->ns);
+ ns = PROC_I(inode)->ns;
+ if (ns_ops && ns)
+ ns_ops->put(ns);
}
static struct kmem_cache * proc_inode_cachep;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 43973b084abf..252544c05207 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -15,6 +15,7 @@ struct ctl_table_header;
struct mempolicy;
extern struct proc_dir_entry proc_root;
+extern void proc_self_init(void);
#ifdef CONFIG_PROC_SYSCTL
extern int proc_sys_init(void);
extern void sysctl_head_put(struct ctl_table_header *head);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b178ed733c36..b7a47196c8c3 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -11,6 +11,7 @@
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
#include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
#include "internal.h"
@@ -24,12 +25,168 @@ static const struct proc_ns_operations *ns_entries[] = {
#ifdef CONFIG_IPC_NS
&ipcns_operations,
#endif
+#ifdef CONFIG_PID_NS
+ &pidns_operations,
+#endif
+#ifdef CONFIG_USER_NS
+ &userns_operations,
+#endif
+ &mntns_operations,
};
static const struct file_operations ns_file_operations = {
.llseek = no_llseek,
};
+static const struct inode_operations ns_inode_operations = {
+ .setattr = proc_setattr,
+};
+
+static int ns_delete_dentry(const struct dentry *dentry)
+{
+ /* Don't cache namespace inodes when not in use */
+ return 1;
+}
+
+static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ struct inode *inode = dentry->d_inode;
+ const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
+
+ return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
+ ns_ops->name, inode->i_ino);
+}
+
+const struct dentry_operations ns_dentry_operations =
+{
+ .d_delete = ns_delete_dentry,
+ .d_dname = ns_dname,
+};
+
+static struct dentry *proc_ns_get_dentry(struct super_block *sb,
+ struct task_struct *task, const struct proc_ns_operations *ns_ops)
+{
+ struct dentry *dentry, *result;
+ struct inode *inode;
+ struct proc_inode *ei;
+ struct qstr qname = { .name = "", };
+ void *ns;
+
+ ns = ns_ops->get(task);
+ if (!ns)
+ return ERR_PTR(-ENOENT);
+
+ dentry = d_alloc_pseudo(sb, &qname);
+ if (!dentry) {
+ ns_ops->put(ns);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ inode = iget_locked(sb, ns_ops->inum(ns));
+ if (!inode) {
+ dput(dentry);
+ ns_ops->put(ns);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ei = PROC_I(inode);
+ if (inode->i_state & I_NEW) {
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_op = &ns_inode_operations;
+ inode->i_mode = S_IFREG | S_IRUGO;
+ inode->i_fop = &ns_file_operations;
+ ei->ns_ops = ns_ops;
+ ei->ns = ns;
+ unlock_new_inode(inode);
+ } else {
+ ns_ops->put(ns);
+ }
+
+ d_set_d_op(dentry, &ns_dentry_operations);
+ result = d_instantiate_unique(dentry, inode);
+ if (result) {
+ dput(dentry);
+ dentry = result;
+ }
+
+ return dentry;
+}
+
+static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct inode *inode = dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
+ struct proc_inode *ei = PROC_I(inode);
+ struct task_struct *task;
+ struct dentry *ns_dentry;
+ void *error = ERR_PTR(-EACCES);
+
+ task = get_proc_task(inode);
+ if (!task)
+ goto out;
+
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ goto out_put_task;
+
+ ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops);
+ if (IS_ERR(ns_dentry)) {
+ error = ERR_CAST(ns_dentry);
+ goto out_put_task;
+ }
+
+ dput(nd->path.dentry);
+ nd->path.dentry = ns_dentry;
+ error = NULL;
+
+out_put_task:
+ put_task_struct(task);
+out:
+ return error;
+}
+
+static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+ struct inode *inode = dentry->d_inode;
+ struct proc_inode *ei = PROC_I(inode);
+ const struct proc_ns_operations *ns_ops = ei->ns_ops;
+ struct task_struct *task;
+ void *ns;
+ char name[50];
+ int len = -EACCES;
+
+ task = get_proc_task(inode);
+ if (!task)
+ goto out;
+
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ goto out_put_task;
+
+ len = -ENOENT;
+ ns = ns_ops->get(task);
+ if (!ns)
+ goto out_put_task;
+
+ snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
+ len = strlen(name);
+
+ if (len > buflen)
+ len = buflen;
+ if (copy_to_user(buffer, name, len))
+ len = -EFAULT;
+
+ ns_ops->put(ns);
+out_put_task:
+ put_task_struct(task);
+out:
+ return len;
+}
+
+static const struct inode_operations proc_ns_link_inode_operations = {
+ .readlink = proc_ns_readlink,
+ .follow_link = proc_ns_follow_link,
+ .setattr = proc_setattr,
+};
+
static struct dentry *proc_ns_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
@@ -37,21 +194,15 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
struct inode *inode;
struct proc_inode *ei;
struct dentry *error = ERR_PTR(-ENOENT);
- void *ns;
inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
goto out;
- ns = ns_ops->get(task);
- if (!ns)
- goto out_iput;
-
ei = PROC_I(inode);
- inode->i_mode = S_IFREG|S_IRUSR;
- inode->i_fop = &ns_file_operations;
- ei->ns_ops = ns_ops;
- ei->ns = ns;
+ inode->i_mode = S_IFLNK|S_IRWXUGO;
+ inode->i_op = &proc_ns_link_inode_operations;
+ ei->ns_ops = ns_ops;
d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
@@ -60,9 +211,6 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
error = NULL;
out:
return error;
-out_iput:
- iput(inode);
- goto out;
}
static int proc_ns_fill_cache(struct file *filp, void *dirent,
@@ -89,10 +237,6 @@ static int proc_ns_dir_readdir(struct file *filp, void *dirent,
if (!task)
goto out_no_task;
- ret = -EPERM;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
- goto out;
-
ret = 0;
i = filp->f_pos;
switch (i) {
@@ -152,10 +296,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
if (!task)
goto out_no_task;
- error = ERR_PTR(-EPERM);
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
- goto out;
-
last = &ns_entries[ARRAY_SIZE(ns_entries)];
for (entry = ns_entries; entry < last; entry++) {
if (strlen((*entry)->name) != len)
@@ -163,7 +303,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
if (!memcmp(dentry->d_name.name, (*entry)->name, len))
break;
}
- error = ERR_PTR(-ENOENT);
if (entry == last)
goto out;
@@ -198,3 +337,7 @@ out_invalid:
return ERR_PTR(-EINVAL);
}
+bool proc_ns_inode(struct inode *inode)
+{
+ return inode->i_fop == &ns_file_operations;
+}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9889a92d2e01..c6e9fac26bac 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -100,14 +100,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
int err;
struct super_block *sb;
struct pid_namespace *ns;
- struct proc_inode *ei;
char *options;
if (flags & MS_KERNMOUNT) {
ns = (struct pid_namespace *)data;
options = NULL;
} else {
- ns = current->nsproxy->pid_ns;
+ ns = task_active_pid_ns(current);
options = data;
}
@@ -130,13 +129,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
sb->s_flags |= MS_ACTIVE;
}
- ei = PROC_I(sb->s_root->d_inode);
- if (!ei->pid) {
- rcu_read_lock();
- ei->pid = get_pid(find_pid_ns(1, ns));
- rcu_read_unlock();
- }
-
return dget(sb->s_root);
}
@@ -153,6 +145,7 @@ static struct file_system_type proc_fs_type = {
.name = "proc",
.mount = proc_mount,
.kill_sb = proc_kill_sb,
+ .fs_flags = FS_USERNS_MOUNT,
};
void __init proc_root_init(void)
@@ -163,12 +156,8 @@ void __init proc_root_init(void)
err = register_filesystem(&proc_fs_type);
if (err)
return;
- err = pid_ns_prepare_proc(&init_pid_ns);
- if (err) {
- unregister_filesystem(&proc_fs_type);
- return;
- }
+ proc_self_init();
proc_symlink("mounts", NULL, "self/mounts");
proc_net_init();
diff --git a/fs/proc/self.c b/fs/proc/self.c
new file mode 100644
index 000000000000..aa5cc3bff140
--- /dev/null
+++ b/fs/proc/self.c
@@ -0,0 +1,59 @@
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+
+/*
+ * /proc/self:
+ */
+static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
+ int buflen)
+{
+ struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ pid_t tgid = task_tgid_nr_ns(current, ns);
+ char tmp[PROC_NUMBUF];
+ if (!tgid)
+ return -ENOENT;
+ sprintf(tmp, "%d", tgid);
+ return vfs_readlink(dentry,buffer,buflen,tmp);
+}
+
+static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ pid_t tgid = task_tgid_nr_ns(current, ns);
+ char *name = ERR_PTR(-ENOENT);
+ if (tgid) {
+ /* 11 for max length of signed int in decimal + NULL term */
+ name = kmalloc(12, GFP_KERNEL);
+ if (!name)
+ name = ERR_PTR(-ENOMEM);
+ else
+ sprintf(name, "%d", tgid);
+ }
+ nd_set_link(nd, name);
+ return NULL;
+}
+
+static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
+ void *cookie)
+{
+ char *s = nd_get_link(nd);
+ if (!IS_ERR(s))
+ kfree(s);
+}
+
+static const struct inode_operations proc_self_inode_operations = {
+ .readlink = proc_self_readlink,
+ .follow_link = proc_self_follow_link,
+ .put_link = proc_self_put_link,
+};
+
+void __init proc_self_init(void)
+{
+ struct proc_dir_entry *proc_self_symlink;
+ mode_t mode;
+
+ mode = S_IFLNK | S_IRWXUGO;
+ proc_self_symlink = proc_create("self", mode, NULL, NULL );
+ proc_self_symlink->proc_iops = &proc_self_inode_operations;
+}