aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/array.c162
-rw-r--r--fs/proc/base.c245
-rw-r--r--fs/proc/inode.c6
-rw-r--r--fs/proc/internal.h3
-rw-r--r--fs/proc/proc_sysctl.c4
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/task_mmu.c95
-rw-r--r--fs/proc/task_nommu.c2
8 files changed, 378 insertions, 141 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f9bd395b3473..c1c207c36cae 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -81,6 +81,7 @@
#include <linux/pid_namespace.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
+#include <linux/user_namespace.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -161,6 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *p)
{
+ struct user_namespace *user_ns = current_user_ns();
struct group_info *group_info;
int g;
struct fdtable *fdt = NULL;
@@ -189,8 +191,14 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
task_tgid_nr_ns(p, ns),
pid_nr_ns(pid, ns),
ppid, tpid,
- cred->uid, cred->euid, cred->suid, cred->fsuid,
- cred->gid, cred->egid, cred->sgid, cred->fsgid);
+ from_kuid_munged(user_ns, cred->uid),
+ from_kuid_munged(user_ns, cred->euid),
+ from_kuid_munged(user_ns, cred->suid),
+ from_kuid_munged(user_ns, cred->fsuid),
+ from_kgid_munged(user_ns, cred->gid),
+ from_kgid_munged(user_ns, cred->egid),
+ from_kgid_munged(user_ns, cred->sgid),
+ from_kgid_munged(user_ns, cred->fsgid));
task_lock(p);
if (p->files)
@@ -205,7 +213,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
task_unlock(p);
for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
- seq_printf(m, "%d ", GROUP_AT(group_info, g));
+ seq_printf(m, "%d ",
+ from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
put_cred(cred);
seq_putc(m, '\n');
@@ -361,7 +370,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task, int whole)
{
unsigned long vsize, eip, esp, wchan = ~0UL;
- long priority, nice;
+ int priority, nice;
int tty_pgrp = -1, tty_nr = 0;
sigset_t sigign, sigcatch;
char state;
@@ -483,7 +492,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, ' ', 0);
seq_put_decimal_ull(m, ' ', start_time);
seq_put_decimal_ull(m, ' ', vsize);
- seq_put_decimal_ll(m, ' ', mm ? get_mm_rss(mm) : 0);
+ seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0);
seq_put_decimal_ull(m, ' ', rsslim);
seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0);
seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0);
@@ -508,9 +517,23 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task));
seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime));
seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime));
- seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_data : 0);
- seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->end_data : 0);
- seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_brk : 0);
+
+ if (mm && permitted) {
+ seq_put_decimal_ull(m, ' ', mm->start_data);
+ seq_put_decimal_ull(m, ' ', mm->end_data);
+ seq_put_decimal_ull(m, ' ', mm->start_brk);
+ seq_put_decimal_ull(m, ' ', mm->arg_start);
+ seq_put_decimal_ull(m, ' ', mm->arg_end);
+ seq_put_decimal_ull(m, ' ', mm->env_start);
+ seq_put_decimal_ull(m, ' ', mm->env_end);
+ } else
+ seq_printf(m, " 0 0 0 0 0 0 0");
+
+ if (permitted)
+ seq_put_decimal_ll(m, ' ', task->exit_code);
+ else
+ seq_put_decimal_ll(m, ' ', 0);
+
seq_putc(m, '\n');
if (mm)
mmput(mm);
@@ -556,3 +579,126 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static struct pid *
+get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos)
+{
+ struct task_struct *start, *task;
+ struct pid *pid = NULL;
+
+ read_lock(&tasklist_lock);
+
+ start = pid_task(proc_pid(inode), PIDTYPE_PID);
+ if (!start)
+ goto out;
+
+ /*
+ * Lets try to continue searching first, this gives
+ * us significant speedup on children-rich processes.
+ */
+ if (pid_prev) {
+ task = pid_task(pid_prev, PIDTYPE_PID);
+ if (task && task->real_parent == start &&
+ !(list_empty(&task->sibling))) {
+ if (list_is_last(&task->sibling, &start->children))
+ goto out;
+ task = list_first_entry(&task->sibling,
+ struct task_struct, sibling);
+ pid = get_pid(task_pid(task));
+ goto out;
+ }
+ }
+
+ /*
+ * Slow search case.
+ *
+ * We might miss some children here if children
+ * are exited while we were not holding the lock,
+ * but it was never promised to be accurate that
+ * much.
+ *
+ * "Just suppose that the parent sleeps, but N children
+ * exit after we printed their tids. Now the slow paths
+ * skips N extra children, we miss N tasks." (c)
+ *
+ * So one need to stop or freeze the leader and all
+ * its children to get a precise result.
+ */
+ list_for_each_entry(task, &start->children, sibling) {
+ if (pos-- == 0) {
+ pid = get_pid(task_pid(task));
+ break;
+ }
+ }
+
+out:
+ read_unlock(&tasklist_lock);
+ return pid;
+}
+
+static int children_seq_show(struct seq_file *seq, void *v)
+{
+ struct inode *inode = seq->private;
+ pid_t pid;
+
+ pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
+ return seq_printf(seq, "%d ", pid);
+}
+
+static void *children_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return get_children_pid(seq->private, NULL, *pos);
+}
+
+static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct pid *pid;
+
+ pid = get_children_pid(seq->private, v, *pos + 1);
+ put_pid(v);
+
+ ++*pos;
+ return pid;
+}
+
+static void children_seq_stop(struct seq_file *seq, void *v)
+{
+ put_pid(v);
+}
+
+static const struct seq_operations children_seq_ops = {
+ .start = children_seq_start,
+ .next = children_seq_next,
+ .stop = children_seq_stop,
+ .show = children_seq_show,
+};
+
+static int children_seq_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *m;
+ int ret;
+
+ ret = seq_open(file, &children_seq_ops);
+ if (ret)
+ return ret;
+
+ m = file->private_data;
+ m->private = inode;
+
+ return ret;
+}
+
+int children_seq_release(struct inode *inode, struct file *file)
+{
+ seq_release(inode, file);
+ return 0;
+}
+
+const struct file_operations proc_tid_children_operations = {
+ .open = children_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = children_seq_release,
+};
+#endif /* CONFIG_CHECKPOINT_RESTORE */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1c8b280146d7..437195f204e1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -81,6 +81,7 @@
#include <linux/oom.h>
#include <linux/elf.h>
#include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
#include <linux/slab.h>
#include <linux/flex_array.h>
@@ -198,11 +199,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}
-struct mm_struct *mm_for_maps(struct task_struct *task)
-{
- return mm_access(task, PTRACE_MODE_READ);
-}
-
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
int res = 0;
@@ -242,7 +238,7 @@ out:
static int proc_pid_auxv(struct task_struct *task, char *buffer)
{
- struct mm_struct *mm = mm_for_maps(task);
+ struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ);
int res = PTR_ERR(mm);
if (mm && !IS_ERR(mm)) {
unsigned int nwords = 0;
@@ -410,12 +406,13 @@ static const struct file_operations proc_lstats_operations = {
static int proc_oom_score(struct task_struct *task, char *buffer)
{
+ unsigned long totalpages = totalram_pages + total_swap_pages;
unsigned long points = 0;
read_lock(&tasklist_lock);
if (pid_alive(task))
- points = oom_badness(task, NULL, NULL,
- totalram_pages + total_swap_pages);
+ points = oom_badness(task, NULL, NULL, totalpages) *
+ 1000 / totalpages;
read_unlock(&tasklist_lock);
return sprintf(buffer, "%lu\n", points);
}
@@ -677,7 +674,7 @@ static const struct file_operations proc_single_file_operations = {
.release = single_release,
};
-static int mem_open(struct inode* inode, struct file* file)
+static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
{
struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
struct mm_struct *mm;
@@ -685,7 +682,7 @@ static int mem_open(struct inode* inode, struct file* file)
if (!task)
return -ESRCH;
- mm = mm_access(task, PTRACE_MODE_ATTACH);
+ mm = mm_access(task, mode);
put_task_struct(task);
if (IS_ERR(mm))
@@ -705,6 +702,11 @@ static int mem_open(struct inode* inode, struct file* file)
return 0;
}
+static int mem_open(struct inode *inode, struct file *file)
+{
+ return __mem_open(inode, file, PTRACE_MODE_ATTACH);
+}
+
static ssize_t mem_rw(struct file *file, char __user *buf,
size_t count, loff_t *ppos, int write)
{
@@ -801,30 +803,29 @@ static const struct file_operations proc_mem_operations = {
.release = mem_release,
};
+static int environ_open(struct inode *inode, struct file *file)
+{
+ return __mem_open(inode, file, PTRACE_MODE_READ);
+}
+
static ssize_t environ_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
char *page;
unsigned long src = *ppos;
- int ret = -ESRCH;
- struct mm_struct *mm;
+ int ret = 0;
+ struct mm_struct *mm = file->private_data;
- if (!task)
- goto out_no_task;
+ if (!mm)
+ return 0;
- ret = -ENOMEM;
page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
- goto out;
-
-
- mm = mm_for_maps(task);
- ret = PTR_ERR(mm);
- if (!mm || IS_ERR(mm))
- goto out_free;
+ return -ENOMEM;
ret = 0;
+ if (!atomic_inc_not_zero(&mm->mm_users))
+ goto free;
while (count > 0) {
int this_len, retval, max_len;
@@ -836,7 +837,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
this_len = (this_len > max_len) ? max_len : this_len;
- retval = access_process_vm(task, (mm->env_start + src),
+ retval = access_remote_vm(mm, (mm->env_start + src),
page, this_len, 0);
if (retval <= 0) {
@@ -855,19 +856,18 @@ static ssize_t environ_read(struct file *file, char __user *buf,
count -= retval;
}
*ppos = src;
-
mmput(mm);
-out_free:
+
+free:
free_page((unsigned long) page);
-out:
- put_task_struct(task);
-out_no_task:
return ret;
}
static const struct file_operations proc_environ_operations = {
+ .open = environ_open,
.read = environ_read,
.llseek = generic_file_llseek,
+ .release = mem_release,
};
static ssize_t oom_adjust_read(struct file *file, char __user *buf,
@@ -1561,8 +1561,8 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
generic_fillattr(inode, stat);
rcu_read_lock();
- stat->uid = 0;
- stat->gid = 0;
+ stat->uid = GLOBAL_ROOT_UID;
+ stat->gid = GLOBAL_ROOT_GID;
task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task) {
if (!has_pid_permissions(pid, task, 2)) {
@@ -1622,8 +1622,8 @@ int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
inode->i_gid = cred->egid;
rcu_read_unlock();
} else {
- inode->i_uid = 0;
- inode->i_gid = 0;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
}
inode->i_mode &= ~(S_ISUID | S_ISGID);
security_task_to_inode(task, inode);
@@ -1799,10 +1799,15 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
if (task) {
files = get_files_struct(task);
if (files) {
+ struct file *file;
rcu_read_lock();
- if (fcheck_files(files, fd)) {
+ file = fcheck_files(files, fd);
+ if (file) {
+ unsigned f_mode = file->f_mode;
+
rcu_read_unlock();
put_files_struct(files);
+
if (task_dumpable(task)) {
rcu_read_lock();
cred = __task_cred(task);
@@ -1810,10 +1815,19 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
inode->i_gid = cred->egid;
rcu_read_unlock();
} else {
- inode->i_uid = 0;
- inode->i_gid = 0;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ }
+
+ if (S_ISLNK(inode->i_mode)) {
+ unsigned i_mode = S_IFLNK;
+ if (f_mode & FMODE_READ)
+ i_mode |= S_IRUSR | S_IXUSR;
+ if (f_mode & FMODE_WRITE)
+ i_mode |= S_IWUSR | S_IXUSR;
+ inode->i_mode = i_mode;
}
- inode->i_mode &= ~(S_ISUID | S_ISGID);
+
security_task_to_inode(task, inode);
put_task_struct(task);
return 1;
@@ -1836,9 +1850,7 @@ static const struct dentry_operations tid_fd_dentry_operations =
static struct dentry *proc_fd_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
- unsigned fd = *(const unsigned *)ptr;
- struct file *file;
- struct files_struct *files;
+ unsigned fd = (unsigned long)ptr;
struct inode *inode;
struct proc_inode *ei;
struct dentry *error = ERR_PTR(-ENOENT);
@@ -1848,26 +1860,8 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
goto out;
ei = PROC_I(inode);
ei->fd = fd;
- files = get_files_struct(task);
- if (!files)
- goto out_iput;
- inode->i_mode = S_IFLNK;
-
- /*
- * We are not taking a ref to the file structure, so we must
- * hold ->file_lock.
- */
- spin_lock(&files->file_lock);
- file = fcheck_files(files, fd);
- if (!file)
- goto out_unlock;
- if (file->f_mode & FMODE_READ)
- inode->i_mode |= S_IRUSR | S_IXUSR;
- if (file->f_mode & FMODE_WRITE)
- inode->i_mode |= S_IWUSR | S_IXUSR;
- spin_unlock(&files->file_lock);
- put_files_struct(files);
+ inode->i_mode = S_IFLNK;
inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
ei->op.proc_get_link = proc_fd_link;
@@ -1879,12 +1873,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
out:
return error;
-out_unlock:
- spin_unlock(&files->file_lock);
- put_files_struct(files);
-out_iput:
- iput(inode);
- goto out;
}
static struct dentry *proc_lookupfd_common(struct inode *dir,
@@ -1900,7 +1888,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
if (fd == ~0U)
goto out;
- result = instantiate(dir, dentry, task, &fd);
+ result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
out:
put_task_struct(task);
out_no_task:
@@ -1943,21 +1931,22 @@ static int proc_readfd_common(struct file * filp, void * dirent,
fd++, filp->f_pos++) {
char name[PROC_NUMBUF];
int len;
+ int rv;
if (!fcheck_files(files, fd))
continue;
rcu_read_unlock();
len = snprintf(name, sizeof(name), "%d", fd);
- if (proc_fill_cache(filp, dirent, filldir,
- name, len, instantiate,
- p, &fd) < 0) {
- rcu_read_lock();
- break;
- }
+ rv = proc_fill_cache(filp, dirent, filldir,
+ name, len, instantiate, p,
+ (void *)(unsigned long)fd);
+ if (rv < 0)
+ goto out_fd_loop;
rcu_read_lock();
}
rcu_read_unlock();
+out_fd_loop:
put_files_struct(files);
}
out:
@@ -2037,11 +2026,8 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
if (!task)
goto out_notask;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
- goto out;
-
- mm = get_task_mm(task);
- if (!mm)
+ mm = mm_access(task, PTRACE_MODE_READ);
+ if (IS_ERR_OR_NULL(mm))
goto out;
if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
@@ -2060,8 +2046,8 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
inode->i_gid = cred->egid;
rcu_read_unlock();
} else {
- inode->i_uid = 0;
- inode->i_gid = 0;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
}
security_task_to_inode(task, inode);
status = 1;
@@ -2177,16 +2163,16 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
goto out;
result = ERR_PTR(-EACCES);
- if (lock_trace(task))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out_put_task;
result = ERR_PTR(-ENOENT);
if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
- goto out_unlock;
+ goto out_put_task;
mm = get_task_mm(task);
if (!mm)
- goto out_unlock;
+ goto out_put_task;
down_read(&mm->mmap_sem);
vma = find_exact_vma(mm, vm_start, vm_end);
@@ -2198,8 +2184,6 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
out_no_vma:
up_read(&mm->mmap_sem);
mmput(mm);
-out_unlock:
- unlock_trace(task);
out_put_task:
put_task_struct(task);
out:
@@ -2233,7 +2217,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
goto out;
ret = -EACCES;
- if (lock_trace(task))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out_put_task;
ret = 0;
@@ -2241,12 +2225,12 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
case 0:
ino = inode->i_ino;
if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
- goto out_unlock;
+ goto out_put_task;
filp->f_pos++;
case 1:
ino = parent_ino(dentry);
if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
- goto out_unlock;
+ goto out_put_task;
filp->f_pos++;
default:
{
@@ -2257,7 +2241,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
mm = get_task_mm(task);
if (!mm)
- goto out_unlock;
+ goto out_put_task;
down_read(&mm->mmap_sem);
nr_files = 0;
@@ -2287,7 +2271,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
flex_array_free(fa);
up_read(&mm->mmap_sem);
mmput(mm);
- goto out_unlock;
+ goto out_put_task;
}
for (i = 0, vma = mm->mmap, pos = 2; vma;
vma = vma->vm_next) {
@@ -2332,8 +2316,6 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
}
-out_unlock:
- unlock_trace(task);
out_put_task:
put_task_struct(task);
out:
@@ -2374,7 +2356,7 @@ static const struct inode_operations proc_fd_inode_operations = {
static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
- unsigned fd = *(unsigned *)ptr;
+ unsigned fd = (unsigned long)ptr;
struct inode *inode;
struct proc_inode *ei;
struct dentry *error = ERR_PTR(-ENOENT);
@@ -2943,6 +2925,74 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
}
#endif /* CONFIG_TASK_IO_ACCOUNTING */
+#ifdef CONFIG_USER_NS
+static int proc_id_map_open(struct inode *inode, struct file *file,
+ struct seq_operations *seq_ops)
+{
+ struct user_namespace *ns = NULL;
+ struct task_struct *task;
+ struct seq_file *seq;
+ int ret = -EINVAL;
+
+ task = get_proc_task(inode);
+ if (task) {
+ rcu_read_lock();
+ ns = get_user_ns(task_cred_xxx(task, user_ns));
+ rcu_read_unlock();
+ put_task_struct(task);
+ }
+ if (!ns)
+ goto err;
+
+ ret = seq_open(file, seq_ops);
+ if (ret)
+ goto err_put_ns;
+
+ seq = file->private_data;
+ seq->private = ns;
+
+ return 0;
+err_put_ns:
+ put_user_ns(ns);
+err:
+ return ret;
+}
+
+static int proc_id_map_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct user_namespace *ns = seq->private;
+ put_user_ns(ns);
+ return seq_release(inode, file);
+}
+
+static int proc_uid_map_open(struct inode *inode, struct file *file)
+{
+ return proc_id_map_open(inode, file, &proc_uid_seq_operations);
+}
+
+static int proc_gid_map_open(struct inode *inode, struct file *file)
+{
+ return proc_id_map_open(inode, file, &proc_gid_seq_operations);
+}
+
+static const struct file_operations proc_uid_map_operations = {
+ .open = proc_uid_map_open,
+ .write = proc_uid_map_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = proc_id_map_release,
+};
+
+static const struct file_operations proc_gid_map_operations = {
+ .open = proc_gid_map_open,
+ .write = proc_gid_map_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = proc_id_map_release,
+};
+#endif /* CONFIG_USER_NS */
+
static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
@@ -3045,6 +3095,10 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_HARDWALL
INF("hardwall", S_IRUGO, proc_pid_hardwall),
#endif
+#ifdef CONFIG_USER_NS
+ REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
+ REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
+#endif
};
static int proc_tgid_base_readdir(struct file * filp,
@@ -3349,6 +3403,9 @@ static const struct pid_entry tid_base_stuff[] = {
ONE("stat", S_IRUGO, proc_tid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps", S_IRUGO, proc_tid_maps_operations),
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ REG("children", S_IRUGO, proc_tid_children_operations),
+#endif
#ifdef CONFIG_NUMA
REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
#endif
@@ -3400,6 +3457,10 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_HARDWALL
INF("hardwall", S_IRUGO, proc_pid_hardwall),
#endif
+#ifdef CONFIG_USER_NS
+ REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
+ REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
+#endif
};
static int proc_tid_base_readdir(struct file * filp,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 205c92280838..7ac817b64a71 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -33,7 +33,7 @@ static void proc_evict_inode(struct inode *inode)
const struct proc_ns_operations *ns_ops;
truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
+ clear_inode(inode);
/* Stop tracking associated processes */
put_pid(PROC_I(inode)->pid);
@@ -108,8 +108,8 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root)
struct super_block *sb = root->d_sb;
struct pid_namespace *pid = sb->s_fs_info;
- if (pid->pid_gid)
- seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid);
+ if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
+ seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
if (pid->hide_pid != 0)
seq_printf(seq, ",hidepid=%u", pid->hide_pid);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5f79bb8b4c60..eca4aca5b6e2 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -31,8 +31,6 @@ struct vmalloc_info {
unsigned long largest_chunk;
};
-extern struct mm_struct *mm_for_maps(struct task_struct *);
-
#ifdef CONFIG_MMU
#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
extern void get_vmalloc_info(struct vmalloc_info *vmi);
@@ -56,6 +54,7 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task);
extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
+extern const struct file_operations proc_tid_children_operations;
extern const struct file_operations proc_pid_maps_operations;
extern const struct file_operations proc_tid_maps_operations;
extern const struct file_operations proc_pid_numa_maps_operations;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 21d836f40292..3476bca8f7af 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -371,9 +371,9 @@ void register_sysctl_root(struct ctl_table_root *root)
static int test_perm(int mode, int op)
{
- if (!current_euid())
+ if (uid_eq(current_euid(), GLOBAL_ROOT_UID))
mode >>= 6;
- else if (in_egroup_p(0))
+ else if (in_egroup_p(GLOBAL_ROOT_GID))
mode >>= 3;
if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
return 0;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index eed44bfc85db..7c30fce037c0 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -67,7 +67,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid)
case Opt_gid:
if (match_int(&args[0], &option))
return 0;
- pid->pid_gid = option;
+ pid->pid_gid = make_kgid(current_user_ns(), option);
break;
case Opt_hidepid:
if (match_int(&args[0], &option))
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2b9a7607cbd5..4540b8f76f16 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -125,7 +125,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
if (!priv->task)
return ERR_PTR(-ESRCH);
- mm = mm_for_maps(priv->task);
+ mm = mm_access(priv->task, PTRACE_MODE_READ);
if (!mm || IS_ERR(mm))
return mm;
down_read(&mm->mmap_sem);
@@ -393,6 +393,7 @@ struct mem_size_stats {
unsigned long anonymous;
unsigned long anonymous_thp;
unsigned long swap;
+ unsigned long nonlinear;
u64 pss;
};
@@ -402,24 +403,33 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = mss->vma;
- struct page *page;
+ pgoff_t pgoff = linear_page_index(vma, addr);
+ struct page *page = NULL;
int mapcount;
- if (is_swap_pte(ptent)) {
- mss->swap += ptent_size;
- return;
+ if (pte_present(ptent)) {
+ page = vm_normal_page(vma, addr, ptent);
+ } else if (is_swap_pte(ptent)) {
+ swp_entry_t swpent = pte_to_swp_entry(ptent);
+
+ if (!non_swap_entry(swpent))
+ mss->swap += ptent_size;
+ else if (is_migration_entry(swpent))
+ page = migration_entry_to_page(swpent);
+ } else if (pte_file(ptent)) {
+ if (pte_to_pgoff(ptent) != pgoff)
+ mss->nonlinear += ptent_size;
}
- if (!pte_present(ptent))
- return;
-
- page = vm_normal_page(vma, addr, ptent);
if (!page)
return;
if (PageAnon(page))
mss->anonymous += ptent_size;
+ if (page->index != pgoff)
+ mss->nonlinear += ptent_size;
+
mss->resident += ptent_size;
/* Accumulate the size in pages that have been accessed. */
if (pte_young(ptent) || PageReferenced(page))
@@ -521,6 +531,10 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
(vma->vm_flags & VM_LOCKED) ?
(unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
+ if (vma->vm_flags & VM_NONLINEAR)
+ seq_printf(m, "Nonlinear: %8lu kB\n",
+ mss.nonlinear >> 10);
+
if (m->count < m->size) /* vma is copied successfully */
m->version = (vma != get_gate_vma(task->mm))
? vma->vm_start : 0;
@@ -597,9 +611,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
if (!page)
continue;
- if (PageReserved(page))
- continue;
-
/* Clear accessed and referenced bits. */
ptep_test_and_clear_young(vma, addr, pte);
ClearPageReferenced(page);
@@ -703,6 +714,7 @@ struct pagemapread {
#define PM_PRESENT PM_STATUS(4LL)
#define PM_SWAP PM_STATUS(2LL)
+#define PM_FILE PM_STATUS(1LL)
#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
#define PM_END_OF_BUFFER 1
@@ -736,20 +748,33 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
return err;
}
-static u64 swap_pte_to_pagemap_entry(pte_t pte)
+static void pte_to_pagemap_entry(pagemap_entry_t *pme,
+ struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
- swp_entry_t e = pte_to_swp_entry(pte);
- return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
-}
+ u64 frame, flags;
+ struct page *page = NULL;
+
+ if (pte_present(pte)) {
+ frame = pte_pfn(pte);
+ flags = PM_PRESENT;
+ page = vm_normal_page(vma, addr, pte);
+ } else if (is_swap_pte(pte)) {
+ swp_entry_t entry = pte_to_swp_entry(pte);
+
+ frame = swp_type(entry) |
+ (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ flags = PM_SWAP;
+ if (is_migration_entry(entry))
+ page = migration_entry_to_page(entry);
+ } else {
+ *pme = make_pme(PM_NOT_PRESENT);
+ return;
+ }
-static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte)
-{
- if (is_swap_pte(pte))
- *pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte))
- | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP);
- else if (pte_present(pte))
- *pme = make_pme(PM_PFRAME(pte_pfn(pte))
- | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+ if (page && !PageAnon(page))
+ flags |= PM_FILE;
+
+ *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -764,6 +789,8 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
if (pmd_present(pmd))
*pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+ else
+ *pme = make_pme(PM_NOT_PRESENT);
}
#else
static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
@@ -783,7 +810,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
/* find the first VMA at or above 'addr' */
vma = find_vma(walk->mm, addr);
- if (pmd_trans_huge_lock(pmd, vma) == 1) {
+ if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
for (; addr != end; addr += PAGE_SIZE) {
unsigned long offset;
@@ -804,15 +831,17 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
/* check to see if we've left 'vma' behind
* and need a new, higher one */
- if (vma && (addr >= vma->vm_end))
+ if (vma && (addr >= vma->vm_end)) {
vma = find_vma(walk->mm, addr);
+ pme = make_pme(PM_NOT_PRESENT);
+ }
/* check that 'vma' actually covers this address,
* and that it isn't a huge page vma */
if (vma && (vma->vm_start <= addr) &&
!is_vm_hugetlb_page(vma)) {
pte = pte_offset_map(pmd, addr);
- pte_to_pagemap_entry(&pme, *pte);
+ pte_to_pagemap_entry(&pme, vma, addr, *pte);
/* unmap before userspace copy */
pte_unmap(pte);
}
@@ -833,6 +862,8 @@ static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme,
if (pte_present(pte))
*pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+ else
+ *pme = make_pme(PM_NOT_PRESENT);
}
/* This function walks within one hugetlb entry in the single call */
@@ -842,7 +873,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
{
struct pagemapread *pm = walk->private;
int err = 0;
- pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
+ pagemap_entry_t pme;
for (; addr != end; addr += PAGE_SIZE) {
int offset = (addr & ~hmask) >> PAGE_SHIFT;
@@ -864,11 +895,11 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
* For each page in the address space, this file contains one 64-bit entry
* consisting of the following:
*
- * Bits 0-55 page frame number (PFN) if present
+ * Bits 0-54 page frame number (PFN) if present
* Bits 0-4 swap type if swapped
- * Bits 5-55 swap offset if swapped
+ * Bits 5-54 swap offset if swapped
* Bits 55-60 page shift (page size = 1<<page shift)
- * Bit 61 reserved for future use
+ * Bit 61 page is file-page or shared-anon
* Bit 62 page swapped
* Bit 63 page present
*
@@ -914,7 +945,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!pm.buffer)
goto out_task;
- mm = mm_for_maps(task);
+ mm = mm_access(task, PTRACE_MODE_READ);
ret = PTR_ERR(mm);
if (!mm || IS_ERR(mm))
goto out_free;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 74fe164d1b23..1ccfa537f5f5 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -223,7 +223,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
if (!priv->task)
return ERR_PTR(-ESRCH);
- mm = mm_for_maps(priv->task);
+ mm = mm_access(priv->task, PTRACE_MODE_READ);
if (!mm || IS_ERR(mm)) {
put_task_struct(priv->task);
priv->task = NULL;