From 593d1ce854dff93b3c9066e897192eb676b09c46 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 14 Sep 2017 12:07:32 -0500
Subject: vfs: Don't allow changing the link count of an inode with an invalid
 uid or gid

Changing the link count of an inode via unlink or link will cause a
write back of that inode.  If the uids or gids are invalid (aka not known
to the kernel) writing the inode back may change the uid or gid in the
filesystem.   To prevent possible filesystem and to avoid the need for
filesystem maintainers to worry about it don't allow operations on
inodes with an invalid uid or gid.

Acked-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namei.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 186bd2464fd5..942c1f096f6b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -984,13 +984,15 @@ static bool safe_hardlink_source(struct inode *inode)
  */
 static int may_linkat(struct path *link)
 {
-	struct inode *inode;
+	struct inode *inode = link->dentry->d_inode;
+
+	/* Inode writeback is not safe when the uid or gid are invalid. */
+	if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+		return -EOVERFLOW;
 
 	if (!sysctl_protected_hardlinks)
 		return 0;
 
-	inode = link->dentry->d_inode;
-
 	/* Source inode owner (or CAP_FOWNER) can hardlink all they like,
 	 * otherwise, it must be a safe source.
 	 */
@@ -2749,6 +2751,11 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
 	BUG_ON(!inode);
 
 	BUG_ON(victim->d_parent->d_inode != dir);
+
+	/* Inode writeback is not safe when the uid or gid are invalid. */
+	if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+		return -EOVERFLOW;
+
 	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
 
 	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
-- 
cgit v1.2.3-59-g8ed1b


From 55956b59df336f6738da916dbb520b6e37df9fbd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 23 May 2018 15:24:18 -0500
Subject: vfs: Allow userns root to call mknod on owned filesystems.

These filesystems already always set SB_I_NODEV so mknod will not be
useful for gaining control of any devices no matter their permissions.
This will allow overlayfs and applications like to fakeroot to use
device nodes to represent things on disk.

Acked-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namei.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 942c1f096f6b..20335896dcce 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3679,7 +3679,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 	if (error)
 		return error;
 
-	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
+	if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
+	    !ns_capable(dentry->d_sb->s_user_ns, CAP_MKNOD))
 		return -EPERM;
 
 	if (!dir->i_op->mknod)
-- 
cgit v1.2.3-59-g8ed1b


From 0031181c49ca94b14b11f08e447f40c6ebc842a4 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 15 Oct 2016 18:36:59 -0500
Subject: fs: Allow superblock owner to replace invalid owners of inodes

Allow users with CAP_SYS_CHOWN over the superblock of a filesystem to
chown files when inode owner is invalid.  Ordinarily the
capable_wrt_inode_uidgid check is sufficient to allow access to files
but when the underlying filesystem has uids or gids that don't map to
the current user namespace it is not enough, so the chown permission
checks need to be extended to allow this case.

Calling chown on filesystem nodes whose uid or gid don't map is
necessary if those nodes are going to be modified as writing back
inodes which contain uids or gids that don't map is likely to cause
filesystem corruption of the uid or gid fields.

Once chown has been called the existing capable_wrt_inode_uidgid
checks are sufficient to allow the owner of a superblock to do anything
the global root user can do with an appropriate set of capabilities.

An ordinary filesystem mountable by a userns root will limit all uids
and gids in s_user_ns or the INVALID_UID and INVALID_GID to flag all
others.  So having this added permission limited to just INVALID_UID
and INVALID_GID is sufficient to handle every case on an ordinary filesystem.

Of the virtual filesystems at least proc is known to set s_user_ns to
something other than &init_user_ns, while at the same time presenting
some files owned by GLOBAL_ROOT_UID.  Those files the mounter of proc
in a user namespace should not be able to chown to get access to.
Limiting the relaxation in permission to just the minimum of allowing
changing INVALID_UID and INVALID_GID prevents problems with cases like
that.

The original version of this patch was written by: Seth Forshee.  I
have rewritten and rethought this patch enough so it's really not the
same thing (certainly it needs a different description), but he
deserves credit for getting out there and getting the conversation
started, and finding the potential gotcha's and putting up with my
semi-paranoid feedback.

Inspired-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/attr.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/attr.c b/fs/attr.c
index 12ffdb6fb63c..d0b4d34878fb 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,6 +18,32 @@
 #include <linux/evm.h>
 #include <linux/ima.h>
 
+static bool chown_ok(const struct inode *inode, kuid_t uid)
+{
+	if (uid_eq(current_fsuid(), inode->i_uid) &&
+	    uid_eq(uid, inode->i_uid))
+		return true;
+	if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+		return true;
+	if (uid_eq(inode->i_uid, INVALID_UID) &&
+	    ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
+		return true;
+	return false;
+}
+
+static bool chgrp_ok(const struct inode *inode, kgid_t gid)
+{
+	if (uid_eq(current_fsuid(), inode->i_uid) &&
+	    (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
+		return true;
+	if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+		return true;
+	if (gid_eq(inode->i_gid, INVALID_GID) &&
+	    ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
+		return true;
+	return false;
+}
+
 /**
  * setattr_prepare - check if attribute changes to a dentry are allowed
  * @dentry:	dentry to check
@@ -52,17 +78,11 @@ int setattr_prepare(struct dentry *dentry, struct iattr *attr)
 		goto kill_priv;
 
 	/* Make sure a caller can chown. */
-	if ((ia_valid & ATTR_UID) &&
-	    (!uid_eq(current_fsuid(), inode->i_uid) ||
-	     !uid_eq(attr->ia_uid, inode->i_uid)) &&
-	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+	if ((ia_valid & ATTR_UID) && !chown_ok(inode, attr->ia_uid))
 		return -EPERM;
 
 	/* Make sure caller can chgrp. */
-	if ((ia_valid & ATTR_GID) &&
-	    (!uid_eq(current_fsuid(), inode->i_uid) ||
-	    (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
-	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+	if ((ia_valid & ATTR_GID) && !chgrp_ok(inode, attr->ia_gid))
 		return -EPERM;
 
 	/* Make sure a caller can chmod. */
-- 
cgit v1.2.3-59-g8ed1b


From bc6155d1326092f4c29fe05a32b614249620d88e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 18 Sep 2017 17:58:08 -0500
Subject: fs: Allow superblock owner to access do_remount_sb()

Superblock level remounts are currently restricted to global
CAP_SYS_ADMIN, as is the path for changing the root mount to
read only on umount. Loosen both of these permission checks to
also allow CAP_SYS_ADMIN in any namespace which is privileged
towards the userns which originally mounted the filesystem.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Serge Hallyn <serge@hallyn.com>
Acked-by: Christian Brauner <christian@brauner.io>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/namespace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 5f75969adff1..8ddd14806799 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1590,7 +1590,7 @@ static int do_umount(struct mount *mnt, int flags)
 		 * Special case for "unmounting" root ...
 		 * we just try to remount it readonly.
 		 */
-		if (!capable(CAP_SYS_ADMIN))
+		if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 			return -EPERM;
 		down_write(&sb->s_umount);
 		if (!sb_rdonly(sb))
@@ -2333,7 +2333,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
 	down_write(&sb->s_umount);
 	if (ms_flags & MS_BIND)
 		err = change_mount_flags(path->mnt, ms_flags);
-	else if (!capable(CAP_SYS_ADMIN))
+	else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 		err = -EPERM;
 	else
 		err = do_remount_sb(sb, sb_flags, data, 0);
-- 
cgit v1.2.3-59-g8ed1b


From f3f1a18330ac1b717cd7a32adff38d965f365aa2 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Sun, 15 Feb 2015 14:35:35 -0600
Subject: fs: Allow CAP_SYS_ADMIN in s_user_ns to freeze and thaw filesystems

The user in control of a super block should be allowed to freeze
and thaw it. Relax the restrictions on the FIFREEZE and FITHAW
ioctls to require CAP_SYS_ADMIN in s_user_ns.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: Christian Brauner <christian@brauner.io>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/ioctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 4823431d1c9d..b445b13fc59b 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -549,7 +549,7 @@ static int ioctl_fsfreeze(struct file *filp)
 {
 	struct super_block *sb = file_inode(filp)->i_sb;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/* If filesystem doesn't support freeze feature, return. */
@@ -566,7 +566,7 @@ static int ioctl_fsthaw(struct file *filp)
 {
 	struct super_block *sb = file_inode(filp)->i_sb;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/* Thaw */
-- 
cgit v1.2.3-59-g8ed1b