From 593d1ce854dff93b3c9066e897192eb676b09c46 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 14 Sep 2017 12:07:32 -0500 Subject: vfs: Don't allow changing the link count of an inode with an invalid uid or gid Changing the link count of an inode via unlink or link will cause a write back of that inode. If the uids or gids are invalid (aka not known to the kernel) writing the inode back may change the uid or gid in the filesystem. To prevent possible filesystem and to avoid the need for filesystem maintainers to worry about it don't allow operations on inodes with an invalid uid or gid. Acked-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- fs/namei.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 186bd2464fd5..942c1f096f6b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -984,13 +984,15 @@ static bool safe_hardlink_source(struct inode *inode) */ static int may_linkat(struct path *link) { - struct inode *inode; + struct inode *inode = link->dentry->d_inode; + + /* Inode writeback is not safe when the uid or gid are invalid. */ + if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid)) + return -EOVERFLOW; if (!sysctl_protected_hardlinks) return 0; - inode = link->dentry->d_inode; - /* Source inode owner (or CAP_FOWNER) can hardlink all they like, * otherwise, it must be a safe source. */ @@ -2749,6 +2751,11 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) BUG_ON(!inode); BUG_ON(victim->d_parent->d_inode != dir); + + /* Inode writeback is not safe when the uid or gid are invalid. */ + if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid)) + return -EOVERFLOW; + audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); error = inode_permission(dir, MAY_WRITE | MAY_EXEC); -- cgit v1.2.3-59-g8ed1b From 55956b59df336f6738da916dbb520b6e37df9fbd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 May 2018 15:24:18 -0500 Subject: vfs: Allow userns root to call mknod on owned filesystems. These filesystems already always set SB_I_NODEV so mknod will not be useful for gaining control of any devices no matter their permissions. This will allow overlayfs and applications like to fakeroot to use device nodes to represent things on disk. Acked-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- fs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 942c1f096f6b..20335896dcce 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3679,7 +3679,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) if (error) return error; - if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) + if ((S_ISCHR(mode) || S_ISBLK(mode)) && + !ns_capable(dentry->d_sb->s_user_ns, CAP_MKNOD)) return -EPERM; if (!dir->i_op->mknod) -- cgit v1.2.3-59-g8ed1b From 0031181c49ca94b14b11f08e447f40c6ebc842a4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 15 Oct 2016 18:36:59 -0500 Subject: fs: Allow superblock owner to replace invalid owners of inodes Allow users with CAP_SYS_CHOWN over the superblock of a filesystem to chown files when inode owner is invalid. Ordinarily the capable_wrt_inode_uidgid check is sufficient to allow access to files but when the underlying filesystem has uids or gids that don't map to the current user namespace it is not enough, so the chown permission checks need to be extended to allow this case. Calling chown on filesystem nodes whose uid or gid don't map is necessary if those nodes are going to be modified as writing back inodes which contain uids or gids that don't map is likely to cause filesystem corruption of the uid or gid fields. Once chown has been called the existing capable_wrt_inode_uidgid checks are sufficient to allow the owner of a superblock to do anything the global root user can do with an appropriate set of capabilities. An ordinary filesystem mountable by a userns root will limit all uids and gids in s_user_ns or the INVALID_UID and INVALID_GID to flag all others. So having this added permission limited to just INVALID_UID and INVALID_GID is sufficient to handle every case on an ordinary filesystem. Of the virtual filesystems at least proc is known to set s_user_ns to something other than &init_user_ns, while at the same time presenting some files owned by GLOBAL_ROOT_UID. Those files the mounter of proc in a user namespace should not be able to chown to get access to. Limiting the relaxation in permission to just the minimum of allowing changing INVALID_UID and INVALID_GID prevents problems with cases like that. The original version of this patch was written by: Seth Forshee. I have rewritten and rethought this patch enough so it's really not the same thing (certainly it needs a different description), but he deserves credit for getting out there and getting the conversation started, and finding the potential gotcha's and putting up with my semi-paranoid feedback. Inspired-by: Seth Forshee Acked-by: Seth Forshee Signed-off-by: Eric W. Biederman --- fs/attr.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/attr.c b/fs/attr.c index 12ffdb6fb63c..d0b4d34878fb 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -18,6 +18,32 @@ #include #include +static bool chown_ok(const struct inode *inode, kuid_t uid) +{ + if (uid_eq(current_fsuid(), inode->i_uid) && + uid_eq(uid, inode->i_uid)) + return true; + if (capable_wrt_inode_uidgid(inode, CAP_CHOWN)) + return true; + if (uid_eq(inode->i_uid, INVALID_UID) && + ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN)) + return true; + return false; +} + +static bool chgrp_ok(const struct inode *inode, kgid_t gid) +{ + if (uid_eq(current_fsuid(), inode->i_uid) && + (in_group_p(gid) || gid_eq(gid, inode->i_gid))) + return true; + if (capable_wrt_inode_uidgid(inode, CAP_CHOWN)) + return true; + if (gid_eq(inode->i_gid, INVALID_GID) && + ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN)) + return true; + return false; +} + /** * setattr_prepare - check if attribute changes to a dentry are allowed * @dentry: dentry to check @@ -52,17 +78,11 @@ int setattr_prepare(struct dentry *dentry, struct iattr *attr) goto kill_priv; /* Make sure a caller can chown. */ - if ((ia_valid & ATTR_UID) && - (!uid_eq(current_fsuid(), inode->i_uid) || - !uid_eq(attr->ia_uid, inode->i_uid)) && - !capable_wrt_inode_uidgid(inode, CAP_CHOWN)) + if ((ia_valid & ATTR_UID) && !chown_ok(inode, attr->ia_uid)) return -EPERM; /* Make sure caller can chgrp. */ - if ((ia_valid & ATTR_GID) && - (!uid_eq(current_fsuid(), inode->i_uid) || - (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) && - !capable_wrt_inode_uidgid(inode, CAP_CHOWN)) + if ((ia_valid & ATTR_GID) && !chgrp_ok(inode, attr->ia_gid)) return -EPERM; /* Make sure a caller can chmod. */ -- cgit v1.2.3-59-g8ed1b From bc6155d1326092f4c29fe05a32b614249620d88e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 18 Sep 2017 17:58:08 -0500 Subject: fs: Allow superblock owner to access do_remount_sb() Superblock level remounts are currently restricted to global CAP_SYS_ADMIN, as is the path for changing the root mount to read only on umount. Loosen both of these permission checks to also allow CAP_SYS_ADMIN in any namespace which is privileged towards the userns which originally mounted the filesystem. Signed-off-by: Seth Forshee Acked-by: "Eric W. Biederman" Acked-by: Serge Hallyn Acked-by: Christian Brauner Signed-off-by: Eric W. Biederman --- fs/namespace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 5f75969adff1..8ddd14806799 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1590,7 +1590,7 @@ static int do_umount(struct mount *mnt, int flags) * Special case for "unmounting" root ... * we just try to remount it readonly. */ - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; down_write(&sb->s_umount); if (!sb_rdonly(sb)) @@ -2333,7 +2333,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, down_write(&sb->s_umount); if (ms_flags & MS_BIND) err = change_mount_flags(path->mnt, ms_flags); - else if (!capable(CAP_SYS_ADMIN)) + else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) err = -EPERM; else err = do_remount_sb(sb, sb_flags, data, 0); -- cgit v1.2.3-59-g8ed1b From f3f1a18330ac1b717cd7a32adff38d965f365aa2 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Sun, 15 Feb 2015 14:35:35 -0600 Subject: fs: Allow CAP_SYS_ADMIN in s_user_ns to freeze and thaw filesystems The user in control of a super block should be allowed to freeze and thaw it. Relax the restrictions on the FIFREEZE and FITHAW ioctls to require CAP_SYS_ADMIN in s_user_ns. Signed-off-by: Seth Forshee Acked-by: Christian Brauner Signed-off-by: Eric W. Biederman --- fs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ioctl.c b/fs/ioctl.c index 4823431d1c9d..b445b13fc59b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -549,7 +549,7 @@ static int ioctl_fsfreeze(struct file *filp) { struct super_block *sb = file_inode(filp)->i_sb; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; /* If filesystem doesn't support freeze feature, return. */ @@ -566,7 +566,7 @@ static int ioctl_fsthaw(struct file *filp) { struct super_block *sb = file_inode(filp)->i_sb; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; /* Thaw */ -- cgit v1.2.3-59-g8ed1b