diff options
Diffstat (limited to 'security/commoncap.c')
-rw-r--r-- | security/commoncap.c | 255 |
1 files changed, 182 insertions, 73 deletions
diff --git a/security/commoncap.c b/security/commoncap.c index f4ee0ae106b2..bc751fa5adad 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -24,6 +24,7 @@ #include <linux/user_namespace.h> #include <linux/binfmts.h> #include <linux/personality.h> +#include <linux/mnt_idmapping.h> /* * If a non-root user executes a setuid-root binary in @@ -50,7 +51,7 @@ static void warn_setuid_and_fcaps_mixed(const char *fname) /** * cap_capable - Determine whether a task has a particular effective capability * @cred: The credentials to use - * @ns: The user namespace in which we need the capability + * @targ_ns: The user namespace in which we need the capability * @cap: The capability to check for * @opts: Bitmask of options defined in include/linux/security.h * @@ -289,7 +290,7 @@ int cap_capset(struct cred *new, * affects the security markings on that inode, and if it is, should * inode_killpriv() be invoked or the change rejected. * - * Returns 1 if security.capability has a value, meaning inode_killpriv() + * Return: 1 if security.capability has a value, meaning inode_killpriv() * is required, 0 otherwise, meaning inode_killpriv() is not required. */ int cap_inode_need_killpriv(struct dentry *dentry) @@ -303,17 +304,25 @@ int cap_inode_need_killpriv(struct dentry *dentry) /** * cap_inode_killpriv - Erase the security markings on an inode - * @dentry: The inode/dentry to alter + * + * @mnt_userns: user namespace of the mount the inode was found from + * @dentry: The inode/dentry to alter * * Erase the privilege-enhancing security markings on an inode. * - * Returns 0 if successful, -ve on error. + * If the inode has been found through an idmapped mount the user namespace of + * the vfsmount must be passed through @mnt_userns. This function will then + * take care to map the inode according to @mnt_userns before checking + * permissions. On non-idmapped mounts or if permission checking is to be + * performed on the raw inode simply passs init_user_ns. + * + * Return: 0 if successful, -ve on error. */ -int cap_inode_killpriv(struct dentry *dentry) +int cap_inode_killpriv(struct user_namespace *mnt_userns, struct dentry *dentry) { int error; - error = __vfs_removexattr(dentry, XATTR_NAME_CAPS); + error = __vfs_removexattr(mnt_userns, dentry, XATTR_NAME_CAPS); if (error == -EOPNOTSUPP) error = 0; return error; @@ -366,15 +375,17 @@ static bool is_v3header(size_t size, const struct vfs_cap_data *cap) * by the integrity subsystem, which really wants the unconverted values - * so that's good. */ -int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, +int cap_inode_getsecurity(struct user_namespace *mnt_userns, + struct inode *inode, const char *name, void **buffer, bool alloc) { int size, ret; kuid_t kroot; + u32 nsmagic, magic; uid_t root, mappedroot; char *tmpbuf = NULL; struct vfs_cap_data *cap; - struct vfs_ns_cap_data *nscap; + struct vfs_ns_cap_data *nscap = NULL; struct dentry *dentry; struct user_namespace *fs_ns; @@ -386,56 +397,76 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, return -EINVAL; size = sizeof(struct vfs_ns_cap_data); - ret = (int) vfs_getxattr_alloc(dentry, XATTR_NAME_CAPS, - &tmpbuf, size, GFP_NOFS); + ret = (int)vfs_getxattr_alloc(mnt_userns, dentry, XATTR_NAME_CAPS, + &tmpbuf, size, GFP_NOFS); dput(dentry); - if (ret < 0) - return ret; + if (ret < 0 || !tmpbuf) { + size = ret; + goto out_free; + } fs_ns = inode->i_sb->s_user_ns; cap = (struct vfs_cap_data *) tmpbuf; if (is_v2header((size_t) ret, cap)) { - /* If this is sizeof(vfs_cap_data) then we're ok with the - * on-disk value, so return that. */ - if (alloc) - *buffer = tmpbuf; - else - kfree(tmpbuf); - return ret; - } else if (!is_v3header((size_t) ret, cap)) { - kfree(tmpbuf); - return -EINVAL; + root = 0; + } else if (is_v3header((size_t) ret, cap)) { + nscap = (struct vfs_ns_cap_data *) tmpbuf; + root = le32_to_cpu(nscap->rootid); + } else { + size = -EINVAL; + goto out_free; } - nscap = (struct vfs_ns_cap_data *) tmpbuf; - root = le32_to_cpu(nscap->rootid); kroot = make_kuid(fs_ns, root); + /* If this is an idmapped mount shift the kuid. */ + kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot); + /* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. */ mappedroot = from_kuid(current_user_ns(), kroot); if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) { + size = sizeof(struct vfs_ns_cap_data); if (alloc) { - *buffer = tmpbuf; + if (!nscap) { + /* v2 -> v3 conversion */ + nscap = kzalloc(size, GFP_ATOMIC); + if (!nscap) { + size = -ENOMEM; + goto out_free; + } + nsmagic = VFS_CAP_REVISION_3; + magic = le32_to_cpu(cap->magic_etc); + if (magic & VFS_CAP_FLAGS_EFFECTIVE) + nsmagic |= VFS_CAP_FLAGS_EFFECTIVE; + memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32); + nscap->magic_etc = cpu_to_le32(nsmagic); + } else { + /* use allocated v3 buffer */ + tmpbuf = NULL; + } nscap->rootid = cpu_to_le32(mappedroot); - } else - kfree(tmpbuf); - return size; + *buffer = nscap; + } + goto out_free; } if (!rootid_owns_currentns(kroot)) { - kfree(tmpbuf); - return -EOPNOTSUPP; + size = -EOVERFLOW; + goto out_free; } /* This comes from a parent namespace. Return as a v2 capability */ size = sizeof(struct vfs_cap_data); if (alloc) { - *buffer = kmalloc(size, GFP_ATOMIC); - if (*buffer) { - struct vfs_cap_data *cap = *buffer; - __le32 nsmagic, magic; + if (nscap) { + /* v3 -> v2 conversion */ + cap = kzalloc(size, GFP_ATOMIC); + if (!cap) { + size = -ENOMEM; + goto out_free; + } magic = VFS_CAP_REVISION_2; nsmagic = le32_to_cpu(nscap->magic_etc); if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE) @@ -443,23 +474,45 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32); cap->magic_etc = cpu_to_le32(magic); } else { - size = -ENOMEM; + /* use unconverted v2 */ + tmpbuf = NULL; } + *buffer = cap; } +out_free: kfree(tmpbuf); return size; } +/** + * rootid_from_xattr - translate root uid of vfs caps + * + * @value: vfs caps value which may be modified by this function + * @size: size of @ivalue + * @task_ns: user namespace of the caller + * @mnt_userns: user namespace of the mount the inode was found from + * @fs_userns: user namespace of the filesystem + * + * If the inode has been found through an idmapped mount the user namespace of + * the vfsmount must be passed through @mnt_userns. This function will then + * take care to map the inode according to @mnt_userns before checking + * permissions. On non-idmapped mounts or if permission checking is to be + * performed on the raw inode simply passs init_user_ns. + */ static kuid_t rootid_from_xattr(const void *value, size_t size, - struct user_namespace *task_ns) + struct user_namespace *task_ns, + struct user_namespace *mnt_userns, + struct user_namespace *fs_userns) { const struct vfs_ns_cap_data *nscap = value; + kuid_t rootkid; uid_t rootid = 0; if (size == XATTR_CAPS_SZ_3) rootid = le32_to_cpu(nscap->rootid); - return make_kuid(task_ns, rootid); + rootkid = make_kuid(task_ns, rootid); + return mapped_kuid_user(mnt_userns, fs_userns, rootkid); } static bool validheader(size_t size, const struct vfs_cap_data *cap) @@ -467,13 +520,27 @@ static bool validheader(size_t size, const struct vfs_cap_data *cap) return is_v2header(size, cap) || is_v3header(size, cap); } -/* +/** + * cap_convert_nscap - check vfs caps + * + * @mnt_userns: user namespace of the mount the inode was found from + * @dentry: used to retrieve inode to check permissions on + * @ivalue: vfs caps value which may be modified by this function + * @size: size of @ivalue + * * User requested a write of security.capability. If needed, update the * xattr to change from v2 to v3, or to fixup the v3 rootid. * - * If all is ok, we return the new size, on error return < 0. + * If the inode has been found through an idmapped mount the user namespace of + * the vfsmount must be passed through @mnt_userns. This function will then + * take care to map the inode according to @mnt_userns before checking + * permissions. On non-idmapped mounts or if permission checking is to be + * performed on the raw inode simply passs init_user_ns. + * + * Return: On success, return the new size; on error, return < 0. */ -int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size) +int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, + const void **ivalue, size_t size) { struct vfs_ns_cap_data *nscap; uid_t nsrootid; @@ -489,14 +556,14 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size) return -EINVAL; if (!validheader(size, cap)) return -EINVAL; - if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) + if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP)) return -EPERM; - if (size == XATTR_CAPS_SZ_2) + if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns)) if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) /* user is privileged, just write the v2 */ return size; - rootid = rootid_from_xattr(*ivalue, size, task_ns); + rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns); if (!uid_valid(rootid)) return -EINVAL; @@ -516,7 +583,6 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size) nscap->magic_etc = cpu_to_le32(nsmagic); memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32); - kvfree(*ivalue); *ivalue = nscap; return newsize; } @@ -565,10 +631,24 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, return *effective ? ret : 0; } -/* +/** + * get_vfs_caps_from_disk - retrieve vfs caps from disk + * + * @mnt_userns: user namespace of the mount the inode was found from + * @dentry: dentry from which @inode is retrieved + * @cpu_caps: vfs capabilities + * * Extract the on-exec-apply capability sets for an executable file. + * + * If the inode has been found through an idmapped mount the user namespace of + * the vfsmount must be passed through @mnt_userns. This function will then + * take care to map the inode according to @mnt_userns before checking + * permissions. On non-idmapped mounts or if permission checking is to be + * performed on the raw inode simply passs init_user_ns. */ -int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps) +int get_vfs_caps_from_disk(struct user_namespace *mnt_userns, + const struct dentry *dentry, + struct cpu_vfs_cap_data *cpu_caps) { struct inode *inode = d_backing_inode(dentry); __u32 magic_etc; @@ -624,6 +704,7 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data /* Limit the caps to the mounter of the filesystem * or the more limited uid specified in the xattr. */ + rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid); if (!rootid_owns_currentns(rootkuid)) return -ENODATA; @@ -647,7 +728,8 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data * its xattrs and, if present, apply them to the proposed credentials being * constructed by execve(). */ -static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_fcap) +static int get_file_caps(struct linux_binprm *bprm, struct file *file, + bool *effective, bool *has_fcap) { int rc = 0; struct cpu_vfs_cap_data vcaps; @@ -657,7 +739,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_f if (!file_caps_enabled) return 0; - if (!mnt_may_suid(bprm->file->f_path.mnt)) + if (!mnt_may_suid(file->f_path.mnt)) return 0; /* @@ -665,10 +747,11 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_f * explicit that capability bits are limited to s_user_ns and its * descendants. */ - if (!current_in_userns(bprm->file->f_path.mnt->mnt_sb->s_user_ns)) + if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns)) return 0; - rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); + rc = get_vfs_caps_from_disk(file_mnt_user_ns(file), + file->f_path.dentry, &vcaps); if (rc < 0) { if (rc == -EINVAL) printk(KERN_NOTICE "Invalid argument reading file caps for %s\n", @@ -797,15 +880,19 @@ static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old, } /** - * cap_bprm_set_creds - Set up the proposed credentials for execve(). + * cap_bprm_creds_from_file - Set up the proposed credentials for execve(). * @bprm: The execution parameters, including the proposed creds + * @file: The file to pull the credentials from * * Set up the proposed credentials for a new execution context being * constructed by execve(). The proposed creds in @bprm->cred is altered, - * which won't take effect immediately. Returns 0 if successful, -ve on error. + * which won't take effect immediately. + * + * Return: 0 if successful, -ve on error. */ -int cap_bprm_set_creds(struct linux_binprm *bprm) +int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) { + /* Process setpcap binaries and capabilities for uid 0 */ const struct cred *old = current_cred(); struct cred *new = bprm->cred; bool effective = false, has_fcap = false, is_setid; @@ -815,7 +902,7 @@ int cap_bprm_set_creds(struct linux_binprm *bprm) if (WARN_ON(!cap_ambient_invariant_ok(old))) return -EPERM; - ret = get_file_caps(bprm, &effective, &has_fcap); + ret = get_file_caps(bprm, file, &effective, &has_fcap); if (ret < 0) return ret; @@ -884,12 +971,11 @@ int cap_bprm_set_creds(struct linux_binprm *bprm) return -EPERM; /* Check for privilege-elevated exec. */ - bprm->cap_elevated = 0; if (is_setid || (!__is_real(root_uid, new) && (effective || __cap_grew(permitted, ambient, new)))) - bprm->cap_elevated = 1; + bprm->secureexec = 1; return 0; } @@ -932,16 +1018,25 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name, /** * cap_inode_removexattr - Determine whether an xattr may be removed - * @dentry: The inode/dentry being altered - * @name: The name of the xattr to be changed + * + * @mnt_userns: User namespace of the mount the inode was found from + * @dentry: The inode/dentry being altered + * @name: The name of the xattr to be changed * * Determine whether an xattr may be removed from an inode, returning 0 if * permission is granted, -ve if denied. * + * If the inode has been found through an idmapped mount the user namespace of + * the vfsmount must be passed through @mnt_userns. This function will then + * take care to map the inode according to @mnt_userns before checking + * permissions. On non-idmapped mounts or if permission checking is to be + * performed on the raw inode simply passs init_user_ns. + * * This is used to make sure security xattrs don't get removed by those who * aren't privileged to remove them. */ -int cap_inode_removexattr(struct dentry *dentry, const char *name) +int cap_inode_removexattr(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *name) { struct user_namespace *user_ns = dentry->d_sb->s_user_ns; @@ -955,7 +1050,7 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) struct inode *inode = d_backing_inode(dentry); if (!inode) return -EINVAL; - if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) + if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP)) return -EPERM; return 0; } @@ -1029,7 +1124,9 @@ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) * @flags: Indications of what has changed * * Fix up the results of setuid() call before the credential changes are - * actually applied, returning 0 to grant the changes, -ve to deny them. + * actually applied. + * + * Return: 0 to grant the changes, -ve to deny them. */ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { @@ -1099,7 +1196,9 @@ static int cap_safe_nice(struct task_struct *p) * @p: The task to affect * * Detemine if the requested scheduler policy change is permitted for the - * specified task, returning 0 if permission is granted, -ve if denied. + * specified task. + * + * Return: 0 if permission is granted, -ve if denied. */ int cap_task_setscheduler(struct task_struct *p) { @@ -1107,12 +1206,14 @@ int cap_task_setscheduler(struct task_struct *p) } /** - * cap_task_ioprio - Detemine if I/O priority change is permitted + * cap_task_setioprio - Detemine if I/O priority change is permitted * @p: The task to affect * @ioprio: The I/O priority to set * * Detemine if the requested I/O priority change is permitted for the specified - * task, returning 0 if permission is granted, -ve if denied. + * task. + * + * Return: 0 if permission is granted, -ve if denied. */ int cap_task_setioprio(struct task_struct *p, int ioprio) { @@ -1120,12 +1221,14 @@ int cap_task_setioprio(struct task_struct *p, int ioprio) } /** - * cap_task_ioprio - Detemine if task priority change is permitted + * cap_task_setnice - Detemine if task priority change is permitted * @p: The task to affect * @nice: The nice value to set * * Detemine if the requested task priority change is permitted for the - * specified task, returning 0 if permission is granted, -ve if denied. + * specified task. + * + * Return: 0 if permission is granted, -ve if denied. */ int cap_task_setnice(struct task_struct *p, int nice) { @@ -1155,12 +1258,15 @@ static int cap_prctl_drop(unsigned long cap) /** * cap_task_prctl - Implement process control functions for this security module * @option: The process control function requested - * @arg2, @arg3, @arg4, @arg5: The argument data for this function + * @arg2: The argument data for this function + * @arg3: The argument data for this function + * @arg4: The argument data for this function + * @arg5: The argument data for this function * * Allow process control functions (sys_prctl()) to alter capabilities; may * also deny access to other functions not otherwise implemented here. * - * Returns 0 or +ve on success, -ENOSYS if this function is not implemented + * Return: 0 or +ve on success, -ENOSYS if this function is not implemented * here, other -ve on error. If -ENOSYS is returned, sys_prctl() and other LSM * modules will consider performing the function. */ @@ -1295,7 +1401,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * @pages: The size of the mapping * * Determine whether the allocation of a new virtual mapping by the current - * task is permitted, returning 1 if permission is granted, 0 if not. + * task is permitted. + * + * Return: 1 if permission is granted, 0 if not. */ int cap_vm_enough_memory(struct mm_struct *mm, long pages) { @@ -1308,14 +1416,15 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) return cap_sys_admin; } -/* +/** * cap_mmap_addr - check if able to map given addr * @addr: address attempting to be mapped * * If the process is attempting to map memory below dac_mmap_min_addr they need * CAP_SYS_RAWIO. The other parameters to this function are unused by the - * capability security module. Returns 0 if this mapping should be allowed - * -EPERM if not. + * capability security module. + * + * Return: 0 if this mapping should be allowed or -EPERM if not. */ int cap_mmap_addr(unsigned long addr) { @@ -1346,7 +1455,7 @@ static struct security_hook_list capability_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme), LSM_HOOK_INIT(capget, cap_capget), LSM_HOOK_INIT(capset, cap_capset), - LSM_HOOK_INIT(bprm_set_creds, cap_bprm_set_creds), + LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file), LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv), LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv), LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity), |