From 380cf5ba6b0a0b307f4afb62b186ca801defb203 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 23 Jun 2016 16:41:05 -0500 Subject: fs: Treat foreign mounts as nosuid If a process gets access to a mount from a different user namespace, that process should not be able to take advantage of setuid files or selinux entrypoints from that filesystem. Prevent this by treating mounts from other mount namespaces and those not owned by current_user_ns() or an ancestor as nosuid. This will make it safer to allow more complex filesystems to be mounted in non-root user namespaces. This does not remove the need for MNT_LOCK_NOSUID. The setuid, setgid, and file capability bits can no longer be abused if code in a user namespace were to clear nosuid on an untrusted filesystem, but this patch, by itself, is insufficient to protect the system from abuse of files that, when execed, would increase MAC privilege. As a more concrete explanation, any task that can manipulate a vfsmount associated with a given user namespace already has capabilities in that namespace and all of its descendents. If they can cause a malicious setuid, setgid, or file-caps executable to appear in that mount, then that executable will only allow them to elevate privileges in exactly the set of namespaces in which they are already privileges. On the other hand, if they can cause a malicious executable to appear with a dangerous MAC label, running it could change the caller's security context in a way that should not have been possible, even inside the namespace in which the task is confined. As a hardening measure, this would have made CVE-2014-5207 much more difficult to exploit. Signed-off-by: Andy Lutomirski Signed-off-by: Seth Forshee Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 887c1c955df8..ca239fc86d8d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1411,7 +1411,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm) bprm->cred->euid = current_euid(); bprm->cred->egid = current_egid(); - if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) + if (!mnt_may_suid(bprm->file->f_path.mnt)) return; if (task_no_new_privs(current)) -- cgit v1.3-8-gc7d7 From 7e7ec6a934349ef6983f06f7ac0da09cc8a42983 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 24 Jul 2016 11:30:18 -0400 Subject: elf_fdpic_transfer_args_to_stack(): make it generic This copying of arguments and environment is common to both NOMMU binary formats we support. Let's make the elf_fdpic version available to the flat format as well. While at it, improve the code a bit not to copy below the actual data area. Signed-off-by: Nicolas Pitre Reviewed-by: Greg Ungerer Signed-off-by: Greg Ungerer --- fs/binfmt_elf_fdpic.c | 38 ++------------------------------------ fs/exec.c | 33 +++++++++++++++++++++++++++++++++ include/linux/binfmts.h | 2 ++ 3 files changed, 37 insertions(+), 36 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 203589311bf8..464a972e88c1 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -67,8 +67,6 @@ static int create_elf_fdpic_tables(struct linux_binprm *, struct mm_struct *, struct elf_fdpic_params *); #ifndef CONFIG_MMU -static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *, - unsigned long *); static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *, struct file *, struct mm_struct *); @@ -515,8 +513,9 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, sp = mm->start_stack; /* stack the program arguments and environment */ - if (elf_fdpic_transfer_args_to_stack(bprm, &sp) < 0) + if (transfer_args_to_stack(bprm, &sp) < 0) return -EFAULT; + sp &= ~15; #endif /* @@ -709,39 +708,6 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, return 0; } -/*****************************************************************************/ -/* - * transfer the program arguments and environment from the holding pages onto - * the stack - */ -#ifndef CONFIG_MMU -static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, - unsigned long *_sp) -{ - unsigned long index, stop, sp; - char *src; - int ret = 0; - - stop = bprm->p >> PAGE_SHIFT; - sp = *_sp; - - for (index = MAX_ARG_PAGES - 1; index >= stop; index--) { - src = kmap(bprm->page[index]); - sp -= PAGE_SIZE; - if (copy_to_user((void *) sp, src, PAGE_SIZE) != 0) - ret = -EFAULT; - kunmap(bprm->page[index]); - if (ret < 0) - goto out; - } - - *_sp = (*_sp - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p)) & ~15; - -out: - return ret; -} -#endif - /*****************************************************************************/ /* * load the appropriate binary image (executable or interpreter) into memory diff --git a/fs/exec.c b/fs/exec.c index 887c1c955df8..ef0df2f09257 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -762,6 +762,39 @@ out_unlock: } EXPORT_SYMBOL(setup_arg_pages); +#else + +/* + * Transfer the program arguments and environment from the holding pages + * onto the stack. The provided stack pointer is adjusted accordingly. + */ +int transfer_args_to_stack(struct linux_binprm *bprm, + unsigned long *sp_location) +{ + unsigned long index, stop, sp; + int ret = 0; + + stop = bprm->p >> PAGE_SHIFT; + sp = *sp_location; + + for (index = MAX_ARG_PAGES - 1; index >= stop; index--) { + unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0; + char *src = kmap(bprm->page[index]) + offset; + sp -= PAGE_SIZE - offset; + if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0) + ret = -EFAULT; + kunmap(bprm->page[index]); + if (ret) + goto out; + } + + *sp_location = sp; + +out: + return ret; +} +EXPORT_SYMBOL(transfer_args_to_stack); + #endif /* CONFIG_MMU */ static struct file *do_open_execat(int fd, struct filename *name, int flags) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 314b3caa701c..1303b570b18c 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -113,6 +113,8 @@ extern int suid_dumpable; extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); +extern int transfer_args_to_stack(struct linux_binprm *bprm, + unsigned long *sp_location); extern int bprm_change_interp(char *interp, struct linux_binprm *bprm); extern int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm); -- cgit v1.3-8-gc7d7 From a098ecd2fa7db8fa4fcc178a43627b29b226edb9 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 2 Aug 2016 14:04:28 -0700 Subject: firmware: support loading into a pre-allocated buffer Some systems are memory constrained but they need to load very large firmwares. The firmware subsystem allows drivers to request this firmware be loaded from the filesystem, but this requires that the entire firmware be loaded into kernel memory first before it's provided to the driver. This can lead to a situation where we map the firmware twice, once to load the firmware into kernel memory and once to copy the firmware into the final resting place. This creates needless memory pressure and delays loading because we have to copy from kernel memory to somewhere else. Let's add a request_firmware_into_buf() API that allows drivers to request firmware be loaded directly into a pre-allocated buffer. This skips the intermediate step of allocating a buffer in kernel memory to hold the firmware image while it's read from the filesystem. It also requires that drivers know how much memory they'll require before requesting the firmware and negates any benefits of firmware caching because the firmware layer doesn't manage the buffer lifetime. For a 16MB buffer, about half the time is spent performing a memcpy from the buffer to the final resting place. I see loading times go from 0.081171 seconds to 0.047696 seconds after applying this patch. Plus the vmalloc pressure is reduced. This is based on a patch from Vikram Mulukutla on codeaurora.org: https://www.codeaurora.org/cgit/quic/la/kernel/msm-3.18/commit/drivers/base/firmware_class.c?h=rel/msm-3.18&id=0a328c5f6cd999f5c591f172216835636f39bcb5 Link: http://lkml.kernel.org/r/20160607164741.31849-4-stephen.boyd@linaro.org Signed-off-by: Stephen Boyd Cc: Mimi Zohar Cc: Vikram Mulukutla Cc: Mark Brown Cc: Ming Lei Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/firmware_class.c | 125 +++++++++++++++++++++++++++++++++--------- fs/exec.c | 9 ++- include/linux/firmware.h | 8 +++ include/linux/fs.h | 1 + 4 files changed, 114 insertions(+), 29 deletions(-) (limited to 'fs/exec.c') diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 45ed20cefa10..22d1760a4278 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -46,7 +46,8 @@ MODULE_LICENSE("GPL"); extern struct builtin_fw __start_builtin_fw[]; extern struct builtin_fw __end_builtin_fw[]; -static bool fw_get_builtin_firmware(struct firmware *fw, const char *name) +static bool fw_get_builtin_firmware(struct firmware *fw, const char *name, + void *buf, size_t size) { struct builtin_fw *b_fw; @@ -54,6 +55,9 @@ static bool fw_get_builtin_firmware(struct firmware *fw, const char *name) if (strcmp(name, b_fw->name) == 0) { fw->size = b_fw->size; fw->data = b_fw->data; + + if (buf && fw->size <= size) + memcpy(buf, fw->data, fw->size); return true; } } @@ -74,7 +78,9 @@ static bool fw_is_builtin_firmware(const struct firmware *fw) #else /* Module case - no builtin firmware support */ -static inline bool fw_get_builtin_firmware(struct firmware *fw, const char *name) +static inline bool fw_get_builtin_firmware(struct firmware *fw, + const char *name, void *buf, + size_t size) { return false; } @@ -144,6 +150,7 @@ struct firmware_buf { unsigned long status; void *data; size_t size; + size_t allocated_size; #ifdef CONFIG_FW_LOADER_USER_HELPER bool is_paged_buf; bool need_uevent; @@ -179,7 +186,8 @@ static DEFINE_MUTEX(fw_lock); static struct firmware_cache fw_cache; static struct firmware_buf *__allocate_fw_buf(const char *fw_name, - struct firmware_cache *fwc) + struct firmware_cache *fwc, + void *dbuf, size_t size) { struct firmware_buf *buf; @@ -195,6 +203,8 @@ static struct firmware_buf *__allocate_fw_buf(const char *fw_name, kref_init(&buf->ref); buf->fwc = fwc; + buf->data = dbuf; + buf->allocated_size = size; init_completion(&buf->completion); #ifdef CONFIG_FW_LOADER_USER_HELPER INIT_LIST_HEAD(&buf->pending_list); @@ -218,7 +228,8 @@ static struct firmware_buf *__fw_lookup_buf(const char *fw_name) static int fw_lookup_and_allocate_buf(const char *fw_name, struct firmware_cache *fwc, - struct firmware_buf **buf) + struct firmware_buf **buf, void *dbuf, + size_t size) { struct firmware_buf *tmp; @@ -230,7 +241,7 @@ static int fw_lookup_and_allocate_buf(const char *fw_name, *buf = tmp; return 1; } - tmp = __allocate_fw_buf(fw_name, fwc); + tmp = __allocate_fw_buf(fw_name, fwc, dbuf, size); if (tmp) list_add(&tmp->list, &fwc->head); spin_unlock(&fwc->lock); @@ -262,6 +273,7 @@ static void __fw_free_buf(struct kref *ref) vfree(buf->pages); } else #endif + if (!buf->allocated_size) vfree(buf->data); kfree_const(buf->fw_id); kfree(buf); @@ -302,13 +314,21 @@ static void fw_finish_direct_load(struct device *device, mutex_unlock(&fw_lock); } -static int fw_get_filesystem_firmware(struct device *device, - struct firmware_buf *buf) +static int +fw_get_filesystem_firmware(struct device *device, struct firmware_buf *buf) { loff_t size; int i, len; int rc = -ENOENT; char *path; + enum kernel_read_file_id id = READING_FIRMWARE; + size_t msize = INT_MAX; + + /* Already populated data member means we're loading into a buffer */ + if (buf->data) { + id = READING_FIRMWARE_PREALLOC_BUFFER; + msize = buf->allocated_size; + } path = __getname(); if (!path) @@ -327,8 +347,8 @@ static int fw_get_filesystem_firmware(struct device *device, } buf->size = 0; - rc = kernel_read_file_from_path(path, &buf->data, &size, - INT_MAX, READING_FIRMWARE); + rc = kernel_read_file_from_path(path, &buf->data, &size, msize, + id); if (rc) { if (rc == -ENOENT) dev_dbg(device, "loading %s failed with error %d\n", @@ -692,6 +712,15 @@ out: static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store); +static void firmware_rw_buf(struct firmware_buf *buf, char *buffer, + loff_t offset, size_t count, bool read) +{ + if (read) + memcpy(buffer, buf->data + offset, count); + else + memcpy(buf->data + offset, buffer, count); +} + static void firmware_rw(struct firmware_buf *buf, char *buffer, loff_t offset, size_t count, bool read) { @@ -739,7 +768,10 @@ static ssize_t firmware_data_read(struct file *filp, struct kobject *kobj, ret_count = count; - firmware_rw(buf, buffer, offset, count, true); + if (buf->data) + firmware_rw_buf(buf, buffer, offset, count, true); + else + firmware_rw(buf, buffer, offset, count, true); out: mutex_unlock(&fw_lock); @@ -815,12 +847,21 @@ static ssize_t firmware_data_write(struct file *filp, struct kobject *kobj, goto out; } - retval = fw_realloc_buffer(fw_priv, offset + count); - if (retval) - goto out; + if (buf->data) { + if (offset + count > buf->allocated_size) { + retval = -ENOMEM; + goto out; + } + firmware_rw_buf(buf, buffer, offset, count, false); + retval = count; + } else { + retval = fw_realloc_buffer(fw_priv, offset + count); + if (retval) + goto out; - retval = count; - firmware_rw(buf, buffer, offset, count, false); + retval = count; + firmware_rw(buf, buffer, offset, count, false); + } buf->size = max_t(size_t, offset + count, buf->size); out: @@ -890,7 +931,8 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, struct firmware_buf *buf = fw_priv->buf; /* fall back on userspace loading */ - buf->is_paged_buf = true; + if (!buf->data) + buf->is_paged_buf = true; dev_set_uevent_suppress(f_dev, true); @@ -925,7 +967,7 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, if (is_fw_load_aborted(buf)) retval = -EAGAIN; - else if (!buf->data) + else if (buf->is_paged_buf && !buf->data) retval = -ENOMEM; device_del(f_dev); @@ -1008,7 +1050,7 @@ static int sync_cached_firmware_buf(struct firmware_buf *buf) */ static int _request_firmware_prepare(struct firmware **firmware_p, const char *name, - struct device *device) + struct device *device, void *dbuf, size_t size) { struct firmware *firmware; struct firmware_buf *buf; @@ -1021,12 +1063,12 @@ _request_firmware_prepare(struct firmware **firmware_p, const char *name, return -ENOMEM; } - if (fw_get_builtin_firmware(firmware, name)) { + if (fw_get_builtin_firmware(firmware, name, dbuf, size)) { dev_dbg(device, "using built-in %s\n", name); return 0; /* assigned */ } - ret = fw_lookup_and_allocate_buf(name, &fw_cache, &buf); + ret = fw_lookup_and_allocate_buf(name, &fw_cache, &buf, dbuf, size); /* * bind with 'buf' now to avoid warning in failure path @@ -1089,7 +1131,8 @@ static int assign_firmware_buf(struct firmware *fw, struct device *device, /* called from request_firmware() and request_firmware_work_func() */ static int _request_firmware(const struct firmware **firmware_p, const char *name, - struct device *device, unsigned int opt_flags) + struct device *device, void *buf, size_t size, + unsigned int opt_flags) { struct firmware *fw = NULL; long timeout; @@ -1103,7 +1146,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name, goto out; } - ret = _request_firmware_prepare(&fw, name, device); + ret = _request_firmware_prepare(&fw, name, device, buf, size); if (ret <= 0) /* error or already assigned */ goto out; @@ -1182,7 +1225,7 @@ request_firmware(const struct firmware **firmware_p, const char *name, /* Need to pin this module until return */ __module_get(THIS_MODULE); - ret = _request_firmware(firmware_p, name, device, + ret = _request_firmware(firmware_p, name, device, NULL, 0, FW_OPT_UEVENT | FW_OPT_FALLBACK); module_put(THIS_MODULE); return ret; @@ -1206,13 +1249,43 @@ int request_firmware_direct(const struct firmware **firmware_p, int ret; __module_get(THIS_MODULE); - ret = _request_firmware(firmware_p, name, device, + ret = _request_firmware(firmware_p, name, device, NULL, 0, FW_OPT_UEVENT | FW_OPT_NO_WARN); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL_GPL(request_firmware_direct); +/** + * request_firmware_into_buf - load firmware into a previously allocated buffer + * @firmware_p: pointer to firmware image + * @name: name of firmware file + * @device: device for which firmware is being loaded and DMA region allocated + * @buf: address of buffer to load firmware into + * @size: size of buffer + * + * This function works pretty much like request_firmware(), but it doesn't + * allocate a buffer to hold the firmware data. Instead, the firmware + * is loaded directly into the buffer pointed to by @buf and the @firmware_p + * data member is pointed at @buf. + * + * This function doesn't cache firmware either. + */ +int +request_firmware_into_buf(const struct firmware **firmware_p, const char *name, + struct device *device, void *buf, size_t size) +{ + int ret; + + __module_get(THIS_MODULE); + ret = _request_firmware(firmware_p, name, device, buf, size, + FW_OPT_UEVENT | FW_OPT_FALLBACK | + FW_OPT_NOCACHE); + module_put(THIS_MODULE); + return ret; +} +EXPORT_SYMBOL(request_firmware_into_buf); + /** * release_firmware: - release the resource associated with a firmware image * @fw: firmware resource to release @@ -1245,7 +1318,7 @@ static void request_firmware_work_func(struct work_struct *work) fw_work = container_of(work, struct firmware_work, work); - _request_firmware(&fw, fw_work->name, fw_work->device, + _request_firmware(&fw, fw_work->name, fw_work->device, NULL, 0, fw_work->opt_flags); fw_work->cont(fw, fw_work->context); put_device(fw_work->device); /* taken in request_firmware_nowait() */ @@ -1378,7 +1451,7 @@ static int uncache_firmware(const char *fw_name) pr_debug("%s: %s\n", __func__, fw_name); - if (fw_get_builtin_firmware(&fw, fw_name)) + if (fw_get_builtin_firmware(&fw, fw_name, NULL, 0)) return 0; buf = fw_lookup_buf(fw_name); diff --git a/fs/exec.c b/fs/exec.c index ca239fc86d8d..a1789cd684bf 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -866,7 +866,8 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, goto out; } - *buf = vmalloc(i_size); + if (id != READING_FIRMWARE_PREALLOC_BUFFER) + *buf = vmalloc(i_size); if (!*buf) { ret = -ENOMEM; goto out; @@ -897,8 +898,10 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, out_free: if (ret < 0) { - vfree(*buf); - *buf = NULL; + if (id != READING_FIRMWARE_PREALLOC_BUFFER) { + vfree(*buf); + *buf = NULL; + } } out: diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 5c41c5e75b5c..b1f9f0ccb8ac 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -47,6 +47,8 @@ int request_firmware_nowait( void (*cont)(const struct firmware *fw, void *context)); int request_firmware_direct(const struct firmware **fw, const char *name, struct device *device); +int request_firmware_into_buf(const struct firmware **firmware_p, + const char *name, struct device *device, void *buf, size_t size); void release_firmware(const struct firmware *fw); #else @@ -75,5 +77,11 @@ static inline int request_firmware_direct(const struct firmware **fw, return -EINVAL; } +static inline int request_firmware_into_buf(const struct firmware **firmware_p, + const char *name, struct device *device, void *buf, size_t size) +{ + return -EINVAL; +} + #endif #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 577365a77b47..f3f0b4c8e8ac 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2652,6 +2652,7 @@ extern int do_pipe_flags(int *, int); #define __kernel_read_file_id(id) \ id(UNKNOWN, unknown) \ id(FIRMWARE, firmware) \ + id(FIRMWARE_PREALLOC_BUFFER, firmware) \ id(MODULE, kernel-module) \ id(KEXEC_IMAGE, kexec-image) \ id(KEXEC_INITRAMFS, kexec-initramfs) \ -- cgit v1.3-8-gc7d7