From 59ef28b1f14899b10d6b2682c7057ca00a9a3f47 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 25 Oct 2012 10:49:25 +1030 Subject: module: fix out-by-one error in kallsyms Masaki found and patched a kallsyms issue: the last symbol in a module's symtab wasn't transferred. This is because we manually copy the zero'th entry (which is always empty) then copy the rest in a loop starting at 1, though from src[0]. His fix was minimal, I prefer to rewrite the loops in more standard form. There are two loops: one to get the size, and one to copy. Make these identical: always count entry 0 and any defined symbol in an allocated non-init section. This bug exists since the following commit was introduced. module: reduce symbol table for loaded modules (v2) commit: 4a4962263f07d14660849ec134ee42b63e95ea9a LKML: http://lkml.org/lkml/2012/10/24/27 Reported-by: Masaki Kimura Cc: stable@kernel.org --- kernel/module.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 6085f5ef88ea..6e48c3a43599 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2293,12 +2293,17 @@ static void layout_symtab(struct module *mod, struct load_info *info) src = (void *)info->hdr + symsect->sh_offset; nsrc = symsect->sh_size / sizeof(*src); + /* strtab always starts with a nul, so offset 0 is the empty string. */ + strtab_size = 1; + /* Compute total space required for the core symbols' strtab. */ - for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src) - if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) { - strtab_size += strlen(&info->strtab[src->st_name]) + 1; + for (ndst = i = 0; i < nsrc; i++) { + if (i == 0 || + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + strtab_size += strlen(&info->strtab[src[i].st_name])+1; ndst++; } + } /* Append room for core symbols at end of core part. */ info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); @@ -2332,15 +2337,15 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) mod->core_symtab = dst = mod->module_core + info->symoffs; mod->core_strtab = s = mod->module_core + info->stroffs; src = mod->symtab; - *dst = *src; *s++ = 0; - for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { - if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) - continue; - - dst[ndst] = *src; - dst[ndst++].st_name = s - mod->core_strtab; - s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1; + for (ndst = i = 0; i < mod->num_symtab; i++) { + if (i == 0 || + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + dst[ndst] = src[i]; + dst[ndst++].st_name = s - mod->core_strtab; + s += strlcpy(s, &mod->strtab[src[i].st_name], + KSYM_NAME_LEN) + 1; + } } mod->core_num_syms = ndst; } -- cgit v1.3-8-gc7d7 From 34e1169d996ab148490c01b65b4ee371cf8ffba2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 16 Oct 2012 07:31:07 +1030 Subject: module: add syscall to load module from fd As part of the effort to create a stronger boundary between root and kernel, Chrome OS wants to be able to enforce that kernel modules are being loaded only from our read-only crypto-hash verified (dm_verity) root filesystem. Since the init_module syscall hands the kernel a module as a memory blob, no reasoning about the origin of the blob can be made. Earlier proposals for appending signatures to kernel modules would not be useful in Chrome OS, since it would involve adding an additional set of keys to our kernel and builds for no good reason: we already trust the contents of our root filesystem. We don't need to verify those kernel modules a second time. Having to do signature checking on module loading would slow us down and be redundant. All we need to know is where a module is coming from so we can say yes/no to loading it. If a file descriptor is used as the source of a kernel module, many more things can be reasoned about. In Chrome OS's case, we could enforce that the module lives on the filesystem we expect it to live on. In the case of IMA (or other LSMs), it would be possible, for example, to examine extended attributes that may contain signatures over the contents of the module. This introduces a new syscall (on x86), similar to init_module, that has only two arguments. The first argument is used as a file descriptor to the module and the second argument is a pointer to the NULL terminated string of module arguments. Signed-off-by: Kees Cook Cc: Andrew Morton Signed-off-by: Rusty Russell (merge fixes) --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 1 + kernel/module.c | 367 +++++++++++++++++++++++---------------- kernel/sys_ni.c | 1 + 5 files changed, 223 insertions(+), 148 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index a47103fbc692..83b3838417ed 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -356,3 +356,4 @@ 347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 349 i386 kcmp sys_kcmp +350 i386 finit_module sys_finit_module diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index a582bfed95bb..7c58c84b7bc8 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -319,6 +319,7 @@ 310 64 process_vm_readv sys_process_vm_readv 311 64 process_vm_writev sys_process_vm_writev 312 common kcmp sys_kcmp +313 common finit_module sys_finit_module # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 727f0cd73921..32bc035bcd68 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -868,4 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); +asmlinkage long sys_finit_module(int fd, const char __user *uargs); #endif diff --git a/kernel/module.c b/kernel/module.c index 6e48c3a43599..6d2c4e4ca1f5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -2425,18 +2426,17 @@ static inline void kmemleak_load_module(const struct module *mod, #endif #ifdef CONFIG_MODULE_SIG -static int module_sig_check(struct load_info *info, - const void *mod, unsigned long *_len) +static int module_sig_check(struct load_info *info) { int err = -ENOKEY; - unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; - unsigned long len = *_len; + const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; + const void *mod = info->hdr; - if (len > markerlen && - memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { + if (info->len > markerlen && + memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { /* We truncate the module to discard the signature */ - *_len -= markerlen; - err = mod_verify_sig(mod, _len); + info->len -= markerlen; + err = mod_verify_sig(mod, &info->len); } if (!err) { @@ -2454,59 +2454,97 @@ static int module_sig_check(struct load_info *info, return err; } #else /* !CONFIG_MODULE_SIG */ -static int module_sig_check(struct load_info *info, - void *mod, unsigned long *len) +static int module_sig_check(struct load_info *info) { return 0; } #endif /* !CONFIG_MODULE_SIG */ -/* Sets info->hdr, info->len and info->sig_ok. */ -static int copy_and_check(struct load_info *info, - const void __user *umod, unsigned long len, - const char __user *uargs) +/* Sanity checks against invalid binaries, wrong arch, weird elf version. */ +static int elf_header_check(struct load_info *info) { - int err; - Elf_Ehdr *hdr; + if (info->len < sizeof(*(info->hdr))) + return -ENOEXEC; + + if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0 + || info->hdr->e_type != ET_REL + || !elf_check_arch(info->hdr) + || info->hdr->e_shentsize != sizeof(Elf_Shdr)) + return -ENOEXEC; + + if (info->hdr->e_shoff >= info->len + || (info->hdr->e_shnum * sizeof(Elf_Shdr) > + info->len - info->hdr->e_shoff)) + return -ENOEXEC; - if (len < sizeof(*hdr)) + return 0; +} + +/* Sets info->hdr and info->len. */ +static int copy_module_from_user(const void __user *umod, unsigned long len, + struct load_info *info) +{ + info->len = len; + if (info->len < sizeof(*(info->hdr))) return -ENOEXEC; /* Suck in entire file: we'll want most of it. */ - if ((hdr = vmalloc(len)) == NULL) + info->hdr = vmalloc(info->len); + if (!info->hdr) return -ENOMEM; - if (copy_from_user(hdr, umod, len) != 0) { - err = -EFAULT; - goto free_hdr; + if (copy_from_user(info->hdr, umod, info->len) != 0) { + vfree(info->hdr); + return -EFAULT; } - err = module_sig_check(info, hdr, &len); + return 0; +} + +/* Sets info->hdr and info->len. */ +static int copy_module_from_fd(int fd, struct load_info *info) +{ + struct file *file; + int err; + struct kstat stat; + loff_t pos; + ssize_t bytes = 0; + + file = fget(fd); + if (!file) + return -ENOEXEC; + + err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); if (err) - goto free_hdr; + goto out; - /* Sanity checks against insmoding binaries or wrong arch, - weird elf version */ - if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 - || hdr->e_type != ET_REL - || !elf_check_arch(hdr) - || hdr->e_shentsize != sizeof(Elf_Shdr)) { - err = -ENOEXEC; - goto free_hdr; + if (stat.size > INT_MAX) { + err = -EFBIG; + goto out; } - - if (hdr->e_shoff >= len || - hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) { - err = -ENOEXEC; - goto free_hdr; + info->hdr = vmalloc(stat.size); + if (!info->hdr) { + err = -ENOMEM; + goto out; } - info->hdr = hdr; - info->len = len; - return 0; + pos = 0; + while (pos < stat.size) { + bytes = kernel_read(file, pos, (char *)(info->hdr) + pos, + stat.size - pos); + if (bytes < 0) { + vfree(info->hdr); + err = bytes; + goto out; + } + if (bytes == 0) + break; + pos += bytes; + } + info->len = pos; -free_hdr: - vfree(hdr); +out: + fput(file); return err; } @@ -2945,33 +2983,123 @@ static bool finished_loading(const char *name) return ret; } +/* Call module constructors. */ +static void do_mod_ctors(struct module *mod) +{ +#ifdef CONFIG_CONSTRUCTORS + unsigned long i; + + for (i = 0; i < mod->num_ctors; i++) + mod->ctors[i](); +#endif +} + +/* This is where the real work happens */ +static int do_init_module(struct module *mod) +{ + int ret = 0; + + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_COMING, mod); + + /* Set RO and NX regions for core */ + set_section_ro_nx(mod->module_core, + mod->core_text_size, + mod->core_ro_size, + mod->core_size); + + /* Set RO and NX regions for init */ + set_section_ro_nx(mod->module_init, + mod->init_text_size, + mod->init_ro_size, + mod->init_size); + + do_mod_ctors(mod); + /* Start the module */ + if (mod->init != NULL) + ret = do_one_initcall(mod->init); + if (ret < 0) { + /* Init routine failed: abort. Try to protect us from + buggy refcounters. */ + mod->state = MODULE_STATE_GOING; + synchronize_sched(); + module_put(mod); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_GOING, mod); + free_module(mod); + wake_up_all(&module_wq); + return ret; + } + if (ret > 0) { + printk(KERN_WARNING +"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n" +"%s: loading module anyway...\n", + __func__, mod->name, ret, + __func__); + dump_stack(); + } + + /* Now it's a first class citizen! */ + mod->state = MODULE_STATE_LIVE; + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_LIVE, mod); + + /* We need to finish all async code before the module init sequence is done */ + async_synchronize_full(); + + mutex_lock(&module_mutex); + /* Drop initial reference. */ + module_put(mod); + trim_init_extable(mod); +#ifdef CONFIG_KALLSYMS + mod->num_symtab = mod->core_num_syms; + mod->symtab = mod->core_symtab; + mod->strtab = mod->core_strtab; +#endif + unset_module_init_ro_nx(mod); + module_free(mod, mod->module_init); + mod->module_init = NULL; + mod->init_size = 0; + mod->init_ro_size = 0; + mod->init_text_size = 0; + mutex_unlock(&module_mutex); + wake_up_all(&module_wq); + + return 0; +} + +static int may_init_module(void) +{ + if (!capable(CAP_SYS_MODULE) || modules_disabled) + return -EPERM; + + return 0; +} + /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ -static struct module *load_module(void __user *umod, - unsigned long len, - const char __user *uargs) +static int load_module(struct load_info *info, const char __user *uargs) { - struct load_info info = { NULL, }; struct module *mod, *old; long err; - pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", - umod, len, uargs); + err = module_sig_check(info); + if (err) + goto free_copy; - /* Copy in the blobs from userspace, check they are vaguely sane. */ - err = copy_and_check(&info, umod, len, uargs); + err = elf_header_check(info); if (err) - return ERR_PTR(err); + goto free_copy; /* Figure out module layout, and allocate all the memory. */ - mod = layout_and_allocate(&info); + mod = layout_and_allocate(info); if (IS_ERR(mod)) { err = PTR_ERR(mod); goto free_copy; } #ifdef CONFIG_MODULE_SIG - mod->sig_ok = info.sig_ok; + mod->sig_ok = info->sig_ok; if (!mod->sig_ok) add_taint_module(mod, TAINT_FORCED_MODULE); #endif @@ -2983,25 +3111,25 @@ static struct module *load_module(void __user *umod, /* Now we've got everything in the final locations, we can * find optional sections. */ - find_module_sections(mod, &info); + find_module_sections(mod, info); err = check_module_license_and_versions(mod); if (err) goto free_unload; /* Set up MODINFO_ATTR fields */ - setup_modinfo(mod, &info); + setup_modinfo(mod, info); /* Fix up syms, so that st_value is a pointer to location. */ - err = simplify_symbols(mod, &info); + err = simplify_symbols(mod, info); if (err < 0) goto free_modinfo; - err = apply_relocations(mod, &info); + err = apply_relocations(mod, info); if (err < 0) goto free_modinfo; - err = post_relocation(mod, &info); + err = post_relocation(mod, info); if (err < 0) goto free_modinfo; @@ -3041,14 +3169,14 @@ again: } /* This has to be done once we're sure module name is unique. */ - dynamic_debug_setup(info.debug, info.num_debug); + dynamic_debug_setup(info->debug, info->num_debug); /* Find duplicate symbols */ err = verify_export_symbols(mod); if (err < 0) goto ddebug; - module_bug_finalize(info.hdr, info.sechdrs, mod); + module_bug_finalize(info->hdr, info->sechdrs, mod); list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -3059,16 +3187,17 @@ again: goto unlink; /* Link in to syfs. */ - err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp); + err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp); if (err < 0) goto unlink; /* Get rid of temporary copy. */ - free_copy(&info); + free_copy(info); /* Done! */ trace_module_load(mod); - return mod; + + return do_init_module(mod); unlink: mutex_lock(&module_mutex); @@ -3077,7 +3206,7 @@ again: module_bug_cleanup(mod); wake_up_all(&module_wq); ddebug: - dynamic_debug_remove(info.debug); + dynamic_debug_remove(info->debug); unlock: mutex_unlock(&module_mutex); synchronize_sched(); @@ -3089,106 +3218,48 @@ again: free_unload: module_unload_free(mod); free_module: - module_deallocate(mod, &info); + module_deallocate(mod, info); free_copy: - free_copy(&info); - return ERR_PTR(err); -} - -/* Call module constructors. */ -static void do_mod_ctors(struct module *mod) -{ -#ifdef CONFIG_CONSTRUCTORS - unsigned long i; - - for (i = 0; i < mod->num_ctors; i++) - mod->ctors[i](); -#endif + free_copy(info); + return err; } -/* This is where the real work happens */ SYSCALL_DEFINE3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs) { - struct module *mod; - int ret = 0; - - /* Must have permission */ - if (!capable(CAP_SYS_MODULE) || modules_disabled) - return -EPERM; + int err; + struct load_info info = { }; - /* Do all the hard work */ - mod = load_module(umod, len, uargs); - if (IS_ERR(mod)) - return PTR_ERR(mod); + err = may_init_module(); + if (err) + return err; - blocking_notifier_call_chain(&module_notify_list, - MODULE_STATE_COMING, mod); + pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n", + umod, len, uargs); - /* Set RO and NX regions for core */ - set_section_ro_nx(mod->module_core, - mod->core_text_size, - mod->core_ro_size, - mod->core_size); + err = copy_module_from_user(umod, len, &info); + if (err) + return err; - /* Set RO and NX regions for init */ - set_section_ro_nx(mod->module_init, - mod->init_text_size, - mod->init_ro_size, - mod->init_size); + return load_module(&info, uargs); +} - do_mod_ctors(mod); - /* Start the module */ - if (mod->init != NULL) - ret = do_one_initcall(mod->init); - if (ret < 0) { - /* Init routine failed: abort. Try to protect us from - buggy refcounters. */ - mod->state = MODULE_STATE_GOING; - synchronize_sched(); - module_put(mod); - blocking_notifier_call_chain(&module_notify_list, - MODULE_STATE_GOING, mod); - free_module(mod); - wake_up_all(&module_wq); - return ret; - } - if (ret > 0) { - printk(KERN_WARNING -"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n" -"%s: loading module anyway...\n", - __func__, mod->name, ret, - __func__); - dump_stack(); - } +SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs) +{ + int err; + struct load_info info = { }; - /* Now it's a first class citizen! */ - mod->state = MODULE_STATE_LIVE; - blocking_notifier_call_chain(&module_notify_list, - MODULE_STATE_LIVE, mod); + err = may_init_module(); + if (err) + return err; - /* We need to finish all async code before the module init sequence is done */ - async_synchronize_full(); + pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs); - mutex_lock(&module_mutex); - /* Drop initial reference. */ - module_put(mod); - trim_init_extable(mod); -#ifdef CONFIG_KALLSYMS - mod->num_symtab = mod->core_num_syms; - mod->symtab = mod->core_symtab; - mod->strtab = mod->core_strtab; -#endif - unset_module_init_ro_nx(mod); - module_free(mod, mod->module_init); - mod->module_init = NULL; - mod->init_size = 0; - mod->init_ro_size = 0; - mod->init_text_size = 0; - mutex_unlock(&module_mutex); - wake_up_all(&module_wq); + err = copy_module_from_fd(fd, &info); + if (err) + return err; - return 0; + return load_module(&info, uargs); } static inline int within(unsigned long addr, void *start, unsigned long size) diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index dbff751e4086..395084d4ce16 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -25,6 +25,7 @@ cond_syscall(sys_swapoff); cond_syscall(sys_kexec_load); cond_syscall(compat_sys_kexec_load); cond_syscall(sys_init_module); +cond_syscall(sys_finit_module); cond_syscall(sys_delete_module); cond_syscall(sys_socketpair); cond_syscall(sys_bind); -- cgit v1.3-8-gc7d7 From 2f3238aebedb243804f58d62d57244edec4149b2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Oct 2012 18:09:41 +1030 Subject: module: add flags arg to sys_finit_module() Thanks to Michael Kerrisk for keeping us honest. These flags are actually useful for eliminating the only case where kmod has to mangle a module's internals: for overriding module versioning. Signed-off-by: Rusty Russell Acked-by: Lucas De Marchi Acked-by: Kees Cook --- include/linux/syscalls.h | 2 +- include/uapi/linux/module.h | 8 ++++++++ kernel/module.c | 40 ++++++++++++++++++++++++++-------------- 3 files changed, 35 insertions(+), 15 deletions(-) create mode 100644 include/uapi/linux/module.h (limited to 'kernel/module.c') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 32bc035bcd68..8cf7b508cb50 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -868,5 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); -asmlinkage long sys_finit_module(int fd, const char __user *uargs); +asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); #endif diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h new file mode 100644 index 000000000000..38da4258b12f --- /dev/null +++ b/include/uapi/linux/module.h @@ -0,0 +1,8 @@ +#ifndef _UAPI_LINUX_MODULE_H +#define _UAPI_LINUX_MODULE_H + +/* Flags for sys_finit_module: */ +#define MODULE_INIT_IGNORE_MODVERSIONS 1 +#define MODULE_INIT_IGNORE_VERMAGIC 2 + +#endif /* _UAPI_LINUX_MODULE_H */ diff --git a/kernel/module.c b/kernel/module.c index 6d2c4e4ca1f5..1395ca382fb5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -60,6 +60,7 @@ #include #include #include +#include #include "module-internal.h" #define CREATE_TRACE_POINTS @@ -2553,7 +2554,7 @@ static void free_copy(struct load_info *info) vfree(info->hdr); } -static int rewrite_section_headers(struct load_info *info) +static int rewrite_section_headers(struct load_info *info, int flags) { unsigned int i; @@ -2581,7 +2582,10 @@ static int rewrite_section_headers(struct load_info *info) } /* Track but don't keep modinfo and version sections. */ - info->index.vers = find_sec(info, "__versions"); + if (flags & MODULE_INIT_IGNORE_MODVERSIONS) + info->index.vers = 0; /* Pretend no __versions section! */ + else + info->index.vers = find_sec(info, "__versions"); info->index.info = find_sec(info, ".modinfo"); info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; @@ -2596,7 +2600,7 @@ static int rewrite_section_headers(struct load_info *info) * Return the temporary module pointer (we'll replace it with the final * one when we move the module sections around). */ -static struct module *setup_load_info(struct load_info *info) +static struct module *setup_load_info(struct load_info *info, int flags) { unsigned int i; int err; @@ -2607,7 +2611,7 @@ static struct module *setup_load_info(struct load_info *info) info->secstrings = (void *)info->hdr + info->sechdrs[info->hdr->e_shstrndx].sh_offset; - err = rewrite_section_headers(info); + err = rewrite_section_headers(info, flags); if (err) return ERR_PTR(err); @@ -2645,11 +2649,14 @@ static struct module *setup_load_info(struct load_info *info) return mod; } -static int check_modinfo(struct module *mod, struct load_info *info) +static int check_modinfo(struct module *mod, struct load_info *info, int flags) { const char *modmagic = get_modinfo(info, "vermagic"); int err; + if (flags & MODULE_INIT_IGNORE_VERMAGIC) + modmagic = NULL; + /* This is allowed: modprobe --force will invalidate it. */ if (!modmagic) { err = try_to_force_load(mod, "bad vermagic"); @@ -2885,18 +2892,18 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr, return 0; } -static struct module *layout_and_allocate(struct load_info *info) +static struct module *layout_and_allocate(struct load_info *info, int flags) { /* Module within temporary copy. */ struct module *mod; Elf_Shdr *pcpusec; int err; - mod = setup_load_info(info); + mod = setup_load_info(info, flags); if (IS_ERR(mod)) return mod; - err = check_modinfo(mod, info); + err = check_modinfo(mod, info, flags); if (err) return ERR_PTR(err); @@ -3078,7 +3085,8 @@ static int may_init_module(void) /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ -static int load_module(struct load_info *info, const char __user *uargs) +static int load_module(struct load_info *info, const char __user *uargs, + int flags) { struct module *mod, *old; long err; @@ -3092,7 +3100,7 @@ static int load_module(struct load_info *info, const char __user *uargs) goto free_copy; /* Figure out module layout, and allocate all the memory. */ - mod = layout_and_allocate(info); + mod = layout_and_allocate(info, flags); if (IS_ERR(mod)) { err = PTR_ERR(mod); goto free_copy; @@ -3241,10 +3249,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, if (err) return err; - return load_module(&info, uargs); + return load_module(&info, uargs, 0); } -SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs) +SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) { int err; struct load_info info = { }; @@ -3253,13 +3261,17 @@ SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs) if (err) return err; - pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs); + pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); + + if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS + |MODULE_INIT_IGNORE_VERMAGIC)) + return -EINVAL; err = copy_module_from_fd(fd, &info); if (err) return err; - return load_module(&info, uargs); + return load_module(&info, uargs, flags); } static inline int within(unsigned long addr, void *start, unsigned long size) -- cgit v1.3-8-gc7d7 From 2e72d51b4ac32989496870cd8171b3682fea1839 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 16 Oct 2012 07:32:07 +1030 Subject: security: introduce kernel_module_from_file hook Now that kernel module origins can be reasoned about, provide a hook to the LSMs to make policy decisions about the module file. This will let Chrome OS enforce that loadable kernel modules can only come from its read-only hash-verified root filesystem. Other LSMs can, for example, read extended attributes for signatures, etc. Signed-off-by: Kees Cook Acked-by: Serge E. Hallyn Acked-by: Eric Paris Acked-by: Mimi Zohar Acked-by: James Morris Signed-off-by: Rusty Russell --- include/linux/security.h | 13 +++++++++++++ kernel/module.c | 11 +++++++++++ security/capability.c | 6 ++++++ security/security.c | 5 +++++ 4 files changed, 35 insertions(+) (limited to 'kernel/module.c') diff --git a/include/linux/security.h b/include/linux/security.h index 05e88bdcf7d9..0f6afc657f77 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -694,6 +694,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * userspace to load a kernel module with the given name. * @kmod_name name of the module requested by the kernel * Return 0 if successful. + * @kernel_module_from_file: + * Load a kernel module from userspace. + * @file contains the file structure pointing to the file containing + * the kernel module to load. If the module is being loaded from a blob, + * this argument will be NULL. + * Return 0 if permission is granted. * @task_fix_setuid: * Update the module's state after setting one or more of the user * identity attributes of the current process. The @flags parameter @@ -1508,6 +1514,7 @@ struct security_operations { int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); int (*kernel_module_request)(char *kmod_name); + int (*kernel_module_from_file)(struct file *file); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); int (*task_setpgid) (struct task_struct *p, pid_t pgid); @@ -1765,6 +1772,7 @@ void security_transfer_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_module_request(char *kmod_name); +int security_kernel_module_from_file(struct file *file); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); int security_task_setpgid(struct task_struct *p, pid_t pgid); @@ -2278,6 +2286,11 @@ static inline int security_kernel_module_request(char *kmod_name) return 0; } +static inline int security_kernel_module_from_file(struct file *file) +{ + return 0; +} + static inline int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags) diff --git a/kernel/module.c b/kernel/module.c index 1395ca382fb5..a1d2ed8bab93 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -2485,10 +2486,16 @@ static int elf_header_check(struct load_info *info) static int copy_module_from_user(const void __user *umod, unsigned long len, struct load_info *info) { + int err; + info->len = len; if (info->len < sizeof(*(info->hdr))) return -ENOEXEC; + err = security_kernel_module_from_file(NULL); + if (err) + return err; + /* Suck in entire file: we'll want most of it. */ info->hdr = vmalloc(info->len); if (!info->hdr) @@ -2515,6 +2522,10 @@ static int copy_module_from_fd(int fd, struct load_info *info) if (!file) return -ENOEXEC; + err = security_kernel_module_from_file(file); + if (err) + goto out; + err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); if (err) goto out; diff --git a/security/capability.c b/security/capability.c index b14a30c234b8..0fe5a026aef8 100644 --- a/security/capability.c +++ b/security/capability.c @@ -395,6 +395,11 @@ static int cap_kernel_module_request(char *kmod_name) return 0; } +static int cap_kernel_module_from_file(struct file *file) +{ + return 0; +} + static int cap_task_setpgid(struct task_struct *p, pid_t pgid) { return 0; @@ -967,6 +972,7 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, kernel_act_as); set_to_cap_if_null(ops, kernel_create_files_as); set_to_cap_if_null(ops, kernel_module_request); + set_to_cap_if_null(ops, kernel_module_from_file); set_to_cap_if_null(ops, task_fix_setuid); set_to_cap_if_null(ops, task_setpgid); set_to_cap_if_null(ops, task_getpgid); diff --git a/security/security.c b/security/security.c index 8dcd4ae10a5f..ce88630de15d 100644 --- a/security/security.c +++ b/security/security.c @@ -820,6 +820,11 @@ int security_kernel_module_request(char *kmod_name) return security_ops->kernel_module_request(kmod_name); } +int security_kernel_module_from_file(struct file *file) +{ + return security_ops->kernel_module_from_file(file); +} + int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { -- cgit v1.3-8-gc7d7 From 54523ec71f8ce99accae97c74152f14f261f7e18 Mon Sep 17 00:00:00 2001 From: Satoru Takeuchi Date: Wed, 5 Dec 2012 12:29:04 +1030 Subject: module: Remove a extra null character at the top of module->strtab. There is a extra null character('\0') at the top of module->strtab for each module. Commit 59ef28b introduced this bug and this patch fixes it. Live dump log of the current linus git kernel(HEAD is 2844a4870): ============================================================================ crash> mod | grep loop ffffffffa01db0a0 loop 16689 (not loaded) [CONFIG_KALLSYMS] crash> module.core_symtab ffffffffa01db0a0 core_symtab = 0xffffffffa01db320crash> rd 0xffffffffa01db320 12 ffffffffa01db320: 0000005500000001 0000000000000000 ....U........... ffffffffa01db330: 0000000000000000 0002007400000002 ............t... ffffffffa01db340: ffffffffa01d8000 0000000000000038 ........8....... ffffffffa01db350: 001a00640000000e ffffffffa01daeb0 ....d........... ffffffffa01db360: 00000000000000a0 0002007400000019 ............t... ffffffffa01db370: ffffffffa01d8068 000000000000001b h............... crash> module.core_strtab ffffffffa01db0a0 core_strtab = 0xffffffffa01dbb30 "" crash> rd 0xffffffffa01dbb30 4 ffffffffa01dbb30: 615f70616d6b0000 66780063696d6f74 ..kmap_atomic.xf ffffffffa01dbb40: 73636e75665f7265 72665f646e696600 er_funcs.find_fr ============================================================================ We expect Just first one byte of '\0', but actually first two bytes are '\0'. Here is The relationship between symtab and strtab. symtab_idx strtab_idx symbol ----------------------------------------------- 0 0x1 "\0" # startab_idx should be 0 1 0x2 "kmap_atomic" 2 0xe "xfer_funcs" 3 0x19 "find_fr..." By applying this patch, it becomes as follows. symtab_idx strtab_idx symbol ----------------------------------------------- 0 0x0 "\0" # extra byte is removed 1 0x1 "kmap_atomic" 2 0xd "xfer_funcs" 3 0x18 "find_fr..." Signed-off-by: Satoru Takeuchi Cc: Masaki Kimura Cc: Rusty Russell Cc: Greg Kroah-Hartman Signed-off-by: Rusty Russell --- kernel/module.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index a1d2ed8bab93..79a526dd1b11 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2285,7 +2285,7 @@ static void layout_symtab(struct module *mod, struct load_info *info) Elf_Shdr *symsect = info->sechdrs + info->index.sym; Elf_Shdr *strsect = info->sechdrs + info->index.str; const Elf_Sym *src; - unsigned int i, nsrc, ndst, strtab_size; + unsigned int i, nsrc, ndst, strtab_size = 0; /* Put symbol section at end of init part of module. */ symsect->sh_flags |= SHF_ALLOC; @@ -2296,9 +2296,6 @@ static void layout_symtab(struct module *mod, struct load_info *info) src = (void *)info->hdr + symsect->sh_offset; nsrc = symsect->sh_size / sizeof(*src); - /* strtab always starts with a nul, so offset 0 is the empty string. */ - strtab_size = 1; - /* Compute total space required for the core symbols' strtab. */ for (ndst = i = 0; i < nsrc; i++) { if (i == 0 || @@ -2340,7 +2337,6 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) mod->core_symtab = dst = mod->module_core + info->symoffs; mod->core_strtab = s = mod->module_core + info->stroffs; src = mod->symtab; - *s++ = 0; for (ndst = i = 0; i < mod->num_symtab; i++) { if (i == 0 || is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { -- cgit v1.3-8-gc7d7 From 82fab442f5322b016f72891c0db2436c6a6c20b7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 11 Dec 2012 09:38:33 +1030 Subject: modules: don't hand 0 to vmalloc. In commit d0a21265dfb5fa8a David Rientjes unified various archs' module_alloc implementation (including x86) and removed the graduitous shortcut for size == 0. Then, in commit de7d2b567d040e3b, Joe Perches added a warning for zero-length vmallocs, which can happen without kallsyms on modules with no init sections (eg. zlib_deflate). Fix this once and for all; the module code has to handle zero length anyway, so get it right at the caller and remove the now-gratuitous checks within the arch-specific module_alloc implementations. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=42608 Reported-by: Conrad Kostecki Cc: David Rientjes Cc: Joe Perches Signed-off-by: Rusty Russell --- arch/cris/kernel/module.c | 2 -- arch/parisc/kernel/module.c | 2 -- arch/sparc/kernel/module.c | 4 ---- arch/tile/kernel/module.c | 2 -- arch/unicore32/kernel/module.c | 3 --- kernel/module.c | 33 ++++++++++++++++++--------------- 6 files changed, 18 insertions(+), 28 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/cris/kernel/module.c b/arch/cris/kernel/module.c index 37400f5869e6..51123f985eb5 100644 --- a/arch/cris/kernel/module.c +++ b/arch/cris/kernel/module.c @@ -32,8 +32,6 @@ #ifdef CONFIG_ETRAX_KMALLOCED_MODULES void *module_alloc(unsigned long size) { - if (size == 0) - return NULL; return kmalloc(size, GFP_KERNEL); } diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 5e34ccf39a49..2a625fb063e1 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -214,8 +214,6 @@ static inline int reassemble_22(int as22) void *module_alloc(unsigned long size) { - if (size == 0) - return NULL; /* using RWX means less protection for modules, but it's * easier than trying to map the text, data, init_text and * init_data correctly */ diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index f1ddc0d23679..4435488ebe25 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -43,10 +43,6 @@ void *module_alloc(unsigned long size) { void *ret; - /* We handle the zero case fine, unlike vmalloc */ - if (size == 0) - return NULL; - ret = module_map(size); if (ret) memset(ret, 0, size); diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index 243ffebe38d6..4918d91bc3a6 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -42,8 +42,6 @@ void *module_alloc(unsigned long size) int i = 0; int npages; - if (size == 0) - return NULL; npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; pages = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); if (pages == NULL) diff --git a/arch/unicore32/kernel/module.c b/arch/unicore32/kernel/module.c index 8fbe8577f5e6..16bd1495b934 100644 --- a/arch/unicore32/kernel/module.c +++ b/arch/unicore32/kernel/module.c @@ -27,9 +27,6 @@ void *module_alloc(unsigned long size) struct vm_struct *area; size = PAGE_ALIGN(size); - if (!size) - return NULL; - area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END); if (!area) return NULL; diff --git a/kernel/module.c b/kernel/module.c index 79a526dd1b11..4542ff3f7ef9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2377,7 +2377,7 @@ static void dynamic_debug_remove(struct _ddebug *debug) void * __weak module_alloc(unsigned long size) { - return size == 0 ? NULL : vmalloc_exec(size); + return vmalloc_exec(size); } static void *module_alloc_update_bounds(unsigned long size) @@ -2793,20 +2793,23 @@ static int move_module(struct module *mod, struct load_info *info) memset(ptr, 0, mod->core_size); mod->module_core = ptr; - ptr = module_alloc_update_bounds(mod->init_size); - /* - * The pointer to this block is stored in the module structure - * which is inside the block. This block doesn't need to be - * scanned as it contains data and code that will be freed - * after the module is initialized. - */ - kmemleak_ignore(ptr); - if (!ptr && mod->init_size) { - module_free(mod, mod->module_core); - return -ENOMEM; - } - memset(ptr, 0, mod->init_size); - mod->module_init = ptr; + if (mod->init_size) { + ptr = module_alloc_update_bounds(mod->init_size); + /* + * The pointer to this block is stored in the module structure + * which is inside the block. This block doesn't need to be + * scanned as it contains data and code that will be freed + * after the module is initialized. + */ + kmemleak_ignore(ptr); + if (!ptr) { + module_free(mod, mod->module_core); + return -ENOMEM; + } + memset(ptr, 0, mod->init_size); + mod->module_init = ptr; + } else + mod->module_init = NULL; /* Transfer each section which specifies SHF_ALLOC */ pr_debug("final section addresses:\n"); -- cgit v1.3-8-gc7d7 From 8ec7d50f1ed2b072d23ce810f86df09ee0568d4b Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 17 Dec 2012 15:59:36 -0800 Subject: kernel: remove reference to feature-removal-schedule.txt In commit 9c0ece069b32 ("Get rid of Documentation/feature-removal.txt"), Linus removed feature-removal-schedule.txt from Documentation, but there is still some reference to this file. So remove them. Signed-off-by: Tao Ma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 1 - kernel/module.c | 3 --- 2 files changed, 4 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f34c41bfaa37..571264830a29 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1333,7 +1333,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) if (ret) goto out_unlock; - /* See feature-removal-schedule.txt */ if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent) pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", task_tgid_nr(current), current->comm); diff --git a/kernel/module.c b/kernel/module.c index 6e48c3a43599..808bd62e1723 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -372,9 +372,6 @@ static bool check_symbol(const struct symsearch *syms, printk(KERN_WARNING "Symbol %s is being used " "by a non-GPL module, which will not " "be allowed in the future\n", fsa->name); - printk(KERN_WARNING "Please see the file " - "Documentation/feature-removal-schedule.txt " - "in the kernel source tree for more details.\n"); } } -- cgit v1.3-8-gc7d7 From 774a1221e862b343388347bac9b318767336b20b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jan 2013 18:52:51 -0800 Subject: module, async: async_synchronize_full() on module init iff async is used If the default iosched is built as module, the kernel may deadlock while trying to load the iosched module on device probe if the probing was running off async. This is because async_synchronize_full() at the end of module init ends up waiting for the async job which initiated the module loading. async A modprobe 1. finds a device 2. registers the block device 3. request_module(default iosched) 4. modprobe in userland 5. load and init module 6. async_synchronize_full() Async A waits for modprobe to finish in request_module() and modprobe waits for async A to finish in async_synchronize_full(). Because there's no easy to track dependency once control goes out to userland, implementing properly nested flushing is difficult. For now, make module init perform async_synchronize_full() iff module init has queued async jobs as suggested by Linus. This avoids the described deadlock because iosched module doesn't use async and thus wouldn't invoke async_synchronize_full(). This is hacky and incomplete. It will deadlock if async module loading nests; however, this works around the known problem case and seems to be the best of bad options. For more details, please refer to the following thread. http://thread.gmane.org/gmane.linux.kernel/1420814 Signed-off-by: Tejun Heo Reported-by: Alex Riesen Tested-by: Ming Lei Tested-by: Alex Riesen Cc: Arjan van de Ven Cc: Jens Axboe Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/async.c | 3 +++ kernel/module.c | 27 +++++++++++++++++++++++++-- 3 files changed, 29 insertions(+), 2 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 206bb089c06b..6fc8f45de4e9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1810,6 +1810,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ +#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ diff --git a/kernel/async.c b/kernel/async.c index 9d3118384858..a1d585c351d6 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -196,6 +196,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); + /* mark that this task has queued an async job, used by module init */ + current->flags |= PF_USED_ASYNC; + /* schedule for execution */ queue_work(system_unbound_wq, &entry->work); diff --git a/kernel/module.c b/kernel/module.c index 250092c1d57d..b10b048367e1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3013,6 +3013,12 @@ static int do_init_module(struct module *mod) { int ret = 0; + /* + * We want to find out whether @mod uses async during init. Clear + * PF_USED_ASYNC. async_schedule*() will set it. + */ + current->flags &= ~PF_USED_ASYNC; + blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); @@ -3058,8 +3064,25 @@ static int do_init_module(struct module *mod) blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_LIVE, mod); - /* We need to finish all async code before the module init sequence is done */ - async_synchronize_full(); + /* + * We need to finish all async code before the module init sequence + * is done. This has potential to deadlock. For example, a newly + * detected block device can trigger request_module() of the + * default iosched from async probing task. Once userland helper + * reaches here, async_synchronize_full() will wait on the async + * task waiting on request_module() and deadlock. + * + * This deadlock is avoided by perfomring async_synchronize_full() + * iff module init queued any async jobs. This isn't a full + * solution as it will deadlock the same if module loading from + * async jobs nests more than once; however, due to the various + * constraints, this hack seems to be the best option for now. + * Please refer to the following thread for details. + * + * http://thread.gmane.org/gmane.linux.kernel/1420814 + */ + if (current->flags & PF_USED_ASYNC) + async_synchronize_full(); mutex_lock(&module_mutex); /* Drop initial reference. */ -- cgit v1.3-8-gc7d7