From 96738c69a7fcdbf0d7c9df0c8a27660011e82a7b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 13 Jan 2015 15:25:00 +0000 Subject: x86/efi: Avoid triple faults during EFI mixed mode calls Andy pointed out that if an NMI or MCE is received while we're in the middle of an EFI mixed mode call a triple fault will occur. This can happen, for example, when issuing an EFI mixed mode call while running perf. The reason for the triple fault is that we execute the mixed mode call in 32-bit mode with paging disabled but with 64-bit kernel IDT handlers installed throughout the call. At Andy's suggestion, stop playing the games we currently do at runtime, such as disabling paging and installing a 32-bit GDT for __KERNEL_CS. We can simply switch to the __KERNEL32_CS descriptor before invoking firmware services, and run in compatibility mode. This way, if an NMI/MCE does occur the kernel IDT handler will execute correctly, since it'll jump to __KERNEL_CS automatically. However, this change is only possible post-ExitBootServices(). Before then the firmware "owns" the machine and expects for its 32-bit IDT handlers to be left intact to service interrupts, etc. So, we now need to distinguish between early boot and runtime invocations of EFI services. During early boot, we need to restore the GDT that the firmware expects to be present. We can only jump to the __KERNEL32_CS code segment for mixed mode calls after ExitBootServices() has been invoked. A liberal sprinkling of comments in the thunking code should make the differences in early and late environments more apparent. Reported-by: Andy Lutomirski Tested-by: Borislav Petkov Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/Makefile | 1 + arch/x86/boot/compressed/efi_stub_64.S | 25 ---- arch/x86/boot/compressed/efi_thunk_64.S | 196 ++++++++++++++++++++++++++++++++ arch/x86/platform/efi/efi_stub_64.S | 161 -------------------------- arch/x86/platform/efi/efi_thunk_64.S | 121 +++++++++++++++++--- 5 files changed, 301 insertions(+), 203 deletions(-) create mode 100644 arch/x86/boot/compressed/efi_thunk_64.S (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index ad754b4411f7..8bd44e8ee6e2 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -49,6 +49,7 @@ $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o $(obj)/vmlinux: $(vmlinux-objs-y) FORCE $(call if_changed,ld) diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S index 7ff3632806b1..99494dff2113 100644 --- a/arch/x86/boot/compressed/efi_stub_64.S +++ b/arch/x86/boot/compressed/efi_stub_64.S @@ -3,28 +3,3 @@ #include #include "../../platform/efi/efi_stub_64.S" - -#ifdef CONFIG_EFI_MIXED - .code64 - .text -ENTRY(efi64_thunk) - push %rbp - push %rbx - - subq $16, %rsp - leaq efi_exit32(%rip), %rax - movl %eax, 8(%rsp) - leaq efi_gdt64(%rip), %rax - movl %eax, 4(%rsp) - movl %eax, 2(%rax) /* Fixup the gdt base address */ - leaq efi32_boot_gdt(%rip), %rax - movl %eax, (%rsp) - - call __efi64_thunk - - addq $16, %rsp - pop %rbx - pop %rbp - ret -ENDPROC(efi64_thunk) -#endif /* CONFIG_EFI_MIXED */ diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S new file mode 100644 index 000000000000..630384a4c14a --- /dev/null +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming + * + * Early support for invoking 32-bit EFI services from a 64-bit kernel. + * + * Because this thunking occurs before ExitBootServices() we have to + * restore the firmware's 32-bit GDT before we make EFI serivce calls, + * since the firmware's 32-bit IDT is still currently installed and it + * needs to be able to service interrupts. + * + * On the plus side, we don't have to worry about mangling 64-bit + * addresses into 32-bits because we're executing with an identify + * mapped pagetable and haven't transitioned to 64-bit virtual addresses + * yet. + */ + +#include +#include +#include +#include +#include + + .code64 + .text +ENTRY(efi64_thunk) + push %rbp + push %rbx + + subq $8, %rsp + leaq efi_exit32(%rip), %rax + movl %eax, 4(%rsp) + leaq efi_gdt64(%rip), %rax + movl %eax, (%rsp) + movl %eax, 2(%rax) /* Fixup the gdt base address */ + + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + + /* + * Convert x86-64 ABI params to i386 ABI + */ + subq $32, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movq %r8, %rsi + movl %esi, 0xc(%rsp) + movq %r9, %rsi + movl %esi, 0x10(%rsp) + + sgdt save_gdt(%rip) + + leaq 1f(%rip), %rbx + movq %rbx, func_rt_ptr(%rip) + + /* + * Switch to gdt with 32-bit segments. This is the firmware GDT + * that was installed when the kernel started executing. This + * pointer was saved at the EFI stub entry point in head_64.S. + */ + leaq efi32_boot_gdt(%rip), %rax + lgdt (%rax) + + pushq $__KERNEL_CS + leaq efi_enter32(%rip), %rax + pushq %rax + lretq + +1: addq $32, %rsp + + lgdt save_gdt(%rip) + + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + + /* + * Convert 32-bit status code into 64-bit. + */ + test %rax, %rax + jz 1f + movl %eax, %ecx + andl $0x0fffffff, %ecx + andl $0xf0000000, %eax + shl $32, %rax + or %rcx, %rax +1: + addq $8, %rsp + pop %rbx + pop %rbp + ret +ENDPROC(efi64_thunk) + +ENTRY(efi_exit32) + movq func_rt_ptr(%rip), %rax + push %rax + mov %rdi, %rax + ret +ENDPROC(efi_exit32) + + .code32 +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +ENTRY(efi_enter32) + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + + /* Reload pgtables */ + movl %cr3, %eax + movl %eax, %cr3 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Disable long mode via EFER */ + movl $MSR_EFER, %ecx + rdmsr + btrl $_EFER_LME, %eax + wrmsr + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + /* + * Some firmware will return with interrupts enabled. Be sure to + * disable them before we switch GDTs. + */ + cli + + movl 56(%esp), %eax + movl %eax, 2(%eax) + lgdtl (%eax) + + movl %cr4, %eax + btsl $(X86_CR4_PAE_BIT), %eax + movl %eax, %cr4 + + movl %cr3, %eax + movl %eax, %cr3 + + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + xorl %eax, %eax + lldt %ax + + movl 60(%esp), %eax + pushl $__KERNEL_CS + pushl %eax + + /* Enable paging */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + lret +ENDPROC(efi_enter32) + + .data + .balign 8 + .global efi32_boot_gdt +efi32_boot_gdt: .word 0 + .quad 0 + +save_gdt: .word 0 + .quad 0 +func_rt_ptr: .quad 0 + + .global efi_gdt64 +efi_gdt64: + .word efi_gdt64_end - efi_gdt64 + .long 0 /* Filled out by user */ + .word 0 + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00af9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf92000000ffff /* __KERNEL_DS */ + .quad 0x0080890000000000 /* TS descriptor */ + .quad 0x0000000000000000 /* TS continued */ +efi_gdt64_end: diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 5fcda7272550..86d0f9e08dd9 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -91,167 +91,6 @@ ENTRY(efi_call) ret ENDPROC(efi_call) -#ifdef CONFIG_EFI_MIXED - -/* - * We run this function from the 1:1 mapping. - * - * This function must be invoked with a 1:1 mapped stack. - */ -ENTRY(__efi64_thunk) - movl %ds, %eax - push %rax - movl %es, %eax - push %rax - movl %ss, %eax - push %rax - - subq $32, %rsp - movl %esi, 0x0(%rsp) - movl %edx, 0x4(%rsp) - movl %ecx, 0x8(%rsp) - movq %r8, %rsi - movl %esi, 0xc(%rsp) - movq %r9, %rsi - movl %esi, 0x10(%rsp) - - sgdt save_gdt(%rip) - - leaq 1f(%rip), %rbx - movq %rbx, func_rt_ptr(%rip) - - /* Switch to gdt with 32-bit segments */ - movl 64(%rsp), %eax - lgdt (%rax) - - leaq efi_enter32(%rip), %rax - pushq $__KERNEL_CS - pushq %rax - lretq - -1: addq $32, %rsp - - lgdt save_gdt(%rip) - - pop %rbx - movl %ebx, %ss - pop %rbx - movl %ebx, %es - pop %rbx - movl %ebx, %ds - - /* - * Convert 32-bit status code into 64-bit. - */ - test %rax, %rax - jz 1f - movl %eax, %ecx - andl $0x0fffffff, %ecx - andl $0xf0000000, %eax - shl $32, %rax - or %rcx, %rax -1: - ret -ENDPROC(__efi64_thunk) - -ENTRY(efi_exit32) - movq func_rt_ptr(%rip), %rax - push %rax - mov %rdi, %rax - ret -ENDPROC(efi_exit32) - - .code32 -/* - * EFI service pointer must be in %edi. - * - * The stack should represent the 32-bit calling convention. - */ -ENTRY(efi_enter32) - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es - movl %eax, %ss - - /* Reload pgtables */ - movl %cr3, %eax - movl %eax, %cr3 - - /* Disable paging */ - movl %cr0, %eax - btrl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - /* Disable long mode via EFER */ - movl $MSR_EFER, %ecx - rdmsr - btrl $_EFER_LME, %eax - wrmsr - - call *%edi - - /* We must preserve return value */ - movl %eax, %edi - - /* - * Some firmware will return with interrupts enabled. Be sure to - * disable them before we switch GDTs. - */ - cli - - movl 68(%esp), %eax - movl %eax, 2(%eax) - lgdtl (%eax) - - movl %cr4, %eax - btsl $(X86_CR4_PAE_BIT), %eax - movl %eax, %cr4 - - movl %cr3, %eax - movl %eax, %cr3 - - movl $MSR_EFER, %ecx - rdmsr - btsl $_EFER_LME, %eax - wrmsr - - xorl %eax, %eax - lldt %ax - - movl 72(%esp), %eax - pushl $__KERNEL_CS - pushl %eax - - /* Enable paging */ - movl %cr0, %eax - btsl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - lret -ENDPROC(efi_enter32) - - .data - .balign 8 - .global efi32_boot_gdt -efi32_boot_gdt: .word 0 - .quad 0 - -save_gdt: .word 0 - .quad 0 -func_rt_ptr: .quad 0 - - .global efi_gdt64 -efi_gdt64: - .word efi_gdt64_end - efi_gdt64 - .long 0 /* Filled out by user */ - .word 0 - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x00af9a000000ffff /* __KERNEL_CS */ - .quad 0x00cf92000000ffff /* __KERNEL_DS */ - .quad 0x0080890000000000 /* TS descriptor */ - .quad 0x0000000000000000 /* TS continued */ -efi_gdt64_end: -#endif /* CONFIG_EFI_MIXED */ - .data ENTRY(efi_scratch) .fill 3,8,0 diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 8806fa73e6e6..ff85d28c50f2 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -1,9 +1,26 @@ /* * Copyright (C) 2014 Intel Corporation; author Matt Fleming + * + * Support for invoking 32-bit EFI runtime services from a 64-bit + * kernel. + * + * The below thunking functions are only used after ExitBootServices() + * has been called. This simplifies things considerably as compared with + * the early EFI thunking because we can leave all the kernel state + * intact (GDT, IDT, etc) and simply invoke the the 32-bit EFI runtime + * services from __KERNEL32_CS. This means we can continue to service + * interrupts across an EFI mixed mode call. + * + * We do however, need to handle the fact that we're running in a full + * 64-bit virtual address space. Things like the stack and instruction + * addresses need to be accessible by the 32-bit firmware, so we rely on + * using the identity mappings in the EFI page table to access the stack + * and kernel text (see efi_setup_page_tables()). */ #include #include +#include .text .code64 @@ -33,14 +50,6 @@ ENTRY(efi64_thunk) leaq efi_exit32(%rip), %rbx subq %rax, %rbx movl %ebx, 8(%rsp) - leaq efi_gdt64(%rip), %rbx - subq %rax, %rbx - movl %ebx, 2(%ebx) - movl %ebx, 4(%rsp) - leaq efi_gdt32(%rip), %rbx - subq %rax, %rbx - movl %ebx, 2(%ebx) - movl %ebx, (%rsp) leaq __efi64_thunk(%rip), %rbx subq %rax, %rbx @@ -52,14 +61,92 @@ ENTRY(efi64_thunk) retq ENDPROC(efi64_thunk) - .data -efi_gdt32: - .word efi_gdt32_end - efi_gdt32 - .long 0 /* Filled out above */ - .word 0 - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x00cf9a000000ffff /* __KERNEL_CS */ - .quad 0x00cf93000000ffff /* __KERNEL_DS */ -efi_gdt32_end: +/* + * We run this function from the 1:1 mapping. + * + * This function must be invoked with a 1:1 mapped stack. + */ +ENTRY(__efi64_thunk) + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + + subq $32, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movq %r8, %rsi + movl %esi, 0xc(%rsp) + movq %r9, %rsi + movl %esi, 0x10(%rsp) + + leaq 1f(%rip), %rbx + movq %rbx, func_rt_ptr(%rip) + + /* Switch to 32-bit descriptor */ + pushq $__KERNEL32_CS + leaq efi_enter32(%rip), %rax + pushq %rax + lretq + +1: addq $32, %rsp + + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + /* + * Convert 32-bit status code into 64-bit. + */ + test %rax, %rax + jz 1f + movl %eax, %ecx + andl $0x0fffffff, %ecx + andl $0xf0000000, %eax + shl $32, %rax + or %rcx, %rax +1: + ret +ENDPROC(__efi64_thunk) + +ENTRY(efi_exit32) + movq func_rt_ptr(%rip), %rax + push %rax + mov %rdi, %rax + ret +ENDPROC(efi_exit32) + + .code32 +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +ENTRY(efi_enter32) + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + movl 72(%esp), %eax + pushl $__KERNEL_CS + pushl %eax + + lret +ENDPROC(efi_enter32) + + .data + .balign 8 +func_rt_ptr: .quad 0 efi_saved_sp: .quad 0 -- cgit v1.2.3-59-g8ed1b From b273c2c2f2d2d13dc0bfa8923d52fbaf8fa56ae8 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Mon, 2 Feb 2015 20:27:11 +0100 Subject: x86/apic: Fix the devicetree build in certain configs Without this patch: LD init/built-in.o arch/x86/built-in.o: In function `dtb_lapic_setup': kernel/devicetree.c:155: undefined reference to `apic_force_enable' Makefile:923: recipe for target 'vmlinux' failed make: *** [vmlinux] Error 1 Signed-off-by: Ricardo Ribalda Delgado Reviewed-by: Maciej W. Rozycki Cc: David Rientjes Cc: Jan Beulich Link: http://lkml.kernel.org/r/1422905231-16067-1-git-send-email-ricardo.ribalda@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 92003f3c8a42..efc3b22d896e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -213,7 +213,15 @@ void register_lapic_address(unsigned long address); extern void setup_boot_APIC_clock(void); extern void setup_secondary_APIC_clock(void); extern int APIC_init_uniprocessor(void); + +#ifdef CONFIG_X86_64 +static inline int apic_force_enable(unsigned long addr) +{ + return -1; +} +#else extern int apic_force_enable(unsigned long addr); +#endif extern int apic_bsp_setup(bool upmode); extern void apic_ap_setup(void); -- cgit v1.2.3-59-g8ed1b From f47233c2d34f243ecdaac179c3408a39ff9216a7 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 13 Feb 2015 16:04:55 +0100 Subject: x86/mm/ASLR: Propagate base load address calculation Commit: e2b32e678513 ("x86, kaslr: randomize module base load address") makes the base address for module to be unconditionally randomized in case when CONFIG_RANDOMIZE_BASE is defined and "nokaslr" option isn't present on the commandline. This is not consistent with how choose_kernel_location() decides whether it will randomize kernel load base. Namely, CONFIG_HIBERNATION disables kASLR (unless "kaslr" option is explicitly specified on kernel commandline), which makes the state space larger than what module loader is looking at. IOW CONFIG_HIBERNATION && CONFIG_RANDOMIZE_BASE is a valid config option, kASLR wouldn't be applied by default in that case, but module loader is not aware of that. Instead of fixing the logic in module.c, this patch takes more generic aproach. It introduces a new bootparam setup data_type SETUP_KASLR and uses that to pass the information whether kaslr has been applied during kernel decompression, and sets a global 'kaslr_enabled' variable accordingly, so that any kernel code (module loading, livepatching, ...) can make decisions based on its value. x86 module loader is converted to make use of this flag. Signed-off-by: Jiri Kosina Acked-by: Kees Cook Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/alpine.LNX.2.00.1502101411280.10719@pobox.suse.cz [ Always dump correct kaslr status when panicking ] Signed-off-by: Borislav Petkov --- arch/x86/boot/compressed/aslr.c | 34 +++++++++++++++++++++++++++++++++- arch/x86/boot/compressed/misc.c | 3 ++- arch/x86/boot/compressed/misc.h | 6 ++++-- arch/x86/include/asm/page_types.h | 3 +++ arch/x86/include/uapi/asm/bootparam.h | 1 + arch/x86/kernel/module.c | 11 ++--------- arch/x86/kernel/setup.c | 22 ++++++++++++++++++---- 7 files changed, 63 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index bb1376381985..7083c16cccba 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -14,6 +14,13 @@ static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; +struct kaslr_setup_data { + __u64 next; + __u32 type; + __u32 len; + __u8 data[1]; +} kaslr_setup_data; + #define I8254_PORT_CONTROL 0x43 #define I8254_PORT_COUNTER0 0x40 #define I8254_CMD_READBACK 0xC0 @@ -295,7 +302,29 @@ static unsigned long find_random_addr(unsigned long minimum, return slots_fetch_random(); } -unsigned char *choose_kernel_location(unsigned char *input, +static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled) +{ + struct setup_data *data; + + kaslr_setup_data.type = SETUP_KASLR; + kaslr_setup_data.len = 1; + kaslr_setup_data.next = 0; + kaslr_setup_data.data[0] = enabled; + + data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + + while (data && data->next) + data = (struct setup_data *)(unsigned long)data->next; + + if (data) + data->next = (unsigned long)&kaslr_setup_data; + else + params->hdr.setup_data = (unsigned long)&kaslr_setup_data; + +} + +unsigned char *choose_kernel_location(struct boot_params *params, + unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) @@ -306,14 +335,17 @@ unsigned char *choose_kernel_location(unsigned char *input, #ifdef CONFIG_HIBERNATION if (!cmdline_find_option_bool("kaslr")) { debug_putstr("KASLR disabled by default...\n"); + add_kaslr_setup_data(params, 0); goto out; } #else if (cmdline_find_option_bool("nokaslr")) { debug_putstr("KASLR disabled by cmdline...\n"); + add_kaslr_setup_data(params, 0); goto out; } #endif + add_kaslr_setup_data(params, 1); /* Record the various known unsafe memory ranges. */ mem_avoid_init((unsigned long)input, input_size, diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index a950864a64da..5903089c818f 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -401,7 +401,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, * the entire decompressed kernel plus relocation table, or the * entire decompressed kernel plus .bss and .brk sections. */ - output = choose_kernel_location(input_data, input_len, output, + output = choose_kernel_location(real_mode, input_data, input_len, + output, output_len > run_size ? output_len : run_size); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 24e3e569a13c..6d6730743024 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -56,7 +56,8 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE /* aslr.c */ -unsigned char *choose_kernel_location(unsigned char *input, +unsigned char *choose_kernel_location(struct boot_params *params, + unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size); @@ -64,7 +65,8 @@ unsigned char *choose_kernel_location(unsigned char *input, bool has_cpuflag(int flag); #else static inline -unsigned char *choose_kernel_location(unsigned char *input, +unsigned char *choose_kernel_location(struct boot_params *params, + unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index f97fbe3abb67..3d43ce36eaba 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -3,6 +3,7 @@ #include #include +#include /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 @@ -51,6 +52,8 @@ extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; +extern bool kaslr_enabled; + static inline phys_addr_t get_max_mapped(void) { return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 225b0988043a..44e6dd7e36a2 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -7,6 +7,7 @@ #define SETUP_DTB 2 #define SETUP_PCI 3 #define SETUP_EFI 4 +#define SETUP_KASLR 5 /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e69f9882bf95..c3c59a3a14ad 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -32,6 +32,7 @@ #include #include +#include #if 0 #define DEBUGP(fmt, ...) \ @@ -46,21 +47,13 @@ do { \ #ifdef CONFIG_RANDOMIZE_BASE static unsigned long module_load_offset; -static int randomize_modules = 1; /* Mutex protects the module_load_offset. */ static DEFINE_MUTEX(module_kaslr_mutex); -static int __init parse_nokaslr(char *p) -{ - randomize_modules = 0; - return 0; -} -early_param("nokaslr", parse_nokaslr); - static unsigned long int get_module_load_offset(void) { - if (randomize_modules) { + if (kaslr_enabled) { mutex_lock(&module_kaslr_mutex); /* * Calculate the module_load_offset the first time this diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ab4734e5411d..16b6043cb073 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -121,6 +121,8 @@ unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; +bool __read_mostly kaslr_enabled = false; + #ifdef CONFIG_DMI RESERVE_BRK(dmi_alloc, 65536); #endif @@ -424,6 +426,11 @@ static void __init reserve_initrd(void) } #endif /* CONFIG_BLK_DEV_INITRD */ +static void __init parse_kaslr_setup(u64 pa_data, u32 data_len) +{ + kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data)); +} + static void __init parse_setup_data(void) { struct setup_data *data; @@ -451,6 +458,9 @@ static void __init parse_setup_data(void) case SETUP_EFI: parse_efi_setup(pa_data, data_len); break; + case SETUP_KASLR: + parse_kaslr_setup(pa_data, data_len); + break; default: break; } @@ -833,10 +843,14 @@ static void __init trim_low_memory_range(void) static int dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) { - pr_emerg("Kernel Offset: 0x%lx from 0x%lx " - "(relocation range: 0x%lx-0x%lx)\n", - (unsigned long)&_text - __START_KERNEL, __START_KERNEL, - __START_KERNEL_map, MODULES_VADDR-1); + if (kaslr_enabled) + pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n", + (unsigned long)&_text - __START_KERNEL, + __START_KERNEL, + __START_KERNEL_map, + MODULES_VADDR-1); + else + pr_emerg("Kernel Offset: disabled\n"); return 0; } -- cgit v1.2.3-59-g8ed1b From f15e05186c3244e9195378a0a568283a8ccc60b0 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 10 Feb 2015 13:20:30 -0800 Subject: x86/mm/init: Fix incorrect page size in init_memory_mapping() printks With 32-bit non-PAE kernels, we have 2 page sizes available (at most): 4k and 4M. Enabling PAE replaces that 4M size with a 2M one (which 64-bit systems use too). But, when booting a 32-bit non-PAE kernel, in one of our early-boot printouts, we say: init_memory_mapping: [mem 0x00000000-0x000fffff] [mem 0x00000000-0x000fffff] page 4k init_memory_mapping: [mem 0x37000000-0x373fffff] [mem 0x37000000-0x373fffff] page 2M init_memory_mapping: [mem 0x00100000-0x36ffffff] [mem 0x00100000-0x003fffff] page 4k [mem 0x00400000-0x36ffffff] page 2M init_memory_mapping: [mem 0x37400000-0x377fdfff] [mem 0x37400000-0x377fdfff] page 4k Which is obviously wrong. There is no 2M page available. This is probably because of a badly-named variable: in the map_range code: PG_LEVEL_2M. Instead of renaming all the PG_LEVEL_2M's. This patch just fixes the printout: init_memory_mapping: [mem 0x00000000-0x000fffff] [mem 0x00000000-0x000fffff] page 4k init_memory_mapping: [mem 0x37000000-0x373fffff] [mem 0x37000000-0x373fffff] page 4M init_memory_mapping: [mem 0x00100000-0x36ffffff] [mem 0x00100000-0x003fffff] page 4k [mem 0x00400000-0x36ffffff] page 4M init_memory_mapping: [mem 0x37400000-0x377fdfff] [mem 0x37400000-0x377fdfff] page 4k BRK [0x03206000, 0x03206fff] PGTABLE Signed-off-by: Dave Hansen Cc: Pekka Enberg Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20150210212030.665EC267@viggo.jf.intel.com Signed-off-by: Borislav Petkov --- arch/x86/mm/init.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 079c3b6a3ff1..7ff24240d863 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -238,6 +238,31 @@ static void __init_refok adjust_range_page_size_mask(struct map_range *mr, } } +static const char *page_size_string(struct map_range *mr) +{ + static const char str_1g[] = "1G"; + static const char str_2m[] = "2M"; + static const char str_4m[] = "4M"; + static const char str_4k[] = "4k"; + + if (mr->page_size_mask & (1<page_size_mask & (1<page_size_mask & (1< Date: Sat, 14 Feb 2015 09:33:50 -0800 Subject: x86, mm/ASLR: Fix stack randomization on 64-bit systems The issue is that the stack for processes is not properly randomized on 64 bit architectures due to an integer overflow. The affected function is randomize_stack_top() in file "fs/binfmt_elf.c": static unsigned long randomize_stack_top(unsigned long stack_top) { unsigned int random_variable = 0; if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) { random_variable = get_random_int() & STACK_RND_MASK; random_variable <<= PAGE_SHIFT; } return PAGE_ALIGN(stack_top) + random_variable; return PAGE_ALIGN(stack_top) - random_variable; } Note that, it declares the "random_variable" variable as "unsigned int". Since the result of the shifting operation between STACK_RND_MASK (which is 0x3fffff on x86_64, 22 bits) and PAGE_SHIFT (which is 12 on x86_64): random_variable <<= PAGE_SHIFT; then the two leftmost bits are dropped when storing the result in the "random_variable". This variable shall be at least 34 bits long to hold the (22+12) result. These two dropped bits have an impact on the entropy of process stack. Concretely, the total stack entropy is reduced by four: from 2^28 to 2^30 (One fourth of expected entropy). This patch restores back the entropy by correcting the types involved in the operations in the functions randomize_stack_top() and stack_maxrandom_size(). The successful fix can be tested with: $ for i in `seq 1 10`; do cat /proc/self/maps | grep stack; done 7ffeda566000-7ffeda587000 rw-p 00000000 00:00 0 [stack] 7fff5a332000-7fff5a353000 rw-p 00000000 00:00 0 [stack] 7ffcdb7a1000-7ffcdb7c2000 rw-p 00000000 00:00 0 [stack] 7ffd5e2c4000-7ffd5e2e5000 rw-p 00000000 00:00 0 [stack] ... Once corrected, the leading bytes should be between 7ffc and 7fff, rather than always being 7fff. Signed-off-by: Hector Marco-Gisbert Signed-off-by: Ismael Ripoll [ Rebased, fixed 80 char bugs, cleaned up commit message, added test example and CVE ] Signed-off-by: Kees Cook Cc: Cc: Linus Torvalds Cc: Andrew Morton Cc: Al Viro Fixes: CVE-2015-1593 Link: http://lkml.kernel.org/r/20150214173350.GA18393@www.outflux.net Signed-off-by: Borislav Petkov --- arch/x86/mm/mmap.c | 6 +++--- fs/binfmt_elf.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 919b91205cd4..df4552bd239e 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -35,12 +35,12 @@ struct va_alignment __read_mostly va_align = { .flags = -1, }; -static unsigned int stack_maxrandom_size(void) +static unsigned long stack_maxrandom_size(void) { - unsigned int max = 0; + unsigned long max = 0; if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) { - max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT; + max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT; } return max; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 02b16910f4c9..995986b8e36b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -645,11 +645,12 @@ out: static unsigned long randomize_stack_top(unsigned long stack_top) { - unsigned int random_variable = 0; + unsigned long random_variable = 0; if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) { - random_variable = get_random_int() & STACK_RND_MASK; + random_variable = (unsigned long) get_random_int(); + random_variable &= STACK_RND_MASK; random_variable <<= PAGE_SHIFT; } #ifdef CONFIG_STACK_GROWSUP -- cgit v1.2.3-59-g8ed1b From f84598bd7c851f8b0bf8cd0d7c3be0d73c432ff4 Mon Sep 17 00:00:00 2001 From: Quentin Casasnovas Date: Tue, 3 Feb 2015 13:00:22 +0100 Subject: x86/microcode/intel: Guard against stack overflow in the loader mc_saved_tmp is a static array allocated on the stack, we need to make sure mc_saved_count stays within its bounds, otherwise we're overflowing the stack in _save_mc(). A specially crafted microcode header could lead to a kernel crash or potentially kernel execution. Signed-off-by: Quentin Casasnovas Cc: "H. Peter Anvin" Cc: Fenghua Yu Link: http://lkml.kernel.org/r/1422964824-22056-1-git-send-email-quentin.casasnovas@oracle.com Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/microcode/intel_early.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index ec9df6f9cd47..5e109a31f62b 100644 --- a/arch/x86/kernel/cpu/microcode/intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c @@ -321,7 +321,7 @@ get_matching_model_microcode(int cpu, unsigned long start, unsigned int mc_saved_count = mc_saved_data->mc_saved_count; int i; - while (leftover) { + while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) { mc_header = (struct microcode_header_intel *)ucode_ptr; mc_size = get_totalsize(mc_header); -- cgit v1.2.3-59-g8ed1b From 35a9ff4eec7a1725ac4364972fc6c156e4feedd0 Mon Sep 17 00:00:00 2001 From: Quentin Casasnovas Date: Tue, 3 Feb 2015 13:00:24 +0100 Subject: x86/microcode/intel: Handle truncated microcode images more robustly We do not check the input data bounds containing the microcode before copying a struct microcode_intel_header from it. A specially crafted microcode could cause the kernel to read invalid memory and lead to a denial-of-service. Signed-off-by: Quentin Casasnovas Cc: "H. Peter Anvin" Cc: Fenghua Yu Link: http://lkml.kernel.org/r/1422964824-22056-3-git-send-email-quentin.casasnovas@oracle.com [ Made error message differ from the next one and flipped comparison. ] Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/microcode/intel.c | 5 +++++ arch/x86/kernel/cpu/microcode/intel_early.c | 4 ++++ 2 files changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index c6826d1e8082..746e7fd08aad 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -196,6 +196,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, struct microcode_header_intel mc_header; unsigned int mc_size; + if (leftover < sizeof(mc_header)) { + pr_err("error! Truncated header in microcode data file\n"); + break; + } + if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) break; diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index 5e109a31f62b..420eb933189c 100644 --- a/arch/x86/kernel/cpu/microcode/intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c @@ -322,6 +322,10 @@ get_matching_model_microcode(int cpu, unsigned long start, int i; while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) { + + if (leftover < sizeof(mc_header)) + break; + mc_header = (struct microcode_header_intel *)ucode_ptr; mc_size = get_totalsize(mc_header); -- cgit v1.2.3-59-g8ed1b From 570e1aa84c376ff39809442f09c7606ddf62cfd1 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 20 Feb 2015 10:18:59 +0100 Subject: x86/mm/ASLR: Avoid PAGE_SIZE redefinition for UML subarch Commit f47233c2d34 ("x86/mm/ASLR: Propagate base load address calculation") causes PAGE_SIZE redefinition warnings for UML subarch builds. This is caused by added includes that were leftovers from previous patch versions are are not actually needed (especially page_types.h inlcude in module.c). Drop those stray includes. Reported-by: kbuild test robot Signed-off-by: Jiri Kosina Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Kees Cook Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1502201017240.28769@pobox.suse.cz Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_types.h | 1 - arch/x86/kernel/module.c | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 3d43ce36eaba..95e11f79f123 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -3,7 +3,6 @@ #include #include -#include /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index c3c59a3a14ad..ef00116e8270 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -32,7 +32,6 @@ #include #include -#include #if 0 #define DEBUGP(fmt, ...) \ -- cgit v1.2.3-59-g8ed1b