From a466ef76b815b86748d9870ef2a430af7b39c710 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 9 May 2018 19:42:20 +0900 Subject: x86/kexec: Avoid double free_page() upon do_kexec_load() failure >From ff82bedd3e12f0d3353282054ae48c3bd8c72012 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 9 May 2018 12:12:39 +0900 Subject: [PATCH v3] x86/kexec: avoid double free_page() upon do_kexec_load() failure. syzbot is reporting crashes after memory allocation failure inside do_kexec_load() [1]. This is because free_transition_pgtable() is called by both init_transition_pgtable() and machine_kexec_cleanup() when memory allocation failed inside init_transition_pgtable(). Regarding 32bit code, machine_kexec_free_page_tables() is called by both machine_kexec_alloc_page_tables() and machine_kexec_cleanup() when memory allocation failed inside machine_kexec_alloc_page_tables(). Fix this by leaving the error handling to machine_kexec_cleanup() (and optionally setting NULL after free_page()). [1] https://syzkaller.appspot.com/bug?id=91e52396168cf2bdd572fe1e1bc0bc645c1c6b40 Fixes: f5deb79679af6eb4 ("x86: kexec: Use one page table in x86_64 machine_kexec") Fixes: 92be3d6bdf2cb349 ("kexec/i386: allocate page table pages dynamically") Reported-by: syzbot Signed-off-by: Tetsuo Handa Signed-off-by: Thomas Gleixner Acked-by: Baoquan He Cc: thomas.lendacky@amd.com Cc: prudo@linux.vnet.ibm.com Cc: Huang Ying Cc: syzkaller-bugs@googlegroups.com Cc: takahiro.akashi@linaro.org Cc: H. Peter Anvin Cc: akpm@linux-foundation.org Cc: dyoung@redhat.com Cc: kirill.shutemov@linux.intel.com Link: https://lkml.kernel.org/r/201805091942.DGG12448.tMFVFSJFQOOLHO@I-love.SAKURA.ne.jp --- arch/x86/kernel/machine_kexec_32.c | 6 +++++- arch/x86/kernel/machine_kexec_64.c | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 60cdec6628b0..d1ab07ec8c9a 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -57,12 +57,17 @@ static void load_segments(void) static void machine_kexec_free_page_tables(struct kimage *image) { free_page((unsigned long)image->arch.pgd); + image->arch.pgd = NULL; #ifdef CONFIG_X86_PAE free_page((unsigned long)image->arch.pmd0); + image->arch.pmd0 = NULL; free_page((unsigned long)image->arch.pmd1); + image->arch.pmd1 = NULL; #endif free_page((unsigned long)image->arch.pte0); + image->arch.pte0 = NULL; free_page((unsigned long)image->arch.pte1); + image->arch.pte1 = NULL; } static int machine_kexec_alloc_page_tables(struct kimage *image) @@ -79,7 +84,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image) !image->arch.pmd0 || !image->arch.pmd1 || #endif !image->arch.pte0 || !image->arch.pte1) { - machine_kexec_free_page_tables(image); return -ENOMEM; } return 0; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index a5e55d832d0a..6010449ca6d2 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -39,9 +39,13 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.p4d); + image->arch.p4d = NULL; free_page((unsigned long)image->arch.pud); + image->arch.pud = NULL; free_page((unsigned long)image->arch.pmd); + image->arch.pmd = NULL; free_page((unsigned long)image->arch.pte); + image->arch.pte = NULL; } static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) @@ -91,7 +95,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC)); return 0; err: - free_transition_pgtable(image); return result; } -- cgit v1.2.3-59-g8ed1b From ee6a7354a3629f9b65bc18dbe393503e9440d6f5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 9 May 2018 21:58:15 +0900 Subject: kprobes/x86: Prohibit probing on exception masking instructions Since MOV SS and POP SS instructions will delay the exceptions until the next instruction is executed, single-stepping on it by kprobes must be prohibited. However, kprobes usually executes those instructions directly on trampoline buffer (a.k.a. kprobe-booster), except for the kprobes which has post_handler. Thus if kprobe user probes MOV SS with post_handler, it will do single-stepping on the MOV SS. This means it is safe that if it is used via ftrace or perf/bpf since those don't use the post_handler. Anyway, since the stack switching is a rare case, it is safer just rejecting kprobes on such instructions. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Cc: Ricardo Neri Cc: Francis Deslauriers Cc: Oleg Nesterov Cc: Alexei Starovoitov Cc: Steven Rostedt Cc: Andy Lutomirski Cc: "H . Peter Anvin" Cc: Yonghong Song Cc: Borislav Petkov Cc: Linus Torvalds Cc: "David S . Miller" Link: https://lkml.kernel.org/r/152587069574.17316.3311695234863248641.stgit@devbox --- arch/x86/include/asm/insn.h | 18 ++++++++++++++++++ arch/x86/kernel/kprobes/core.c | 4 ++++ 2 files changed, 22 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index b3e32b010ab1..c2c01f84df75 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +#define POP_SS_OPCODE 0x1f +#define MOV_SREG_OPCODE 0x8e + +/* + * Intel SDM Vol.3A 6.8.3 states; + * "Any single-step trap that would be delivered following the MOV to SS + * instruction or POP to SS instruction (because EFLAGS.TF is 1) is + * suppressed." + * This function returns true if @insn is MOV SS or POP SS. On these + * instructions, single stepping is suppressed. + */ +static inline int insn_masking_exception(struct insn *insn) +{ + return insn->opcode.bytes[0] == POP_SS_OPCODE || + (insn->opcode.bytes[0] == MOV_SREG_OPCODE && + X86_MODRM_REG(insn->modrm.bytes[0]) == 2); +} + #endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 0715f827607c..6f4d42377fe5 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -370,6 +370,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION) return 0; + /* We should not singlestep on the exception masking instructions */ + if (insn_masking_exception(insn)) + return 0; + #ifdef CONFIG_X86_64 /* Only x86_64 has RIP relative instructions */ if (insn_rip_relative(insn)) { -- cgit v1.2.3-59-g8ed1b From 13ebe18c94f5b0665c01ae7fad2717ae959f4212 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 9 May 2018 21:58:45 +0900 Subject: uprobes/x86: Prohibit probing on MOV SS instruction Since MOV SS and POP SS instructions will delay the exceptions until the next instruction is executed, single-stepping on it by uprobes must be prohibited. uprobe already rejects probing on POP SS (0x1f), but allows probing on MOV SS (0x8e and reg == 2). This checks the target instruction and if it is MOV SS or POP SS, returns -ENOTSUPP to reject probing. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Acked-by: Oleg Nesterov Cc: Ricardo Neri Cc: Francis Deslauriers Cc: Alexei Starovoitov Cc: Steven Rostedt Cc: Andy Lutomirski Cc: "H . Peter Anvin" Cc: Yonghong Song Cc: Borislav Petkov Cc: Linus Torvalds Cc: "David S . Miller" Link: https://lkml.kernel.org/r/152587072544.17316.5950935243917346341.stgit@devbox --- arch/x86/kernel/uprobes.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 85c7ef23d99f..c84bb5396958 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -299,6 +299,10 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool if (is_prefix_bad(insn)) return -ENOTSUPP; + /* We should not singlestep on the exception masking instructions */ + if (insn_masking_exception(insn)) + return -ENOTSUPP; + if (x86_64) good_insns = good_insns_64; else -- cgit v1.2.3-59-g8ed1b From b1ae32dbab50ed19cfc16d225b0fb0114fb13025 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 13 May 2018 12:32:22 -0700 Subject: x86/cpufeature: Guard asm_volatile_goto usage for BPF compilation Workaround for the sake of BPF compilation which utilizes kernel headers, but clang does not support ASM GOTO and fails the build. Fixes: d0266046ad54 ("x86: Remove FAST_FEATURE_TESTS") Suggested-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Signed-off-by: Thomas Gleixner Cc: daniel@iogearbox.net Cc: peterz@infradead.org Cc: netdev@vger.kernel.org Cc: bp@alien8.de Cc: yhs@fb.com Cc: kernel-team@fb.com Cc: torvalds@linux-foundation.org Cc: davem@davemloft.net Link: https://lkml.kernel.org/r/20180513193222.1997938-1-ast@kernel.org --- arch/x86/include/asm/cpufeature.h | 15 +++++++++++++++ samples/bpf/Makefile | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index b27da9602a6d..aced6c9290d6 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -140,6 +140,20 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) +#if defined(__clang__) && !defined(CC_HAVE_ASM_GOTO) + +/* + * Workaround for the sake of BPF compilation which utilizes kernel + * headers, but clang does not support ASM GOTO and fails the build. + */ +#ifndef __BPF_TRACING__ +#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" +#endif + +#define static_cpu_has(bit) boot_cpu_has(bit) + +#else + /* * Static testing of CPU features. Used the same as boot_cpu_has(). * These will statically patch the target code for additional @@ -195,6 +209,7 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) +#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4d6a6edd4bf6..092947676143 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -255,7 +255,7 @@ $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h $(obj)/%.o: $(src)/%.c $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ -I$(srctree)/tools/testing/selftests/bpf/ \ - -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ + -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ -- cgit v1.2.3-59-g8ed1b