aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2017-08-18 10:29:54 +0200
committerIngo Molnar <mingo@kernel.org>2017-08-18 10:29:54 +0200
commit0c2364791343e4b04cd1f097ff2abc2799062448 (patch)
treebeb9d94252d42d35b2066bf23383576b0beeebeb /arch
parentlocking/lockdep: Make CONFIG_LOCKDEP_CROSSRELEASE and CONFIG_LOCKDEP_COMPLETIONS truly non-interactive (diff)
parentx86/asm: Add ASM_UNREACHABLE (diff)
downloadlinux-dev-0c2364791343e4b04cd1f097ff2abc2799062448.tar.xz
linux-dev-0c2364791343e4b04cd1f097ff2abc2799062448.zip
Merge branch 'x86/asm' into locking/core
We need the ASM_UNREACHABLE() macro for a dependent patch. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/Kconfig.debug2
-rw-r--r--arch/x86/entry/Makefile1
-rw-r--r--arch/x86/entry/calling.h5
-rw-r--r--arch/x86/entry/entry_64.S170
-rw-r--r--arch/x86/include/asm/io.h98
-rw-r--r--arch/x86/include/asm/orc_types.h107
-rw-r--r--arch/x86/include/asm/processor.h3
-rw-r--r--arch/x86/include/asm/rmwcc.h37
-rw-r--r--arch/x86/include/asm/unwind_hints.h103
-rw-r--r--arch/x86/kernel/dumpstack.c12
-rw-r--r--arch/x86/kernel/dumpstack_32.c4
-rw-r--r--arch/x86/kernel/dumpstack_64.c4
-rw-r--r--arch/x86/kernel/process_64.c3
13 files changed, 443 insertions, 106 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index cd20ca0b4043..1fc519f3c49e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
Some of these sanity checks may slow down kernel entries and
exits or otherwise impact performance.
- This is currently used to help test NMI code.
-
If unsure, say N.
config DEBUG_NMI_SELFTEST
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 9976fcecd17e..af28a8a24366 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -2,7 +2,6 @@
# Makefile for the x86 low level entry code
#
-OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 05ed3d393da7..640aafebdc00 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,4 +1,5 @@
#include <linux/jump_label.h>
+#include <asm/unwind_hints.h>
/*
@@ -112,6 +113,7 @@ For 32-bit we have the following conventions - kernel is built with
movq %rdx, 12*8+\offset(%rsp)
movq %rsi, 13*8+\offset(%rsp)
movq %rdi, 14*8+\offset(%rsp)
+ UNWIND_HINT_REGS offset=\offset extra=0
.endm
.macro SAVE_C_REGS offset=0
SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
@@ -136,6 +138,7 @@ For 32-bit we have the following conventions - kernel is built with
movq %r12, 3*8+\offset(%rsp)
movq %rbp, 4*8+\offset(%rsp)
movq %rbx, 5*8+\offset(%rsp)
+ UNWIND_HINT_REGS offset=\offset
.endm
.macro RESTORE_EXTRA_REGS offset=0
@@ -145,6 +148,7 @@ For 32-bit we have the following conventions - kernel is built with
movq 3*8+\offset(%rsp), %r12
movq 4*8+\offset(%rsp), %rbp
movq 5*8+\offset(%rsp), %rbx
+ UNWIND_HINT_REGS offset=\offset extra=0
.endm
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
@@ -167,6 +171,7 @@ For 32-bit we have the following conventions - kernel is built with
.endif
movq 13*8(%rsp), %rsi
movq 14*8(%rsp), %rdi
+ UNWIND_HINT_IRET_REGS offset=16*8
.endm
.macro RESTORE_C_REGS
RESTORE_C_REGS_HELPER 1,1,1,1,1
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d271fb79248f..daf8936d0628 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,7 @@
#include <asm/smap.h>
#include <asm/pgtable_types.h>
#include <asm/export.h>
+#include <asm/frame.h>
#include <linux/err.h>
.code64
@@ -43,9 +44,10 @@
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
+ UNWIND_HINT_EMPTY
swapgs
sysretq
-ENDPROC(native_usergs_sysret64)
+END(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
.macro TRACE_IRQS_IRETQ
@@ -134,6 +136,7 @@ ENDPROC(native_usergs_sysret64)
*/
ENTRY(entry_SYSCALL_64)
+ UNWIND_HINT_EMPTY
/*
* Interrupts are off on entry.
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
@@ -169,6 +172,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r10 /* pt_regs->r10 */
pushq %r11 /* pt_regs->r11 */
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+ UNWIND_HINT_REGS extra=0
/*
* If we need to do entry work or if we guess we'll need to do
@@ -223,6 +227,7 @@ entry_SYSCALL_64_fastpath:
movq EFLAGS(%rsp), %r11
RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
+ UNWIND_HINT_EMPTY
USERGS_SYSRET64
1:
@@ -316,6 +321,7 @@ syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
+ UNWIND_HINT_EMPTY
USERGS_SYSRET64
opportunistic_sysret_failed:
@@ -343,6 +349,7 @@ ENTRY(stub_ptregs_64)
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
popq %rax
+ UNWIND_HINT_REGS extra=0
jmp entry_SYSCALL64_slow_path
1:
@@ -351,6 +358,7 @@ END(stub_ptregs_64)
.macro ptregs_stub func
ENTRY(ptregs_\func)
+ UNWIND_HINT_FUNC
leaq \func(%rip), %rax
jmp stub_ptregs_64
END(ptregs_\func)
@@ -367,6 +375,7 @@ END(ptregs_\func)
* %rsi: next task
*/
ENTRY(__switch_to_asm)
+ UNWIND_HINT_FUNC
/*
* Save callee-saved registers
* This must match the order in inactive_task_frame
@@ -406,6 +415,7 @@ END(__switch_to_asm)
* r12: kernel thread arg
*/
ENTRY(ret_from_fork)
+ UNWIND_HINT_EMPTY
movq %rax, %rdi
call schedule_tail /* rdi: 'prev' task parameter */
@@ -413,6 +423,7 @@ ENTRY(ret_from_fork)
jnz 1f /* kernel threads are uncommon */
2:
+ UNWIND_HINT_REGS
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
@@ -440,13 +451,102 @@ END(ret_from_fork)
ENTRY(irq_entries_start)
vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+ UNWIND_HINT_IRET_REGS
pushq $(~vector+0x80) /* Note: always in signed byte range */
- vector=vector+1
jmp common_interrupt
.align 8
+ vector=vector+1
.endr
END(irq_entries_start)
+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+#ifdef CONFIG_DEBUG_ENTRY
+ pushfq
+ testl $X86_EFLAGS_IF, (%rsp)
+ jz .Lokay_\@
+ ud2
+.Lokay_\@:
+ addq $8, %rsp
+#endif
+.endm
+
+/*
+ * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
+ */
+.macro ENTER_IRQ_STACK regs=1 old_rsp
+ DEBUG_ENTRY_ASSERT_IRQS_OFF
+ movq %rsp, \old_rsp
+
+ .if \regs
+ UNWIND_HINT_REGS base=\old_rsp
+ .endif
+
+ incl PER_CPU_VAR(irq_count)
+ jnz .Lirq_stack_push_old_rsp_\@
+
+ /*
+ * Right now, if we just incremented irq_count to zero, we've
+ * claimed the IRQ stack but we haven't switched to it yet.
+ *
+ * If anything is added that can interrupt us here without using IST,
+ * it must be *extremely* careful to limit its stack usage. This
+ * could include kprobes and a hypothetical future IST-less #DB
+ * handler.
+ *
+ * The OOPS unwinder relies on the word at the top of the IRQ
+ * stack linking back to the previous RSP for the entire time we're
+ * on the IRQ stack. For this to work reliably, we need to write
+ * it before we actually move ourselves to the IRQ stack.
+ */
+
+ movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
+ movq PER_CPU_VAR(irq_stack_ptr), %rsp
+
+#ifdef CONFIG_DEBUG_ENTRY
+ /*
+ * If the first movq above becomes wrong due to IRQ stack layout
+ * changes, the only way we'll notice is if we try to unwind right
+ * here. Assert that we set up the stack right to catch this type
+ * of bug quickly.
+ */
+ cmpq -8(%rsp), \old_rsp
+ je .Lirq_stack_okay\@
+ ud2
+ .Lirq_stack_okay\@:
+#endif
+
+.Lirq_stack_push_old_rsp_\@:
+ pushq \old_rsp
+
+ .if \regs
+ UNWIND_HINT_REGS indirect=1
+ .endif
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK.
+ */
+.macro LEAVE_IRQ_STACK regs=1
+ DEBUG_ENTRY_ASSERT_IRQS_OFF
+ /* We need to be off the IRQ stack before decrementing irq_count. */
+ popq %rsp
+
+ .if \regs
+ UNWIND_HINT_REGS
+ .endif
+
+ /*
+ * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
+ * the irq stack but we're not on it.
+ */
+
+ decl PER_CPU_VAR(irq_count)
+.endm
+
/*
* Interrupt entry/exit.
*
@@ -485,17 +585,7 @@ END(irq_entries_start)
CALL_enter_from_user_mode
1:
- /*
- * Save previous stack pointer, optionally switch to interrupt stack.
- * irq_count is used to check if a CPU is already on an interrupt stack
- * or not. While this is essentially redundant with preempt_count it is
- * a little cheaper to use a separate counter in the PDA (short of
- * moving irq_enter into assembly, which would be too much work)
- */
- movq %rsp, %rdi
- incl PER_CPU_VAR(irq_count)
- cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
- pushq %rdi
+ ENTER_IRQ_STACK old_rsp=%rdi
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
@@ -515,10 +605,8 @@ common_interrupt:
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
- decl PER_CPU_VAR(irq_count)
- /* Restore saved previous stack */
- popq %rsp
+ LEAVE_IRQ_STACK
testb $3, CS(%rsp)
jz retint_kernel
@@ -561,6 +649,7 @@ restore_c_regs_and_iret:
INTERRUPT_RETURN
ENTRY(native_iret)
+ UNWIND_HINT_IRET_REGS
/*
* Are we returning to a stack segment from the LDT? Note: in
* 64-bit mode SS:RSP on the exception stack is always valid.
@@ -633,6 +722,7 @@ native_irq_return_ldt:
orq PER_CPU_VAR(espfix_stack), %rax
SWAPGS
movq %rax, %rsp
+ UNWIND_HINT_IRET_REGS offset=8
/*
* At this point, we cannot write to the stack any more, but we can
@@ -654,6 +744,7 @@ END(common_interrupt)
*/
.macro apicinterrupt3 num sym do_sym
ENTRY(\sym)
+ UNWIND_HINT_IRET_REGS
ASM_CLAC
pushq $~(\num)
.Lcommon_\sym:
@@ -740,6 +831,8 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
+ UNWIND_HINT_IRET_REGS offset=8
+
/* Sanity check */
.if \shift_ist != -1 && \paranoid == 0
.error "using shift_ist requires paranoid=1"
@@ -763,6 +856,7 @@ ENTRY(\sym)
.else
call error_entry
.endif
+ UNWIND_HINT_REGS
/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
.if \paranoid
@@ -860,6 +954,7 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
* edi: new selector
*/
ENTRY(native_load_gs_index)
+ FRAME_BEGIN
pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
SWAPGS
@@ -868,8 +963,9 @@ ENTRY(native_load_gs_index)
2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
SWAPGS
popfq
+ FRAME_END
ret
-END(native_load_gs_index)
+ENDPROC(native_load_gs_index)
EXPORT_SYMBOL(native_load_gs_index)
_ASM_EXTABLE(.Lgs_change, bad_gs)
@@ -892,14 +988,12 @@ bad_gs:
ENTRY(do_softirq_own_stack)
pushq %rbp
mov %rsp, %rbp
- incl PER_CPU_VAR(irq_count)
- cmove PER_CPU_VAR(irq_stack_ptr), %rsp
- push %rbp /* frame pointer backlink */
+ ENTER_IRQ_STACK regs=0 old_rsp=%r11
call __do_softirq
+ LEAVE_IRQ_STACK regs=0
leaveq
- decl PER_CPU_VAR(irq_count)
ret
-END(do_softirq_own_stack)
+ENDPROC(do_softirq_own_stack)
#ifdef CONFIG_XEN
idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
@@ -923,14 +1017,14 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
* see the correct pointer to the pt_regs
*/
+ UNWIND_HINT_FUNC
movq %rdi, %rsp /* we don't return, adjust the stack frame */
-11: incl PER_CPU_VAR(irq_count)
- movq %rsp, %rbp
- cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
- pushq %rbp /* frame pointer backlink */
+ UNWIND_HINT_REGS
+
+ ENTER_IRQ_STACK old_rsp=%r10
call xen_evtchn_do_upcall
- popq %rsp
- decl PER_CPU_VAR(irq_count)
+ LEAVE_IRQ_STACK
+
#ifndef CONFIG_PREEMPT
call xen_maybe_preempt_hcall
#endif
@@ -951,6 +1045,7 @@ END(xen_do_hypervisor_callback)
* with its current contents: any discrepancy means we in category 1.
*/
ENTRY(xen_failsafe_callback)
+ UNWIND_HINT_EMPTY
movl %ds, %ecx
cmpw %cx, 0x10(%rsp)
jne 1f
@@ -970,11 +1065,13 @@ ENTRY(xen_failsafe_callback)
pushq $0 /* RIP */
pushq %r11
pushq %rcx
+ UNWIND_HINT_IRET_REGS offset=8
jmp general_protection
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp), %rcx
movq 8(%rsp), %r11
addq $0x30, %rsp
+ UNWIND_HINT_IRET_REGS
pushq $-1 /* orig_ax = -1 => not a system call */
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
@@ -1020,6 +1117,7 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise
*/
ENTRY(paranoid_entry)
+ UNWIND_HINT_FUNC
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
@@ -1047,6 +1145,7 @@ END(paranoid_entry)
* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
*/
ENTRY(paranoid_exit)
+ UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF_DEBUG
testl %ebx, %ebx /* swapgs needed? */
@@ -1068,6 +1167,7 @@ END(paranoid_exit)
* Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
+ UNWIND_HINT_FUNC
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
@@ -1152,6 +1252,7 @@ END(error_entry)
* 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
*/
ENTRY(error_exit)
+ UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
testl %ebx, %ebx
@@ -1161,6 +1262,7 @@ END(error_exit)
/* Runs on exception stack */
ENTRY(nmi)
+ UNWIND_HINT_IRET_REGS
/*
* Fix up the exception frame if we're on Xen.
* PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
@@ -1232,11 +1334,13 @@ ENTRY(nmi)
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ UNWIND_HINT_IRET_REGS base=%rdx offset=8
pushq 5*8(%rdx) /* pt_regs->ss */
pushq 4*8(%rdx) /* pt_regs->rsp */
pushq 3*8(%rdx) /* pt_regs->flags */
pushq 2*8(%rdx) /* pt_regs->cs */
pushq 1*8(%rdx) /* pt_regs->rip */
+ UNWIND_HINT_IRET_REGS
pushq $-1 /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
@@ -1253,6 +1357,7 @@ ENTRY(nmi)
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
+ UNWIND_HINT_REGS
ENCODE_FRAME_POINTER
/*
@@ -1407,6 +1512,7 @@ first_nmi:
.rept 5
pushq 11*8(%rsp)
.endr
+ UNWIND_HINT_IRET_REGS
/* Everything up to here is safe from nested NMIs */
@@ -1422,6 +1528,7 @@ first_nmi:
pushq $__KERNEL_CS /* CS */
pushq $1f /* RIP */
INTERRUPT_RETURN /* continues at repeat_nmi below */
+ UNWIND_HINT_IRET_REGS
1:
#endif
@@ -1471,6 +1578,7 @@ end_repeat_nmi:
* exceptions might do.
*/
call paranoid_entry
+ UNWIND_HINT_REGS
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp, %rdi
@@ -1508,17 +1616,19 @@ nmi_restore:
END(nmi)
ENTRY(ignore_sysret)
+ UNWIND_HINT_EMPTY
mov $-ENOSYS, %eax
sysret
END(ignore_sysret)
ENTRY(rewind_stack_do_exit)
+ UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
- leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp
+ leaq -PTREGS_SIZE(%rax), %rsp
+ UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
call do_exit
-1: jmp 1b
END(rewind_stack_do_exit)
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 48febf07e828..1310e1f1cd65 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -69,6 +69,9 @@ build_mmio_write(__writeb, "b", unsigned char, "q", )
build_mmio_write(__writew, "w", unsigned short, "r", )
build_mmio_write(__writel, "l", unsigned int, "r", )
+#define readb readb
+#define readw readw
+#define readl readl
#define readb_relaxed(a) __readb(a)
#define readw_relaxed(a) __readw(a)
#define readl_relaxed(a) __readl(a)
@@ -76,6 +79,9 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
#define __raw_readw __readw
#define __raw_readl __readl
+#define writeb writeb
+#define writew writew
+#define writel writel
#define writeb_relaxed(v, a) __writeb(v, a)
#define writew_relaxed(v, a) __writew(v, a)
#define writel_relaxed(v, a) __writel(v, a)
@@ -88,13 +94,15 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
#ifdef CONFIG_X86_64
build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
+build_mmio_read(__readq, "q", unsigned long, "=r", )
build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
+build_mmio_write(__writeq, "q", unsigned long, "r", )
-#define readq_relaxed(a) readq(a)
-#define writeq_relaxed(v, a) writeq(v, a)
+#define readq_relaxed(a) __readq(a)
+#define writeq_relaxed(v, a) __writeq(v, a)
-#define __raw_readq(a) readq(a)
-#define __raw_writeq(val, addr) writeq(val, addr)
+#define __raw_readq __readq
+#define __raw_writeq __writeq
/* Let people know that we have them */
#define readq readq
@@ -119,6 +127,7 @@ static inline phys_addr_t virt_to_phys(volatile void *address)
{
return __pa(address);
}
+#define virt_to_phys virt_to_phys
/**
* phys_to_virt - map physical address to virtual
@@ -137,6 +146,7 @@ static inline void *phys_to_virt(phys_addr_t address)
{
return __va(address);
}
+#define phys_to_virt phys_to_virt
/*
* Change "struct page" to physical address.
@@ -169,11 +179,14 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
* else, you probably want one of the following.
*/
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
+#define ioremap_nocache ioremap_nocache
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
#define ioremap_uc ioremap_uc
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+#define ioremap_cache ioremap_cache
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val);
+#define ioremap_prot ioremap_prot
/**
* ioremap - map bus memory into CPU space
@@ -193,8 +206,10 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
{
return ioremap_nocache(offset, size);
}
+#define ioremap ioremap
extern void iounmap(volatile void __iomem *addr);
+#define iounmap iounmap
extern void set_iounmap_nonlazy(void);
@@ -203,53 +218,6 @@ extern void set_iounmap_nonlazy(void);
#include <asm-generic/iomap.h>
/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
-/**
- * memset_io Set a range of I/O memory to a constant value
- * @addr: The beginning of the I/O-memory range to set
- * @val: The value to set the memory to
- * @count: The number of bytes to set
- *
- * Set a range of I/O memory to a given value.
- */
-static inline void
-memset_io(volatile void __iomem *addr, unsigned char val, size_t count)
-{
- memset((void __force *)addr, val, count);
-}
-
-/**
- * memcpy_fromio Copy a block of data from I/O memory
- * @dst: The (RAM) destination for the copy
- * @src: The (I/O memory) source for the data
- * @count: The number of bytes to copy
- *
- * Copy a block of data from I/O memory.
- */
-static inline void
-memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count)
-{
- memcpy(dst, (const void __force *)src, count);
-}
-
-/**
- * memcpy_toio Copy a block of data into I/O memory
- * @dst: The (I/O memory) destination for the copy
- * @src: The (RAM) source for the data
- * @count: The number of bytes to copy
- *
- * Copy a block of data to I/O memory.
- */
-static inline void
-memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
-{
- memcpy((void __force *)dst, src, count);
-}
-
-/*
* ISA space is 'always mapped' on a typical x86 system, no need to
* explicitly ioremap() it. The fact that the ISA IO space is mapped
* to PAGE_OFFSET is pure coincidence - it does not mean ISA values
@@ -341,13 +309,38 @@ BUILDIO(b, b, char)
BUILDIO(w, w, short)
BUILDIO(l, , int)
+#define inb inb
+#define inw inw
+#define inl inl
+#define inb_p inb_p
+#define inw_p inw_p
+#define inl_p inl_p
+#define insb insb
+#define insw insw
+#define insl insl
+
+#define outb outb
+#define outw outw
+#define outl outl
+#define outb_p outb_p
+#define outw_p outw_p
+#define outl_p outl_p
+#define outsb outsb
+#define outsw outsw
+#define outsl outsl
+
extern void *xlate_dev_mem_ptr(phys_addr_t phys);
extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
+#define xlate_dev_mem_ptr xlate_dev_mem_ptr
+#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+
extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
enum page_cache_mode pcm);
extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
+#define ioremap_wc ioremap_wc
extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size);
+#define ioremap_wt ioremap_wt
extern bool is_early_ioremap_ptep(pte_t *ptep);
@@ -365,6 +358,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
#define IO_SPACE_LIMIT 0xffff
+#include <asm-generic/io.h>
+#undef PCI_IOBASE
+
#ifdef CONFIG_MTRR
extern int __must_check arch_phys_wc_index(int handle);
#define arch_phys_wc_index arch_phys_wc_index
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
new file mode 100644
index 000000000000..7dc777a6cb40
--- /dev/null
+++ b/arch/x86/include/asm/orc_types.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ORC_TYPES_H
+#define _ORC_TYPES_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+/*
+ * The ORC_REG_* registers are base registers which are used to find other
+ * registers on the stack.
+ *
+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
+ * address of the previous frame: the caller's SP before it called the current
+ * function.
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and BP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
+ * usually based on.
+ *
+ * The rest of the base registers are needed for special cases like entry code
+ * and GCC realigned stacks.
+ */
+#define ORC_REG_UNDEFINED 0
+#define ORC_REG_PREV_SP 1
+#define ORC_REG_DX 2
+#define ORC_REG_DI 3
+#define ORC_REG_BP 4
+#define ORC_REG_SP 5
+#define ORC_REG_R10 6
+#define ORC_REG_R13 7
+#define ORC_REG_BP_INDIRECT 8
+#define ORC_REG_SP_INDIRECT 9
+#define ORC_REG_MAX 15
+
+/*
+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
+ * caller's SP right before it made the call). Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
+ * to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
+ * points to the iret return frame.
+ *
+ * The UNWIND_HINT macros are used only for the unwind_hint struct. They
+ * aren't used in struct orc_entry due to size and complexity constraints.
+ * Objtool converts them to real types when it converts the hints to orc
+ * entries.
+ */
+#define ORC_TYPE_CALL 0
+#define ORC_TYPE_REGS 1
+#define ORC_TYPE_REGS_IRET 2
+#define UNWIND_HINT_TYPE_SAVE 3
+#define UNWIND_HINT_TYPE_RESTORE 4
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard. It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder. It tells the
+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
+ * the stack for a given code address. Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+ s16 sp_offset;
+ s16 bp_offset;
+ unsigned sp_reg:4;
+ unsigned bp_reg:4;
+ unsigned type:2;
+};
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack for the ORC unwinder.
+ *
+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
+ */
+struct unwind_hint {
+ u32 ip;
+ s16 sp_offset;
+ u8 sp_reg;
+ u8 type;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ORC_TYPES_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 028245e1c42b..0b03d655db7c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -22,6 +22,7 @@ struct vm86;
#include <asm/nops.h>
#include <asm/special_insns.h>
#include <asm/fpu/types.h>
+#include <asm/unwind_hints.h>
#include <linux/personality.h>
#include <linux/cache.h>
@@ -684,6 +685,7 @@ static inline void sync_core(void)
unsigned int tmp;
asm volatile (
+ UNWIND_HINT_SAVE
"mov %%ss, %0\n\t"
"pushq %q0\n\t"
"pushq %%rsp\n\t"
@@ -693,6 +695,7 @@ static inline void sync_core(void)
"pushq %q0\n\t"
"pushq $1f\n\t"
"iretq\n\t"
+ UNWIND_HINT_RESTORE
"1:"
: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
#endif
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 661dd305694a..045f99211a99 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -1,45 +1,56 @@
#ifndef _ASM_X86_RMWcc
#define _ASM_X86_RMWcc
+#define __CLOBBERS_MEM "memory"
+#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx"
+
#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
/* Use asm goto */
-#define __GEN_RMWcc(fullop, var, cc, ...) \
+#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
do { \
asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \
- : : "m" (var), ## __VA_ARGS__ \
- : "memory" : cc_label); \
+ : : [counter] "m" (var), ## __VA_ARGS__ \
+ : clobbers : cc_label); \
return 0; \
cc_label: \
return 1; \
} while (0)
-#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
- __GEN_RMWcc(op " " arg0, var, cc)
+#define __BINARY_RMWcc_ARG " %1, "
-#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
- __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
/* Use flags output or a set instruction */
-#define __GEN_RMWcc(fullop, var, cc, ...) \
+#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
do { \
bool c; \
asm volatile (fullop ";" CC_SET(cc) \
- : "+m" (var), CC_OUT(cc) (c) \
- : __VA_ARGS__ : "memory"); \
+ : [counter] "+m" (var), CC_OUT(cc) (c) \
+ : __VA_ARGS__ : clobbers); \
return c; \
} while (0)
+#define __BINARY_RMWcc_ARG " %2, "
+
+#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
+
#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
- __GEN_RMWcc(op " " arg0, var, cc)
+ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
+
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \
+ __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \
+ __CLOBBERS_MEM_CC_CX)
#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
- __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
+ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \
+ __CLOBBERS_MEM, vcon (val))
-#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \
+ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \
+ __CLOBBERS_MEM_CC_CX, vcon (val))
#endif /* _ASM_X86_RMWcc */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
new file mode 100644
index 000000000000..5e02b11c9b86
--- /dev/null
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -0,0 +1,103 @@
+#ifndef _ASM_X86_UNWIND_HINTS_H
+#define _ASM_X86_UNWIND_HINTS_H
+
+#include "orc_types.h"
+
+#ifdef __ASSEMBLY__
+
+/*
+ * In asm, there are two kinds of code: normal C-type callable functions and
+ * the rest. The normal callable functions can be called by other code, and
+ * don't do anything unusual with the stack. Such normal callable functions
+ * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this
+ * category. In this case, no special debugging annotations are needed because
+ * objtool can automatically generate the ORC data for the ORC unwinder to read
+ * at runtime.
+ *
+ * Anything which doesn't fall into the above category, such as syscall and
+ * interrupt handlers, tends to not be called directly by other functions, and
+ * often does unusual non-C-function-type things with the stack pointer. Such
+ * code needs to be annotated such that objtool can understand it. The
+ * following CFI hint macros are for this type of code.
+ *
+ * These macros provide hints to objtool about the state of the stack at each
+ * instruction. Objtool starts from the hints and follows the code flow,
+ * making automatic CFI adjustments when it sees pushes and pops, filling out
+ * the debuginfo as necessary. It will also warn if it sees any
+ * inconsistencies.
+ */
+.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL
+#ifdef CONFIG_STACK_VALIDATION
+.Lunwind_hint_ip_\@:
+ .pushsection .discard.unwind_hints
+ /* struct unwind_hint */
+ .long .Lunwind_hint_ip_\@ - .
+ .short \sp_offset
+ .byte \sp_reg
+ .byte \type
+ .popsection
+#endif
+.endm
+
+.macro UNWIND_HINT_EMPTY
+ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED
+.endm
+
+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
+ .if \base == %rsp && \indirect
+ .set sp_reg, ORC_REG_SP_INDIRECT
+ .elseif \base == %rsp
+ .set sp_reg, ORC_REG_SP
+ .elseif \base == %rbp
+ .set sp_reg, ORC_REG_BP
+ .elseif \base == %rdi
+ .set sp_reg, ORC_REG_DI
+ .elseif \base == %rdx
+ .set sp_reg, ORC_REG_DX
+ .elseif \base == %r10
+ .set sp_reg, ORC_REG_R10
+ .else
+ .error "UNWIND_HINT_REGS: bad base register"
+ .endif
+
+ .set sp_offset, \offset
+
+ .if \iret
+ .set type, ORC_TYPE_REGS_IRET
+ .elseif \extra == 0
+ .set type, ORC_TYPE_REGS_IRET
+ .set sp_offset, \offset + (16*8)
+ .else
+ .set type, ORC_TYPE_REGS
+ .endif
+
+ UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
+.endm
+
+.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
+ UNWIND_HINT_REGS base=\base offset=\offset iret=1
+.endm
+
+.macro UNWIND_HINT_FUNC sp_offset=8
+ UNWIND_HINT sp_offset=\sp_offset
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define UNWIND_HINT(sp_reg, sp_offset, type) \
+ "987: \n\t" \
+ ".pushsection .discard.unwind_hints\n\t" \
+ /* struct unwind_hint */ \
+ ".long 987b - .\n\t" \
+ ".short " __stringify(sp_offset) "\n\t" \
+ ".byte " __stringify(sp_reg) "\n\t" \
+ ".byte " __stringify(type) "\n\t" \
+ ".popsection\n\t"
+
+#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE)
+
+#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index dbce3cca94cb..bd265a4cf108 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -94,6 +94,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (stack_name)
printk("%s <%s>\n", log_lvl, stack_name);
+ if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
+ __show_regs(regs, 0);
+
/*
* Scan the stack, printing any text addresses we find. At the
* same time, follow proper stack frames with the unwinder.
@@ -118,10 +121,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* Don't print regs->ip again if it was already printed
* by __show_regs() below.
*/
- if (regs && stack == &regs->ip) {
- unwind_next_frame(&state);
- continue;
- }
+ if (regs && stack == &regs->ip)
+ goto next;
if (stack == ret_addr_p)
reliable = 1;
@@ -144,6 +145,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (!reliable)
continue;
+next:
/*
* Get the next frame from the unwinder. No need to
* check for an error: if anything goes wrong, the rest
@@ -153,7 +155,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
/* if the frame has entry regs, print them */
regs = unwind_get_entry_regs(&state);
- if (regs)
+ if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
__show_regs(regs, 0);
}
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index e5f0b40e66d2..4f0481474903 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -37,7 +37,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack < begin || stack > end)
+ if (stack <= begin || stack > end)
return false;
info->type = STACK_TYPE_IRQ;
@@ -62,7 +62,7 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack < begin || stack > end)
+ if (stack <= begin || stack > end)
return false;
info->type = STACK_TYPE_SOFTIRQ;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 3e1471d57487..225af4184f06 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -55,7 +55,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
begin = end - (exception_stack_sizes[k] / sizeof(long));
regs = (struct pt_regs *)end - 1;
- if (stack < begin || stack >= end)
+ if (stack <= begin || stack >= end)
continue;
info->type = STACK_TYPE_EXCEPTION + k;
@@ -78,7 +78,7 @@ static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack < begin || stack > end)
+ if (stack <= begin || stack > end)
return false;
info->type = STACK_TYPE_IRQ;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c3169be4c596..2987e3991c2b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -279,6 +279,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
unsigned prev_fsindex, prev_gsindex;
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+ this_cpu_read(irq_count) != -1);
+
switch_fpu_prepare(prev_fpu, cpu);
/* We must save %fs and %gs before load_TLS() because