diff options
Diffstat (limited to 'arch/powerpc/kernel')
47 files changed, 3305 insertions, 1448 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 811f441a125f..e132902e1f14 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -25,8 +25,6 @@ CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) -# do not trace tracer code -CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) # timers used by tracing CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) endif @@ -58,6 +56,7 @@ obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y) obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o +obj-$(CONFIG_PPC_DT_CPU_FTRS) += dt_cpu_ftrs.o obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o @@ -97,6 +96,7 @@ obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o +obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -118,10 +118,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o -obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o -obj-$(CONFIG_TRACING) += trace_clock.o +obj-y += trace/ ifneq ($(CONFIG_PPC_INDIRECT_PIO),y) obj-y += iomap.o @@ -142,14 +139,14 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o # Disable GCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n UBSAN_SANITIZE_prom_init.o := n -GCOV_PROFILE_ftrace.o := n -UBSAN_SANITIZE_ftrace.o := n GCOV_PROFILE_machine_kexec_64.o := n UBSAN_SANITIZE_machine_kexec_64.o := n GCOV_PROFILE_machine_kexec_32.o := n UBSAN_SANITIZE_machine_kexec_32.o := n GCOV_PROFILE_kprobes.o := n UBSAN_SANITIZE_kprobes.o := n +GCOV_PROFILE_kprobes-ftrace.o := n +UBSAN_SANITIZE_kprobes-ftrace.o := n UBSAN_SANITIZE_vdso.o := n extra-$(CONFIG_PPC_FPU) += fpu.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4367e7df51a1..709e23425317 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -185,6 +185,7 @@ int main(void) #ifdef CONFIG_PPC_MM_SLICES OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize); OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize); + DEFINE(PACA_ADDR_LIMIT, offsetof(struct paca_struct, addr_limit)); DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); #endif /* CONFIG_PPC_MM_SLICES */ #endif @@ -219,6 +220,7 @@ int main(void) OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXSLB, paca_struct, exslb); + OFFSET(PACA_EXNMI, paca_struct, exnmi); OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); @@ -232,7 +234,9 @@ int main(void) OFFSET(PACAEMERGSP, paca_struct, emergency_sp); #ifdef CONFIG_PPC_BOOK3S_64 OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp); + OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp); OFFSET(PACA_IN_MCE, paca_struct, in_mce); + OFFSET(PACA_IN_NMI, paca_struct, in_nmi); #endif OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); @@ -399,8 +403,8 @@ int main(void) DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); #endif -#ifdef MAX_PGD_TABLE_SIZE - DEFINE(PGD_TABLE_SIZE, MAX_PGD_TABLE_SIZE); +#ifdef CONFIG_PPC_BOOK3S_64 + DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE))); #else DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); #endif @@ -630,6 +634,8 @@ int main(void) HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); + HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys); + HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt); HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); @@ -715,6 +721,14 @@ int main(void) OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6); #endif +#ifdef CONFIG_KVM_XICS + DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu, + arch.xive_saved_state)); + DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu, + arch.xive_cam_word)); + DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed)); +#endif + #ifdef CONFIG_KVM_EXIT_TIMING OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu); OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl); @@ -727,6 +741,7 @@ int main(void) OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state); OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask); OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); + OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas); #endif DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 7fe8c79e6937..10cb2896b2ae 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -29,7 +29,8 @@ _GLOBAL(__setup_cpu_power7) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - bl __init_LPCR + li r4,(LPCR_LPES1 >> LPCR_LPES_SH) + bl __init_LPCR_ISA206 bl __init_tlb_power7 mtlr r11 blr @@ -42,7 +43,8 @@ _GLOBAL(__restore_cpu_power7) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - bl __init_LPCR + li r4,(LPCR_LPES1 >> LPCR_LPES_SH) + bl __init_LPCR_ISA206 bl __init_tlb_power7 mtlr r11 blr @@ -59,7 +61,8 @@ _GLOBAL(__setup_cpu_power8) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH - bl __init_LPCR + li r4,0 /* LPES = 0 */ + bl __init_LPCR_ISA206 bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV @@ -80,7 +83,8 @@ _GLOBAL(__restore_cpu_power8) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH - bl __init_LPCR + li r4,0 /* LPES = 0 */ + bl __init_LPCR_ISA206 bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV @@ -99,11 +103,12 @@ _GLOBAL(__setup_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) + LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) andc r3, r3, r4 - bl __init_LPCR + li r4,0 /* LPES = 0 */ + bl __init_LPCR_ISA300 bl __init_HFSCR bl __init_tlb_power9 bl __init_PMU_HV @@ -122,11 +127,12 @@ _GLOBAL(__restore_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) + LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) andc r3, r3, r4 - bl __init_LPCR + li r4,0 /* LPES = 0 */ + bl __init_LPCR_ISA300 bl __init_HFSCR bl __init_tlb_power9 bl __init_PMU_HV @@ -144,9 +150,9 @@ __init_hvmode_206: std r5,CPU_SPEC_FEATURES(r4) blr -__init_LPCR: +__init_LPCR_ISA206: /* Setup a sane LPCR: - * Called with initial LPCR in R3 + * Called with initial LPCR in R3 and desired LPES 2-bit value in R4 * * LPES = 0b01 (HSRR0/1 used for 0x500) * PECE = 0b111 @@ -157,16 +163,18 @@ __init_LPCR: * * Other bits untouched for now */ - li r5,1 - rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 + li r5,0x10 + rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 + + /* POWER9 has no VRMASD */ +__init_LPCR_ISA300: + rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) li r5,4 rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 clrrdi r3,r3,1 /* clear HDICE */ li r5,4 rldimi r3,r5, LPCR_VC_SH, 0 - li r5,0x10 - rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 mtspr SPRN_LPCR,r3 isync blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index e79b9daa873c..9b3e88b1a9c8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -23,7 +23,9 @@ #include <asm/mmu.h> #include <asm/setup.h> -struct cpu_spec* cur_cpu_spec = NULL; +static struct cpu_spec the_cpu_spec __read_mostly; + +struct cpu_spec* cur_cpu_spec __read_mostly = NULL; EXPORT_SYMBOL(cur_cpu_spec); /* The platform string corresponding to the real PVR */ @@ -2179,7 +2181,15 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_E500 */ }; -static struct cpu_spec the_cpu_spec; +void __init set_cur_cpu_spec(struct cpu_spec *s) +{ + struct cpu_spec *t = &the_cpu_spec; + + t = PTRRELOC(t); + *t = *s; + + *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; +} static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, struct cpu_spec *s) @@ -2266,6 +2276,29 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr) return NULL; } +/* + * Used by cpufeatures to get the name for CPUs with a PVR table. + * If they don't hae a PVR table, cpufeatures gets the name from + * cpu device-tree node. + */ +void __init identify_cpu_name(unsigned int pvr) +{ + struct cpu_spec *s = cpu_specs; + struct cpu_spec *t = &the_cpu_spec; + int i; + + s = PTRRELOC(s); + t = PTRRELOC(t); + + for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) { + if ((pvr & s->pvr_mask) == s->pvr_value) { + t->cpu_name = s->cpu_name; + return; + } + } +} + + #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = { [0 ... NUM_CPU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 47b63de81f9b..cbabb5adccd9 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -43,8 +43,6 @@ #define IPI_TIMEOUT 10000 #define REAL_MODE_TIMEOUT 10000 -/* This keeps a track of which one is the crashing cpu. */ -int crashing_cpu = -1; static int time_to_dump; #define CRASH_HANDLER_MAX 3 diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 2128f3a96c32..b6fe883b1016 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -20,18 +20,60 @@ #include <asm/kvm_ppc.h> #ifdef CONFIG_SMP -void doorbell_setup_this_cpu(void) + +/* + * Doorbells must only be used if CPU_FTR_DBELL is available. + * msgsnd is used in HV, and msgsndp is used in !HV. + * + * These should be used by platform code that is aware of restrictions. + * Other arch code should use ->cause_ipi. + * + * doorbell_global_ipi() sends a dbell to any target CPU. + * Must be used only by architectures that address msgsnd target + * by PIR/get_hard_smp_processor_id. + */ +void doorbell_global_ipi(int cpu) { - unsigned long tag = mfspr(SPRN_DOORBELL_CPUTAG) & PPC_DBELL_TAG_MASK; + u32 tag = get_hard_smp_processor_id(cpu); - smp_muxed_ipi_set_data(smp_processor_id(), tag); + kvmppc_set_host_ipi(cpu, 1); + /* Order previous accesses vs. msgsnd, which is treated as a store */ + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); } -void doorbell_cause_ipi(int cpu, unsigned long data) +/* + * doorbell_core_ipi() sends a dbell to a target CPU in the same core. + * Must be used only by architectures that address msgsnd target + * by TIR/cpu_thread_in_core. + */ +void doorbell_core_ipi(int cpu) { + u32 tag = cpu_thread_in_core(cpu); + + kvmppc_set_host_ipi(cpu, 1); /* Order previous accesses vs. msgsnd, which is treated as a store */ - mb(); - ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, data); + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); +} + +/* + * Attempt to cause a core doorbell if destination is on the same core. + * Returns 1 on success, 0 on failure. + */ +int doorbell_try_core_ipi(int cpu) +{ + int this_cpu = get_cpu(); + int ret = 0; + + if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) { + doorbell_core_ipi(cpu); + ret = 1; + } + + put_cpu(); + + return ret; } void doorbell_exception(struct pt_regs *regs) @@ -40,12 +82,14 @@ void doorbell_exception(struct pt_regs *regs) irq_enter(); + ppc_msgsync(); + may_hard_irq_enable(); kvmppc_set_host_ipi(smp_processor_id(), 0); __this_cpu_inc(irq_stat.doorbell_irqs); - smp_ipi_demux(); + smp_ipi_demux_relaxed(); /* already performed the barrier */ irq_exit(); set_irq_regs(old_regs); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c new file mode 100644 index 000000000000..fcc7588a96d6 --- /dev/null +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -0,0 +1,1031 @@ +/* + * Copyright 2017, Nicholas Piggin, IBM Corporation + * Licensed under GPLv2. + */ + +#define pr_fmt(fmt) "dt-cpu-ftrs: " fmt + +#include <linux/export.h> +#include <linux/init.h> +#include <linux/jump_label.h> +#include <linux/memblock.h> +#include <linux/printk.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/threads.h> + +#include <asm/cputable.h> +#include <asm/dt_cpu_ftrs.h> +#include <asm/mmu.h> +#include <asm/oprofile_impl.h> +#include <asm/prom.h> +#include <asm/setup.h> + + +/* Device-tree visible constants follow */ +#define ISA_V2_07B 2070 +#define ISA_V3_0B 3000 + +#define USABLE_PR (1U << 0) +#define USABLE_OS (1U << 1) +#define USABLE_HV (1U << 2) + +#define HV_SUPPORT_HFSCR (1U << 0) +#define OS_SUPPORT_FSCR (1U << 0) + +/* For parsing, we define all bits set as "NONE" case */ +#define HV_SUPPORT_NONE 0xffffffffU +#define OS_SUPPORT_NONE 0xffffffffU + +struct dt_cpu_feature { + const char *name; + uint32_t isa; + uint32_t usable_privilege; + uint32_t hv_support; + uint32_t os_support; + uint32_t hfscr_bit_nr; + uint32_t fscr_bit_nr; + uint32_t hwcap_bit_nr; + /* fdt parsing */ + unsigned long node; + int enabled; + int disabled; +}; + +#define CPU_FTRS_BASE \ + (CPU_FTR_USE_TB | \ + CPU_FTR_LWSYNC | \ + CPU_FTR_FPU_UNAVAILABLE |\ + CPU_FTR_NODSISRALIGN |\ + CPU_FTR_NOEXECUTE |\ + CPU_FTR_COHERENT_ICACHE | \ + CPU_FTR_STCX_CHECKS_ADDRESS |\ + CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ + CPU_FTR_DAWR | \ + CPU_FTR_ARCH_206 |\ + CPU_FTR_ARCH_207S) + +#define MMU_FTRS_HASH_BASE (MMU_FTRS_POWER8) + +#define COMMON_USER_BASE (PPC_FEATURE_32 | PPC_FEATURE_64 | \ + PPC_FEATURE_ARCH_2_06 |\ + PPC_FEATURE_ICACHE_SNOOP) +#define COMMON_USER2_BASE (PPC_FEATURE2_ARCH_2_07 | \ + PPC_FEATURE2_ISEL) +/* + * Set up the base CPU + */ + +extern void __flush_tlb_power8(unsigned int action); +extern void __flush_tlb_power9(unsigned int action); +extern long __machine_check_early_realmode_p8(struct pt_regs *regs); +extern long __machine_check_early_realmode_p9(struct pt_regs *regs); + +static int hv_mode; + +static struct { + u64 lpcr; + u64 hfscr; + u64 fscr; +} system_registers; + +static void (*init_pmu_registers)(void); + +static void cpufeatures_flush_tlb(void) +{ + unsigned long rb; + unsigned int i, num_sets; + + /* + * This is a temporary measure to keep equivalent TLB flush as the + * cputable based setup code. + */ + switch (PVR_VER(mfspr(SPRN_PVR))) { + case PVR_POWER8: + case PVR_POWER8E: + case PVR_POWER8NVL: + num_sets = POWER8_TLB_SETS; + break; + case PVR_POWER9: + num_sets = POWER9_TLB_SETS_HASH; + break; + default: + num_sets = 1; + pr_err("unknown CPU version for boot TLB flush\n"); + break; + } + + asm volatile("ptesync" : : : "memory"); + rb = TLBIEL_INVAL_SET; + for (i = 0; i < num_sets; i++) { + asm volatile("tlbiel %0" : : "r" (rb)); + rb += 1 << TLBIEL_INVAL_SET_SHIFT; + } + asm volatile("ptesync" : : : "memory"); +} + +static void __restore_cpu_cpufeatures(void) +{ + /* + * LPCR is restored by the power on engine already. It can be changed + * after early init e.g., by radix enable, and we have no unified API + * for saving and restoring such SPRs. + * + * This ->restore hook should really be removed from idle and register + * restore moved directly into the idle restore code, because this code + * doesn't know how idle is implemented or what it needs restored here. + * + * The best we can do to accommodate secondary boot and idle restore + * for now is "or" LPCR with existing. + */ + + mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR)); + if (hv_mode) { + mtspr(SPRN_LPID, 0); + mtspr(SPRN_HFSCR, system_registers.hfscr); + } + mtspr(SPRN_FSCR, system_registers.fscr); + + if (init_pmu_registers) + init_pmu_registers(); + + cpufeatures_flush_tlb(); +} + +static char dt_cpu_name[64]; + +static struct cpu_spec __initdata base_cpu_spec = { + .cpu_name = NULL, + .cpu_features = CPU_FTRS_BASE, + .cpu_user_features = COMMON_USER_BASE, + .cpu_user_features2 = COMMON_USER2_BASE, + .mmu_features = 0, + .icache_bsize = 32, /* minimum block size, fixed by */ + .dcache_bsize = 32, /* cache info init. */ + .num_pmcs = 0, + .pmc_type = PPC_PMC_DEFAULT, + .oprofile_cpu_type = NULL, + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = NULL, + .cpu_restore = __restore_cpu_cpufeatures, + .flush_tlb = NULL, + .machine_check_early = NULL, + .platform = NULL, +}; + +static void __init cpufeatures_setup_cpu(void) +{ + set_cur_cpu_spec(&base_cpu_spec); + + cur_cpu_spec->pvr_mask = -1; + cur_cpu_spec->pvr_value = mfspr(SPRN_PVR); + + /* Initialize the base environment -- clear FSCR/HFSCR. */ + hv_mode = !!(mfmsr() & MSR_HV); + if (hv_mode) { + /* CPU_FTR_HVMODE is used early in PACA setup */ + cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; + mtspr(SPRN_HFSCR, 0); + } + mtspr(SPRN_FSCR, 0); + + /* + * LPCR does not get cleared, to match behaviour with secondaries + * in __restore_cpu_cpufeatures. Once the idle code is fixed, this + * could clear LPCR too. + */ +} + +static int __init feat_try_enable_unknown(struct dt_cpu_feature *f) +{ + if (f->hv_support == HV_SUPPORT_NONE) { + } else if (f->hv_support & HV_SUPPORT_HFSCR) { + u64 hfscr = mfspr(SPRN_HFSCR); + hfscr |= 1UL << f->hfscr_bit_nr; + mtspr(SPRN_HFSCR, hfscr); + } else { + /* Does not have a known recipe */ + return 0; + } + + if (f->os_support == OS_SUPPORT_NONE) { + } else if (f->os_support & OS_SUPPORT_FSCR) { + u64 fscr = mfspr(SPRN_FSCR); + fscr |= 1UL << f->fscr_bit_nr; + mtspr(SPRN_FSCR, fscr); + } else { + /* Does not have a known recipe */ + return 0; + } + + if ((f->usable_privilege & USABLE_PR) && (f->hwcap_bit_nr != -1)) { + uint32_t word = f->hwcap_bit_nr / 32; + uint32_t bit = f->hwcap_bit_nr % 32; + + if (word == 0) + cur_cpu_spec->cpu_user_features |= 1U << bit; + else if (word == 1) + cur_cpu_spec->cpu_user_features2 |= 1U << bit; + else + pr_err("%s could not advertise to user (no hwcap bits)\n", f->name); + } + + return 1; +} + +static int __init feat_enable(struct dt_cpu_feature *f) +{ + if (f->hv_support != HV_SUPPORT_NONE) { + if (f->hfscr_bit_nr != -1) { + u64 hfscr = mfspr(SPRN_HFSCR); + hfscr |= 1UL << f->hfscr_bit_nr; + mtspr(SPRN_HFSCR, hfscr); + } + } + + if (f->os_support != OS_SUPPORT_NONE) { + if (f->fscr_bit_nr != -1) { + u64 fscr = mfspr(SPRN_FSCR); + fscr |= 1UL << f->fscr_bit_nr; + mtspr(SPRN_FSCR, fscr); + } + } + + if ((f->usable_privilege & USABLE_PR) && (f->hwcap_bit_nr != -1)) { + uint32_t word = f->hwcap_bit_nr / 32; + uint32_t bit = f->hwcap_bit_nr % 32; + + if (word == 0) + cur_cpu_spec->cpu_user_features |= 1U << bit; + else if (word == 1) + cur_cpu_spec->cpu_user_features2 |= 1U << bit; + else + pr_err("CPU feature: %s could not advertise to user (no hwcap bits)\n", f->name); + } + + return 1; +} + +static int __init feat_disable(struct dt_cpu_feature *f) +{ + return 0; +} + +static int __init feat_enable_hv(struct dt_cpu_feature *f) +{ + u64 lpcr; + + if (!hv_mode) { + pr_err("CPU feature hypervisor present in device tree but HV mode not enabled in the CPU. Ignoring.\n"); + return 0; + } + + mtspr(SPRN_LPID, 0); + + lpcr = mfspr(SPRN_LPCR); + lpcr &= ~LPCR_LPES0; /* HV external interrupts */ + mtspr(SPRN_LPCR, lpcr); + + cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; + + return 1; +} + +static int __init feat_enable_le(struct dt_cpu_feature *f) +{ + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_TRUE_LE; + return 1; +} + +static int __init feat_enable_smt(struct dt_cpu_feature *f) +{ + cur_cpu_spec->cpu_features |= CPU_FTR_SMT; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_SMT; + return 1; +} + +static int __init feat_enable_idle_nap(struct dt_cpu_feature *f) +{ + u64 lpcr; + + /* Set PECE wakeup modes for ISA 207 */ + lpcr = mfspr(SPRN_LPCR); + lpcr |= LPCR_PECE0; + lpcr |= LPCR_PECE1; + lpcr |= LPCR_PECE2; + mtspr(SPRN_LPCR, lpcr); + + return 1; +} + +static int __init feat_enable_align_dsisr(struct dt_cpu_feature *f) +{ + cur_cpu_spec->cpu_features &= ~CPU_FTR_NODSISRALIGN; + + return 1; +} + +static int __init feat_enable_idle_stop(struct dt_cpu_feature *f) +{ + u64 lpcr; + + /* Set PECE wakeup modes for ISAv3.0B */ + lpcr = mfspr(SPRN_LPCR); + lpcr |= LPCR_PECE0; + lpcr |= LPCR_PECE1; + lpcr |= LPCR_PECE2; + mtspr(SPRN_LPCR, lpcr); + + return 1; +} + +static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f) +{ + u64 lpcr; + + lpcr = mfspr(SPRN_LPCR); + lpcr &= ~LPCR_ISL; + + /* VRMASD */ + lpcr |= LPCR_VPM0; + lpcr &= ~LPCR_VPM1; + lpcr |= 0x10UL << LPCR_VRMASD_SH; /* L=1 LP=00 */ + mtspr(SPRN_LPCR, lpcr); + + cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU; + + return 1; +} + +static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) +{ + u64 lpcr; + + lpcr = mfspr(SPRN_LPCR); + lpcr &= ~LPCR_ISL; + mtspr(SPRN_LPCR, lpcr); + + cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU; + + return 1; +} + + +static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f) +{ +#ifdef CONFIG_PPC_RADIX_MMU + cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; + cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU; + + return 1; +#endif + return 0; +} + +static int __init feat_enable_dscr(struct dt_cpu_feature *f) +{ + u64 lpcr; + + feat_enable(f); + + lpcr = mfspr(SPRN_LPCR); + lpcr &= ~LPCR_DPFD; + lpcr |= (4UL << LPCR_DPFD_SH); + mtspr(SPRN_LPCR, lpcr); + + return 1; +} + +static void hfscr_pmu_enable(void) +{ + u64 hfscr = mfspr(SPRN_HFSCR); + hfscr |= PPC_BIT(60); + mtspr(SPRN_HFSCR, hfscr); +} + +static void init_pmu_power8(void) +{ + if (hv_mode) { + mtspr(SPRN_MMCRC, 0); + mtspr(SPRN_MMCRH, 0); + } + + mtspr(SPRN_MMCRA, 0); + mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR1, 0); + mtspr(SPRN_MMCR2, 0); + mtspr(SPRN_MMCRS, 0); +} + +static int __init feat_enable_mce_power8(struct dt_cpu_feature *f) +{ + cur_cpu_spec->platform = "power8"; + cur_cpu_spec->flush_tlb = __flush_tlb_power8; + cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8; + + return 1; +} + +static int __init feat_enable_pmu_power8(struct dt_cpu_feature *f) +{ + hfscr_pmu_enable(); + + init_pmu_power8(); + init_pmu_registers = init_pmu_power8; + + cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT; + if (pvr_version_is(PVR_POWER8E)) + cur_cpu_spec->cpu_features |= CPU_FTR_PMAO_BUG; + + cur_cpu_spec->num_pmcs = 6; + cur_cpu_spec->pmc_type = PPC_PMC_IBM; + cur_cpu_spec->oprofile_cpu_type = "ppc64/power8"; + + return 1; +} + +static void init_pmu_power9(void) +{ + if (hv_mode) + mtspr(SPRN_MMCRC, 0); + + mtspr(SPRN_MMCRA, 0); + mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR1, 0); + mtspr(SPRN_MMCR2, 0); +} + +static int __init feat_enable_mce_power9(struct dt_cpu_feature *f) +{ + cur_cpu_spec->platform = "power9"; + cur_cpu_spec->flush_tlb = __flush_tlb_power9; + cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9; + + return 1; +} + +static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f) +{ + hfscr_pmu_enable(); + + init_pmu_power9(); + init_pmu_registers = init_pmu_power9; + + cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT; + + cur_cpu_spec->num_pmcs = 6; + cur_cpu_spec->pmc_type = PPC_PMC_IBM; + cur_cpu_spec->oprofile_cpu_type = "ppc64/power9"; + + return 1; +} + +static int __init feat_enable_tm(struct dt_cpu_feature *f) +{ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + feat_enable(f); + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NOSC; + return 1; +#endif + return 0; +} + +static int __init feat_enable_fp(struct dt_cpu_feature *f) +{ + feat_enable(f); + cur_cpu_spec->cpu_features &= ~CPU_FTR_FPU_UNAVAILABLE; + + return 1; +} + +static int __init feat_enable_vector(struct dt_cpu_feature *f) +{ +#ifdef CONFIG_ALTIVEC + feat_enable(f); + cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; + cur_cpu_spec->cpu_features |= CPU_FTR_VMX_COPY; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; + + return 1; +#endif + return 0; +} + +static int __init feat_enable_vsx(struct dt_cpu_feature *f) +{ +#ifdef CONFIG_VSX + feat_enable(f); + cur_cpu_spec->cpu_features |= CPU_FTR_VSX; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_VSX; + + return 1; +#endif + return 0; +} + +static int __init feat_enable_purr(struct dt_cpu_feature *f) +{ + cur_cpu_spec->cpu_features |= CPU_FTR_PURR | CPU_FTR_SPURR; + + return 1; +} + +static int __init feat_enable_ebb(struct dt_cpu_feature *f) +{ + /* + * PPC_FEATURE2_EBB is enabled in PMU init code because it has + * historically been related to the PMU facility. This may have + * to be decoupled if EBB becomes more generic. For now, follow + * existing convention. + */ + f->hwcap_bit_nr = -1; + feat_enable(f); + + return 1; +} + +static int __init feat_enable_dbell(struct dt_cpu_feature *f) +{ + u64 lpcr; + + /* P9 has an HFSCR for privileged state */ + feat_enable(f); + + cur_cpu_spec->cpu_features |= CPU_FTR_DBELL; + + lpcr = mfspr(SPRN_LPCR); + lpcr |= LPCR_PECEDH; /* hyp doorbell wakeup */ + mtspr(SPRN_LPCR, lpcr); + + return 1; +} + +static int __init feat_enable_hvi(struct dt_cpu_feature *f) +{ + u64 lpcr; + + /* + * POWER9 XIVE interrupts including in OPAL XICS compatibility + * are always delivered as hypervisor virtualization interrupts (HVI) + * rather than EE. + * + * However LPES0 is not set here, in the chance that an EE does get + * delivered to the host somehow, the EE handler would not expect it + * to be delivered in LPES0 mode (e.g., using SRR[01]). This could + * happen if there is a bug in interrupt controller code, or IC is + * misconfigured in systemsim. + */ + + lpcr = mfspr(SPRN_LPCR); + lpcr |= LPCR_HVICE; /* enable hvi interrupts */ + lpcr |= LPCR_HEIC; /* disable ee interrupts when MSR_HV */ + lpcr |= LPCR_PECE_HVEE; /* hvi can wake from stop */ + mtspr(SPRN_LPCR, lpcr); + + return 1; +} + +static int __init feat_enable_large_ci(struct dt_cpu_feature *f) +{ + cur_cpu_spec->mmu_features |= MMU_FTR_CI_LARGE_PAGE; + + return 1; +} + +struct dt_cpu_feature_match { + const char *name; + int (*enable)(struct dt_cpu_feature *f); + u64 cpu_ftr_bit_mask; +}; + +static struct dt_cpu_feature_match __initdata + dt_cpu_feature_match_table[] = { + {"hypervisor", feat_enable_hv, 0}, + {"big-endian", feat_enable, 0}, + {"little-endian", feat_enable_le, CPU_FTR_REAL_LE}, + {"smt", feat_enable_smt, 0}, + {"interrupt-facilities", feat_enable, 0}, + {"timer-facilities", feat_enable, 0}, + {"timer-facilities-v3", feat_enable, 0}, + {"debug-facilities", feat_enable, 0}, + {"come-from-address-register", feat_enable, CPU_FTR_CFAR}, + {"branch-tracing", feat_enable, 0}, + {"floating-point", feat_enable_fp, 0}, + {"vector", feat_enable_vector, 0}, + {"vector-scalar", feat_enable_vsx, 0}, + {"vector-scalar-v3", feat_enable, 0}, + {"decimal-floating-point", feat_enable, 0}, + {"decimal-integer", feat_enable, 0}, + {"quadword-load-store", feat_enable, 0}, + {"vector-crypto", feat_enable, 0}, + {"mmu-hash", feat_enable_mmu_hash, 0}, + {"mmu-radix", feat_enable_mmu_radix, 0}, + {"mmu-hash-v3", feat_enable_mmu_hash_v3, 0}, + {"virtual-page-class-key-protection", feat_enable, 0}, + {"transactional-memory", feat_enable_tm, CPU_FTR_TM}, + {"transactional-memory-v3", feat_enable_tm, 0}, + {"idle-nap", feat_enable_idle_nap, 0}, + {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0}, + {"idle-stop", feat_enable_idle_stop, 0}, + {"machine-check-power8", feat_enable_mce_power8, 0}, + {"performance-monitor-power8", feat_enable_pmu_power8, 0}, + {"data-stream-control-register", feat_enable_dscr, CPU_FTR_DSCR}, + {"event-based-branch", feat_enable_ebb, 0}, + {"target-address-register", feat_enable, 0}, + {"branch-history-rolling-buffer", feat_enable, 0}, + {"control-register", feat_enable, CPU_FTR_CTRL}, + {"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL}, + {"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL}, + {"processor-utilization-of-resources-register", feat_enable_purr, 0}, + {"subcore", feat_enable, CPU_FTR_SUBCORE}, + {"no-execute", feat_enable, 0}, + {"strong-access-ordering", feat_enable, CPU_FTR_SAO}, + {"cache-inhibited-large-page", feat_enable_large_ci, 0}, + {"coprocessor-icswx", feat_enable, CPU_FTR_ICSWX}, + {"hypervisor-virtualization-interrupt", feat_enable_hvi, 0}, + {"program-priority-register", feat_enable, CPU_FTR_HAS_PPR}, + {"wait", feat_enable, 0}, + {"atomic-memory-operations", feat_enable, 0}, + {"branch-v3", feat_enable, 0}, + {"copy-paste", feat_enable, 0}, + {"decimal-floating-point-v3", feat_enable, 0}, + {"decimal-integer-v3", feat_enable, 0}, + {"fixed-point-v3", feat_enable, 0}, + {"floating-point-v3", feat_enable, 0}, + {"group-start-register", feat_enable, 0}, + {"pc-relative-addressing", feat_enable, 0}, + {"machine-check-power9", feat_enable_mce_power9, 0}, + {"performance-monitor-power9", feat_enable_pmu_power9, 0}, + {"event-based-branch-v3", feat_enable, 0}, + {"random-number-generator", feat_enable, 0}, + {"system-call-vectored", feat_disable, 0}, + {"trace-interrupt-v3", feat_enable, 0}, + {"vector-v3", feat_enable, 0}, + {"vector-binary128", feat_enable, 0}, + {"vector-binary16", feat_enable, 0}, + {"wait-v3", feat_enable, 0}, +}; + +/* XXX: how to configure this? Default + boot time? */ +#ifdef CONFIG_PPC_CPUFEATURES_ENABLE_UNKNOWN +#define CPU_FEATURE_ENABLE_UNKNOWN 1 +#else +#define CPU_FEATURE_ENABLE_UNKNOWN 0 +#endif + +static void __init cpufeatures_setup_start(u32 isa) +{ + pr_info("setup for ISA %d\n", isa); + + if (isa >= 3000) { + cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300; + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00; + } +} + +static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f) +{ + const struct dt_cpu_feature_match *m; + bool known = false; + int i; + + for (i = 0; i < ARRAY_SIZE(dt_cpu_feature_match_table); i++) { + m = &dt_cpu_feature_match_table[i]; + if (!strcmp(f->name, m->name)) { + known = true; + if (m->enable(f)) + break; + + pr_info("not enabling: %s (disabled or unsupported by kernel)\n", + f->name); + return false; + } + } + + if (!known && CPU_FEATURE_ENABLE_UNKNOWN) { + if (!feat_try_enable_unknown(f)) { + pr_info("not enabling: %s (unknown and unsupported by kernel)\n", + f->name); + return false; + } + } + + if (m->cpu_ftr_bit_mask) + cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask; + + if (known) + pr_debug("enabling: %s\n", f->name); + else + pr_debug("enabling: %s (unknown)\n", f->name); + + return true; +} + +static __init void cpufeatures_cpu_quirks(void) +{ + int version = mfspr(SPRN_PVR); + + /* + * Not all quirks can be derived from the cpufeatures device tree. + */ + if ((version & 0xffffff00) == 0x004e0100) + cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; +} + +static void __init cpufeatures_setup_finished(void) +{ + cpufeatures_cpu_quirks(); + + if (hv_mode && !(cur_cpu_spec->cpu_features & CPU_FTR_HVMODE)) { + pr_err("hypervisor not present in device tree but HV mode is enabled in the CPU. Enabling.\n"); + cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; + } + + system_registers.lpcr = mfspr(SPRN_LPCR); + system_registers.hfscr = mfspr(SPRN_HFSCR); + system_registers.fscr = mfspr(SPRN_FSCR); + + cpufeatures_flush_tlb(); + + pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n", + cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features); +} + +static int __init fdt_find_cpu_features(unsigned long node, const char *uname, + int depth, void *data) +{ + if (of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features") + && of_get_flat_dt_prop(node, "isa", NULL)) + return 1; + + return 0; +} + +static bool __initdata using_dt_cpu_ftrs = false; + +bool __init dt_cpu_ftrs_in_use(void) +{ + return using_dt_cpu_ftrs; +} + +bool __init dt_cpu_ftrs_init(void *fdt) +{ + /* Setup and verify the FDT, if it fails we just bail */ + if (!early_init_dt_verify(fdt)) + return false; + + if (!of_scan_flat_dt(fdt_find_cpu_features, NULL)) + return false; + + cpufeatures_setup_cpu(); + + using_dt_cpu_ftrs = true; + return true; +} + +static int nr_dt_cpu_features; +static struct dt_cpu_feature *dt_cpu_features; + +static int __init process_cpufeatures_node(unsigned long node, + const char *uname, int i) +{ + const __be32 *prop; + struct dt_cpu_feature *f; + int len; + + f = &dt_cpu_features[i]; + memset(f, 0, sizeof(struct dt_cpu_feature)); + + f->node = node; + + f->name = uname; + + prop = of_get_flat_dt_prop(node, "isa", &len); + if (!prop) { + pr_warn("%s: missing isa property\n", uname); + return 0; + } + f->isa = be32_to_cpup(prop); + + prop = of_get_flat_dt_prop(node, "usable-privilege", &len); + if (!prop) { + pr_warn("%s: missing usable-privilege property", uname); + return 0; + } + f->usable_privilege = be32_to_cpup(prop); + + prop = of_get_flat_dt_prop(node, "hv-support", &len); + if (prop) + f->hv_support = be32_to_cpup(prop); + else + f->hv_support = HV_SUPPORT_NONE; + + prop = of_get_flat_dt_prop(node, "os-support", &len); + if (prop) + f->os_support = be32_to_cpup(prop); + else + f->os_support = OS_SUPPORT_NONE; + + prop = of_get_flat_dt_prop(node, "hfscr-bit-nr", &len); + if (prop) + f->hfscr_bit_nr = be32_to_cpup(prop); + else + f->hfscr_bit_nr = -1; + prop = of_get_flat_dt_prop(node, "fscr-bit-nr", &len); + if (prop) + f->fscr_bit_nr = be32_to_cpup(prop); + else + f->fscr_bit_nr = -1; + prop = of_get_flat_dt_prop(node, "hwcap-bit-nr", &len); + if (prop) + f->hwcap_bit_nr = be32_to_cpup(prop); + else + f->hwcap_bit_nr = -1; + + if (f->usable_privilege & USABLE_HV) { + if (!(mfmsr() & MSR_HV)) { + pr_warn("%s: HV feature passed to guest\n", uname); + return 0; + } + + if (f->hv_support == HV_SUPPORT_NONE && f->hfscr_bit_nr != -1) { + pr_warn("%s: unwanted hfscr_bit_nr\n", uname); + return 0; + } + + if (f->hv_support == HV_SUPPORT_HFSCR) { + if (f->hfscr_bit_nr == -1) { + pr_warn("%s: missing hfscr_bit_nr\n", uname); + return 0; + } + } + } else { + if (f->hv_support != HV_SUPPORT_NONE || f->hfscr_bit_nr != -1) { + pr_warn("%s: unwanted hv_support/hfscr_bit_nr\n", uname); + return 0; + } + } + + if (f->usable_privilege & USABLE_OS) { + if (f->os_support == OS_SUPPORT_NONE && f->fscr_bit_nr != -1) { + pr_warn("%s: unwanted fscr_bit_nr\n", uname); + return 0; + } + + if (f->os_support == OS_SUPPORT_FSCR) { + if (f->fscr_bit_nr == -1) { + pr_warn("%s: missing fscr_bit_nr\n", uname); + return 0; + } + } + } else { + if (f->os_support != OS_SUPPORT_NONE || f->fscr_bit_nr != -1) { + pr_warn("%s: unwanted os_support/fscr_bit_nr\n", uname); + return 0; + } + } + + if (!(f->usable_privilege & USABLE_PR)) { + if (f->hwcap_bit_nr != -1) { + pr_warn("%s: unwanted hwcap_bit_nr\n", uname); + return 0; + } + } + + /* Do all the independent features in the first pass */ + if (!of_get_flat_dt_prop(node, "dependencies", &len)) { + if (cpufeatures_process_feature(f)) + f->enabled = 1; + else + f->disabled = 1; + } + + return 0; +} + +static void __init cpufeatures_deps_enable(struct dt_cpu_feature *f) +{ + const __be32 *prop; + int len; + int nr_deps; + int i; + + if (f->enabled || f->disabled) + return; + + prop = of_get_flat_dt_prop(f->node, "dependencies", &len); + if (!prop) { + pr_warn("%s: missing dependencies property", f->name); + return; + } + + nr_deps = len / sizeof(int); + + for (i = 0; i < nr_deps; i++) { + unsigned long phandle = be32_to_cpu(prop[i]); + int j; + + for (j = 0; j < nr_dt_cpu_features; j++) { + struct dt_cpu_feature *d = &dt_cpu_features[j]; + + if (of_get_flat_dt_phandle(d->node) == phandle) { + cpufeatures_deps_enable(d); + if (d->disabled) { + f->disabled = 1; + return; + } + } + } + } + + if (cpufeatures_process_feature(f)) + f->enabled = 1; + else + f->disabled = 1; +} + +static int __init scan_cpufeatures_subnodes(unsigned long node, + const char *uname, + void *data) +{ + int *count = data; + + process_cpufeatures_node(node, uname, *count); + + (*count)++; + + return 0; +} + +static int __init count_cpufeatures_subnodes(unsigned long node, + const char *uname, + void *data) +{ + int *count = data; + + (*count)++; + + return 0; +} + +static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char + *uname, int depth, void *data) +{ + const __be32 *prop; + int count, i; + u32 isa; + + /* We are scanning "ibm,powerpc-cpu-features" nodes only */ + if (!of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features")) + return 0; + + prop = of_get_flat_dt_prop(node, "isa", NULL); + if (!prop) + /* We checked before, "can't happen" */ + return 0; + + isa = be32_to_cpup(prop); + + /* Count and allocate space for cpu features */ + of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes, + &nr_dt_cpu_features); + dt_cpu_features = __va( + memblock_alloc(sizeof(struct dt_cpu_feature)* + nr_dt_cpu_features, PAGE_SIZE)); + + cpufeatures_setup_start(isa); + + /* Scan nodes into dt_cpu_features and enable those without deps */ + count = 0; + of_scan_flat_dt_subnodes(node, scan_cpufeatures_subnodes, &count); + + /* Recursive enable remaining features with dependencies */ + for (i = 0; i < nr_dt_cpu_features; i++) { + struct dt_cpu_feature *f = &dt_cpu_features[i]; + + cpufeatures_deps_enable(f); + } + + prop = of_get_flat_dt_prop(node, "display-name", NULL); + if (prop && strlen((char *)prop) != 0) { + strlcpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name)); + cur_cpu_spec->cpu_name = dt_cpu_name; + } + + cpufeatures_setup_finished(); + + memblock_free(__pa(dt_cpu_features), + sizeof(struct dt_cpu_feature)*nr_dt_cpu_features); + + return 0; +} + +void __init dt_cpu_ftrs_scan(void) +{ + of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL); +} diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9de7f79e702b..63992b2d8e15 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -22,7 +22,6 @@ */ #include <linux/delay.h> -#include <linux/debugfs.h> #include <linux/sched.h> #include <linux/init.h> #include <linux/list.h> @@ -37,7 +36,7 @@ #include <linux/of.h> #include <linux/atomic.h> -#include <asm/debug.h> +#include <asm/debugfs.h> #include <asm/eeh.h> #include <asm/eeh_event.h> #include <asm/io.h> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index b94887165a10..c405c79e50cd 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -724,7 +724,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, */ #define MAX_WAIT_FOR_RECOVERY 300 -static void eeh_handle_normal_event(struct eeh_pe *pe) +/** + * eeh_handle_normal_event - Handle EEH events on a specific PE + * @pe: EEH PE + * + * Attempts to recover the given PE. If recovery fails or the PE has failed + * too many times, remove the PE. + * + * Returns true if @pe should no longer be used, else false. + */ +static bool eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *frozen_bus; struct eeh_dev *edev, *tmp; @@ -736,13 +745,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) if (!frozen_bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); - return; + return false; } eeh_pe_update_time_stamp(pe); pe->freeze_count++; - if (pe->freeze_count > eeh_max_freezes) - goto excess_failures; + if (pe->freeze_count > eeh_max_freezes) { + pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" + "last hour and has been permanently disabled.\n", + pe->phb->global_number, pe->addr, + pe->freeze_count); + goto hard_fail; + } pr_warn("EEH: This PCI device has failed %d times in the last hour\n", pe->freeze_count); @@ -870,27 +884,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); - return; + return false; -excess_failures: +hard_fail: /* * About 90% of all real-life EEH failures in the field * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. */ - pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" - "last hour and has been permanently disabled.\n" - "Please try reseating or replacing it.\n", - pe->phb->global_number, pe->addr, - pe->freeze_count); - goto perm_error; - -hard_fail: pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" "Please try reseating or replacing it\n", pe->phb->global_number, pe->addr); -perm_error: eeh_slot_error_detail(pe, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ @@ -915,10 +920,21 @@ perm_error: pci_lock_rescan_remove(); pci_hp_remove_devices(frozen_bus); pci_unlock_rescan_remove(); + + /* The passed PE should no longer be used */ + return true; } } + return false; } +/** + * eeh_handle_special_event - Handle EEH events without a specific failing PE + * + * Called when an EEH event is detected but can't be narrowed down to a + * specific PE. Iterates through possible failures and handles them as + * necessary. + */ static void eeh_handle_special_event(void) { struct eeh_pe *pe, *phb_pe; @@ -982,7 +998,14 @@ static void eeh_handle_special_event(void) */ if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { - eeh_handle_normal_event(pe); + /* + * eeh_handle_normal_event() can make the PE stale if it + * determines that the PE cannot possibly be recovered. + * Don't modify the PE state if that's the case. + */ + if (eeh_handle_normal_event(pe)) + continue; + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } else { pci_lock_rescan_remove(); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index a38600949f3a..8587059ad848 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -31,7 +31,6 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> -#include <asm/ftrace.h> #include <asm/ptrace.h> #include <asm/export.h> @@ -1315,109 +1314,3 @@ machine_check_in_rtas: /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_RTAS */ - -#ifdef CONFIG_FUNCTION_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE -_GLOBAL(mcount) -_GLOBAL(_mcount) - /* - * It is required that _mcount on PPC32 must preserve the - * link register. But we have r0 to play with. We use r0 - * to push the return address back to the caller of mcount - * into the ctr register, restore the link register and - * then jump back using the ctr register. - */ - mflr r0 - mtctr r0 - lwz r0, 4(r1) - mtlr r0 - bctr - -_GLOBAL(ftrace_caller) - MCOUNT_SAVE_FRAME - /* r3 ends up with link register */ - subi r3, r3, MCOUNT_INSN_SIZE -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif - MCOUNT_RESTORE_FRAME - /* old link register ends up in ctr reg */ - bctr -#else -_GLOBAL(mcount) -_GLOBAL(_mcount) - - MCOUNT_SAVE_FRAME - - subi r3, r3, MCOUNT_INSN_SIZE - LOAD_REG_ADDR(r5, ftrace_trace_function) - lwz r5,0(r5) - - mtctr r5 - bctrl - nop - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - b ftrace_graph_caller -#endif - MCOUNT_RESTORE_FRAME - bctr -#endif -EXPORT_SYMBOL(_mcount) - -_GLOBAL(ftrace_stub) - blr - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(ftrace_graph_caller) - /* load r4 with local address */ - lwz r4, 44(r1) - subi r4, r4, MCOUNT_INSN_SIZE - - /* Grab the LR out of the caller stack frame */ - lwz r3,52(r1) - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR in the callers stack frame to this. - */ - stw r3,52(r1) - - MCOUNT_RESTORE_FRAME - /* old link register ends up in ctr reg */ - bctr - -_GLOBAL(return_to_handler) - /* need to save return values */ - stwu r1, -32(r1) - stw r3, 20(r1) - stw r4, 16(r1) - stw r31, 12(r1) - mr r31, r1 - - bl ftrace_return_to_handler - nop - - /* return value has real return address */ - mtlr r3 - - lwz r3, 20(r1) - lwz r4, 16(r1) - lwz r31,12(r1) - lwz r1, 0(r1) - - /* Jump back to real return address */ - blr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -#endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 767ef6d68c9e..bfbad08a1207 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -20,7 +20,6 @@ #include <linux/errno.h> #include <linux/err.h> -#include <linux/magic.h> #include <asm/unistd.h> #include <asm/processor.h> #include <asm/page.h> @@ -33,7 +32,6 @@ #include <asm/bug.h> #include <asm/ptrace.h> #include <asm/irqflags.h> -#include <asm/ftrace.h> #include <asm/hw_irq.h> #include <asm/context_tracking.h> #include <asm/tm.h> @@ -1173,381 +1171,3 @@ _GLOBAL(enter_prom) ld r0,16(r1) mtlr r0 blr - -#ifdef CONFIG_FUNCTION_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE -_GLOBAL(mcount) -_GLOBAL(_mcount) -EXPORT_SYMBOL(_mcount) - mflr r12 - mtctr r12 - mtlr r0 - bctr - -#ifndef CC_USING_MPROFILE_KERNEL -_GLOBAL_TOC(ftrace_caller) - /* Taken from output of objdump from lib64/glibc */ - mflr r3 - ld r11, 0(r1) - stdu r1, -112(r1) - std r3, 128(r1) - ld r4, 16(r11) - subi r3, r3, MCOUNT_INSN_SIZE -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 - -#else /* CC_USING_MPROFILE_KERNEL */ -/* - * - * ftrace_caller() is the function that replaces _mcount() when ftrace is - * active. - * - * We arrive here after a function A calls function B, and we are the trace - * function for B. When we enter r1 points to A's stack frame, B has not yet - * had a chance to allocate one yet. - * - * Additionally r2 may point either to the TOC for A, or B, depending on - * whether B did a TOC setup sequence before calling us. - * - * On entry the LR points back to the _mcount() call site, and r0 holds the - * saved LR as it was on entry to B, ie. the original return address at the - * call site in A. - * - * Our job is to save the register state into a struct pt_regs (on the stack) - * and then arrange for the ftrace function to be called. - */ -_GLOBAL(ftrace_caller) - /* Save the original return address in A's stack frame */ - std r0,LRSAVE(r1) - - /* Create our stack frame + pt_regs */ - stdu r1,-SWITCH_FRAME_SIZE(r1) - - /* Save all gprs to pt_regs */ - SAVE_8GPRS(0,r1) - SAVE_8GPRS(8,r1) - SAVE_8GPRS(16,r1) - SAVE_8GPRS(24,r1) - - /* Load special regs for save below */ - mfmsr r8 - mfctr r9 - mfxer r10 - mfcr r11 - - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save it as pt_regs->nip & pt_regs->link */ - std r7, _NIP(r1) - std r7, _LINK(r1) - - /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) - ld r2,PACATOC(r13) /* get kernel TOC in r2 */ - - addis r3,r2,function_trace_op@toc@ha - addi r3,r3,function_trace_op@toc@l - ld r5,0(r3) - -#ifdef CONFIG_LIVEPATCH - mr r14,r7 /* remember old NIP */ -#endif - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - - /* Save special regs */ - std r8, _MSR(r1) - std r9, _CTR(r1) - std r10, _XER(r1) - std r11, _CCR(r1) - - /* Load &pt_regs in r6 for call below */ - addi r6, r1 ,STACK_FRAME_OVERHEAD - - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop - - /* Load ctr with the possibly modified NIP */ - ld r3, _NIP(r1) - mtctr r3 -#ifdef CONFIG_LIVEPATCH - cmpd r14,r3 /* has NIP been altered? */ -#endif - - /* Restore gprs */ - REST_8GPRS(0,r1) - REST_8GPRS(8,r1) - REST_8GPRS(16,r1) - REST_8GPRS(24,r1) - - /* Restore callee's TOC */ - ld r2, 24(r1) - - /* Pop our stack frame */ - addi r1, r1, SWITCH_FRAME_SIZE - - /* Restore original LR for return to B */ - ld r0, LRSAVE(r1) - mtlr r0 - -#ifdef CONFIG_LIVEPATCH - /* Based on the cmpd above, if the NIP was altered handle livepatch */ - bne- livepatch_handler -#endif - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - stdu r1, -112(r1) -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) - addi r1, r1, 112 -#endif - - ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */ - mtlr r0 - bctr /* jump after _mcount site */ -#endif /* CC_USING_MPROFILE_KERNEL */ - -_GLOBAL(ftrace_stub) - blr - -#ifdef CONFIG_LIVEPATCH - /* - * This function runs in the mcount context, between two functions. As - * such it can only clobber registers which are volatile and used in - * function linkage. - * - * We get here when a function A, calls another function B, but B has - * been live patched with a new function C. - * - * On entry: - * - we have no stack frame and can not allocate one - * - LR points back to the original caller (in A) - * - CTR holds the new NIP in C - * - r0 & r12 are free - * - * r0 can't be used as the base register for a DS-form load or store, so - * we temporarily shuffle r1 (stack pointer) into r0 and then put it back. - */ -livepatch_handler: - CURRENT_THREAD_INFO(r12, r1) - - /* Save stack pointer into r0 */ - mr r0, r1 - - /* Allocate 3 x 8 bytes */ - ld r1, TI_livepatch_sp(r12) - addi r1, r1, 24 - std r1, TI_livepatch_sp(r12) - - /* Save toc & real LR on livepatch stack */ - std r2, -24(r1) - mflr r12 - std r12, -16(r1) - - /* Store stack end marker */ - lis r12, STACK_END_MAGIC@h - ori r12, r12, STACK_END_MAGIC@l - std r12, -8(r1) - - /* Restore real stack pointer */ - mr r1, r0 - - /* Put ctr in r12 for global entry and branch there */ - mfctr r12 - bctrl - - /* - * Now we are returning from the patched function to the original - * caller A. We are free to use r0 and r12, and we can use r2 until we - * restore it. - */ - - CURRENT_THREAD_INFO(r12, r1) - - /* Save stack pointer into r0 */ - mr r0, r1 - - ld r1, TI_livepatch_sp(r12) - - /* Check stack marker hasn't been trashed */ - lis r2, STACK_END_MAGIC@h - ori r2, r2, STACK_END_MAGIC@l - ld r12, -8(r1) -1: tdne r12, r2 - EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 - - /* Restore LR & toc from livepatch stack */ - ld r12, -16(r1) - mtlr r12 - ld r2, -24(r1) - - /* Pop livepatch stack frame */ - CURRENT_THREAD_INFO(r12, r0) - subi r1, r1, 24 - std r1, TI_livepatch_sp(r12) - - /* Restore real stack pointer */ - mr r1, r0 - - /* Return to original caller of live patched function */ - blr -#endif - - -#else -_GLOBAL_TOC(_mcount) -EXPORT_SYMBOL(_mcount) - /* Taken from output of objdump from lib64/glibc */ - mflr r3 - ld r11, 0(r1) - stdu r1, -112(r1) - std r3, 128(r1) - ld r4, 16(r11) - - subi r3, r3, MCOUNT_INSN_SIZE - LOAD_REG_ADDR(r5,ftrace_trace_function) - ld r5,0(r5) - ld r5,0(r5) - mtctr r5 - bctrl - nop - - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - b ftrace_graph_caller -#endif - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 -_GLOBAL(ftrace_stub) - blr - -#endif /* CONFIG_DYNAMIC_FTRACE */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -#ifndef CC_USING_MPROFILE_KERNEL -_GLOBAL(ftrace_graph_caller) - /* load r4 with local address */ - ld r4, 128(r1) - subi r4, r4, MCOUNT_INSN_SIZE - - /* Grab the LR out of the caller stack frame */ - ld r11, 112(r1) - ld r3, 16(r11) - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR in the callers stack frame to this. - */ - ld r11, 112(r1) - std r3, 16(r11) - - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 - blr - -#else /* CC_USING_MPROFILE_KERNEL */ -_GLOBAL(ftrace_graph_caller) - /* with -mprofile-kernel, parameter regs are still alive at _mcount */ - std r10, 104(r1) - std r9, 96(r1) - std r8, 88(r1) - std r7, 80(r1) - std r6, 72(r1) - std r5, 64(r1) - std r4, 56(r1) - std r3, 48(r1) - - /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) - ld r2, PACATOC(r13) /* get kernel TOC in r2 */ - - mfctr r4 /* ftrace_caller has moved local addr here */ - std r4, 40(r1) - mflr r3 /* ftrace_caller has restored LR from stack */ - subi r4, r4, MCOUNT_INSN_SIZE - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR to this. - */ - mtlr r3 - - ld r0, 40(r1) - mtctr r0 - ld r10, 104(r1) - ld r9, 96(r1) - ld r8, 88(r1) - ld r7, 80(r1) - ld r6, 72(r1) - ld r5, 64(r1) - ld r4, 56(r1) - ld r3, 48(r1) - - /* Restore callee's TOC */ - ld r2, 24(r1) - - addi r1, r1, 112 - mflr r0 - std r0, LRSAVE(r1) - bctr -#endif /* CC_USING_MPROFILE_KERNEL */ - -_GLOBAL(return_to_handler) - /* need to save return values */ - std r4, -32(r1) - std r3, -24(r1) - /* save TOC */ - std r2, -16(r1) - std r31, -8(r1) - mr r31, r1 - stdu r1, -112(r1) - - /* - * We might be called from a module. - * Switch to our TOC to run inside the core kernel. - */ - ld r2, PACATOC(r13) - - bl ftrace_return_to_handler - nop - - /* return value has real return address */ - mtlr r3 - - ld r1, 0(r1) - ld r4, -32(r1) - ld r3, -24(r1) - ld r2, -16(r1) - ld r31, -8(r1) - - /* Jump back to real return address */ - blr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -#endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 45b453e4d0c8..acd8ca76233e 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -735,8 +735,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) andis. r15,r14,(DBSR_IC|DBSR_BT)@h beq+ 1f +#ifdef CONFIG_RELOCATABLE + ld r15,PACATOC(r13) + ld r14,interrupt_base_book3e@got(r15) + ld r15,__end_interrupts@got(r15) +#else LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) LOAD_REG_IMMEDIATE(r15,__end_interrupts) +#endif cmpld cr0,r10,r14 cmpld cr1,r10,r15 blt+ cr0,1f @@ -799,8 +805,14 @@ kernel_dbg_exc: andis. r15,r14,(DBSR_IC|DBSR_BT)@h beq+ 1f +#ifdef CONFIG_RELOCATABLE + ld r15,PACATOC(r13) + ld r14,interrupt_base_book3e@got(r15) + ld r15,__end_interrupts@got(r15) +#else LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) LOAD_REG_IMMEDIATE(r15,__end_interrupts) +#endif cmpld cr0,r10,r14 cmpld cr1,r10,r15 blt+ cr0,1f diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6353019966e6..ae418b85c17c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -116,9 +116,11 @@ EXC_VIRT_NONE(0x4000, 0x100) EXC_REAL_BEGIN(system_reset, 0x100, 0x100) SET_SCRATCH0(r13) - GET_PACA(r13) - clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */ - EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD, + /* + * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is + * being used, so a nested NMI exception would corrupt it. + */ + EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common, EXC_STD, IDLETEST, 0x100) EXC_REAL_END(system_reset, 0x100, 0x100) @@ -126,34 +128,37 @@ EXC_VIRT_NONE(0x4100, 0x100) #ifdef CONFIG_PPC_P7_NAP EXC_COMMON_BEGIN(system_reset_idle_common) -BEGIN_FTR_SECTION - GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */ -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) - bl pnv_restore_hyp_resource + b pnv_powersave_wakeup +#endif - li r0,PNV_THREAD_RUNNING - stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ +EXC_COMMON_BEGIN(system_reset_common) + /* + * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able + * to recover, but nested NMI will notice in_nmi and not recover + * because of the use of the NMI stack. in_nmi reentrancy is tested in + * system_reset_exception. + */ + lhz r10,PACA_IN_NMI(r13) + addi r10,r10,1 + sth r10,PACA_IN_NMI(r13) + li r10,MSR_RI + mtmsrd r10,1 -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - li r0,KVM_HWTHREAD_IN_KERNEL - stb r0,HSTATE_HWTHREAD_STATE(r13) - /* Order setting hwthread_state vs. testing hwthread_req */ - sync - lbz r0,HSTATE_HWTHREAD_REQ(r13) - cmpwi r0,0 - beq 1f - BRANCH_TO_KVM(r10, kvm_start_guest) -1: -#endif + mr r10,r1 + ld r1,PACA_NMI_EMERG_SP(r13) + subi r1,r1,INT_FRAME_SIZE + EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100, + system_reset, system_reset_exception, + ADD_NVGPRS;ADD_RECONCILE) - /* Return SRR1 from power7_nap() */ - mfspr r3,SPRN_SRR1 - blt cr3,2f - b pnv_wakeup_loss -2: b pnv_wakeup_noloss -#endif + /* + * The stack is no longer in use, decrement in_nmi. + */ + lhz r10,PACA_IN_NMI(r13) + subi r10,r10,1 + sth r10,PACA_IN_NMI(r13) -EXC_COMMON(system_reset_common, 0x100, system_reset_exception) + b ret_from_except #ifdef CONFIG_PPC_PSERIES /* @@ -161,8 +166,9 @@ EXC_COMMON(system_reset_common, 0x100, system_reset_exception) */ TRAMP_REAL_BEGIN(system_reset_fwnmi) SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, - NOTEST, 0x100) + /* See comment at system_reset exception */ + EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common, + EXC_STD, NOTEST, 0x100) #endif /* CONFIG_PPC_PSERIES */ @@ -172,14 +178,6 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100) * vector */ SET_SCRATCH0(r13) /* save r13 */ - /* - * Running native on arch 2.06 or later, we may wakeup from winkle - * inside machine check. If yes, then last bit of HSPRG0 would be set - * to 1. Hence clear it unconditionally. - */ - GET_PACA(r13) - clrrdi r13,r13,1 - SET_PACA(r13) EXCEPTION_PROLOG_0(PACA_EXMC) BEGIN_FTR_SECTION b machine_check_powernv_early @@ -212,6 +210,12 @@ BEGIN_FTR_SECTION * NOTE: We are here with MSR_ME=0 (off), which means we risk a * checkstop if we get another machine check exception before we do * rfid with MSR_ME=1. + * + * This interrupt can wake directly from idle. If that is the case, + * the machine check is handled then the idle wakeup code is called + * to restore state. In that case, the POWER9 DD1 idle PACA workaround + * is not applied in the early machine check code, which will cause + * bugs. */ mr r11,r1 /* Save r1 */ lhz r10,PACA_IN_MCE(r13) @@ -268,20 +272,11 @@ machine_check_fwnmi: machine_check_pSeries_0: EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200) /* - * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the - * difference that MSR_RI is not enabled, because PACA_EXMC is being - * used, so nested machine check corrupts it. machine_check_common - * enables MSR_RI. + * MSR_RI is not enabled, because PACA_EXMC is being used, so a + * nested machine check corrupts it. machine_check_common enables + * MSR_RI. */ - ld r10,PACAKMSR(r13) - xori r10,r10,MSR_RI - mfspr r11,SPRN_SRR0 - LOAD_HANDLER(r12, machine_check_common) - mtspr SPRN_SRR0,r12 - mfspr r12,SPRN_SRR1 - mtspr SPRN_SRR1,r10 - rfid - b . /* prevent speculative execution */ + EXCEPTION_PROLOG_PSERIES_1_NORI(machine_check_common, EXC_STD) TRAMP_KVM_SKIP(PACA_EXMC, 0x200) @@ -340,6 +335,37 @@ EXC_COMMON_BEGIN(machine_check_common) /* restore original r1. */ \ ld r1,GPR1(r1) +#ifdef CONFIG_PPC_P7_NAP +/* + * This is an idle wakeup. Low level machine check has already been + * done. Queue the event then call the idle code to do the wake up. + */ +EXC_COMMON_BEGIN(machine_check_idle_common) + bl machine_check_queue_event + + /* + * We have not used any non-volatile GPRs here, and as a rule + * most exception code including machine check does not. + * Therefore PACA_NAPSTATELOST does not need to be set. Idle + * wakeup will restore volatile registers. + * + * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce. + * + * Then decrement MCE nesting after finishing with the stack. + */ + ld r3,_MSR(r1) + + lhz r11,PACA_IN_MCE(r13) + subi r11,r11,1 + sth r11,PACA_IN_MCE(r13) + + /* Turn off the RI bit because SRR1 is used by idle wakeup code. */ + /* Recoverability could be improved by reducing the use of SRR1. */ + li r11,0 + mtmsrd r11,1 + + b pnv_powersave_wakeup_mce +#endif /* * Handle machine check early in real mode. We come here with * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack. @@ -352,6 +378,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) bl machine_check_early std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) + #ifdef CONFIG_PPC_P7_NAP /* * Check if thread was in power saving mode. We come here when any @@ -362,48 +389,12 @@ EXC_COMMON_BEGIN(machine_check_handle_early) * * Go back to nap/sleep/winkle mode again if (b) is true. */ - rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ - beq 4f /* No, it wasn;t */ - /* Thread was in power saving mode. Go back to nap again. */ - cmpwi r11,2 - blt 3f - /* Supervisor/Hypervisor state loss */ - li r0,1 - stb r0,PACA_NAPSTATELOST(r13) -3: bl machine_check_queue_event - MACHINE_CHECK_HANDLER_WINDUP - GET_PACA(r13) - ld r1,PACAR1(r13) - /* - * Check what idle state this CPU was in and go back to same mode - * again. - */ - lbz r3,PACA_THREAD_IDLE_STATE(r13) - cmpwi r3,PNV_THREAD_NAP - bgt 10f - IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) - /* No return */ -10: - cmpwi r3,PNV_THREAD_SLEEP - bgt 2f - IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) - /* No return */ - -2: - /* - * Go back to winkle. Please note that this thread was woken up in - * machine check from winkle and have not restored the per-subcore - * state. Hence before going back to winkle, set last bit of HSPRG0 - * to 1. This will make sure that if this thread gets woken up - * again at reset vector 0x100 then it will get chance to restore - * the subcore state. - */ - ori r13,r13,1 - SET_PACA(r13) - IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) - /* No return */ -4: + BEGIN_FTR_SECTION + rlwinm. r11,r12,47-31,30,31 + bne machine_check_idle_common + END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif + /* * Check if we are coming from hypervisor userspace. If yes then we * continue in host kernel in V mode to deliver the MC event. @@ -968,17 +959,12 @@ EXC_VIRT_NONE(0x4e60, 0x20) TRAMP_KVM_HV(PACA_EXGEN, 0xe60) TRAMP_REAL_BEGIN(hmi_exception_early) EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60) - mr r10,r1 /* Save r1 */ - ld r1,PACAEMERGSP(r13) /* Use emergency stack */ + mr r10,r1 /* Save r1 */ + ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - std r9,_CCR(r1) /* save CR in stackframe */ mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ - std r11,_NIP(r1) /* save HSRR0 in stackframe */ - mfspr r12,SPRN_HSRR1 /* Save SRR1 */ - std r12,_MSR(r1) /* save SRR1 in stackframe */ - std r10,0(r1) /* make stack chain pointer */ - std r0,GPR0(r1) /* save r0 in stackframe */ - std r10,GPR1(r1) /* save r1 in stackframe */ + mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ + EXCEPTION_PROLOG_COMMON_1() EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8ff0dd4e77a7..466569e26278 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -30,17 +30,16 @@ #include <linux/string.h> #include <linux/memblock.h> #include <linux/delay.h> -#include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/crash_dump.h> #include <linux/kobject.h> #include <linux/sysfs.h> +#include <asm/debugfs.h> #include <asm/page.h> #include <asm/prom.h> #include <asm/rtas.h> #include <asm/fadump.h> -#include <asm/debug.h> #include <asm/setup.h> static struct fw_dump fw_dump; @@ -210,14 +209,20 @@ static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm, */ static inline unsigned long fadump_calculate_reserve_size(void) { - unsigned long size; + int ret; + unsigned long long base, size; /* - * Check if the size is specified through fadump_reserve_mem= cmdline - * option. If yes, then use that. + * Check if the size is specified through crashkernel= cmdline + * option. If yes, then use that but ignore base as fadump + * reserves memory at end of RAM. */ - if (fw_dump.reserve_bootvar) + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), + &size, &base); + if (ret == 0 && size > 0) { + fw_dump.reserve_bootvar = (unsigned long)size; return fw_dump.reserve_bootvar; + } /* divide by 20 to get 5% of value */ size = memblock_end_of_DRAM() / 20; @@ -319,15 +324,34 @@ int __init fadump_reserve_mem(void) pr_debug("fadumphdr_addr = %p\n", (void *) fw_dump.fadumphdr_addr); } else { - /* Reserve the memory at the top of memory. */ size = get_fadump_area_size(); - base = memory_boundary - size; - memblock_reserve(base, size); - printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " - "for firmware-assisted dump\n", - (unsigned long)(size >> 20), - (unsigned long)(base >> 20)); + + /* + * Reserve memory at an offset closer to bottom of the RAM to + * minimize the impact of memory hot-remove operation. We can't + * use memblock_find_in_range() here since it doesn't allocate + * from bottom to top. + */ + for (base = fw_dump.boot_memory_size; + base <= (memory_boundary - size); + base += size) { + if (memblock_is_region_memory(base, size) && + !memblock_is_region_reserved(base, size)) + break; + } + if ((base > (memory_boundary - size)) || + memblock_reserve(base, size)) { + pr_err("Failed to reserve memory\n"); + return 0; + } + + pr_info("Reserved %ldMB of memory at %ldMB for firmware-" + "assisted dump (System RAM: %ldMB)\n", + (unsigned long)(size >> 20), + (unsigned long)(base >> 20), + (unsigned long)(memblock_phys_mem_size() >> 20)); } + fw_dump.reserve_dump_area_start = base; fw_dump.reserve_dump_area_size = size; return 1; @@ -353,15 +377,6 @@ static int __init early_fadump_param(char *p) } early_param("fadump", early_fadump_param); -/* Look for fadump_reserve_mem= cmdline option */ -static int __init early_fadump_reserve_mem(char *p) -{ - if (p) - fw_dump.reserve_bootvar = memparse(p, &p); - return 0; -} -early_param("fadump_reserve_mem", early_fadump_reserve_mem); - static void register_fw_dump(struct fadump_mem_struct *fdm) { int rc; @@ -509,34 +524,6 @@ fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs) return reg_entry; } -static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type, - void *data, size_t data_len) -{ - struct elf_note note; - - note.n_namesz = strlen(name) + 1; - note.n_descsz = data_len; - note.n_type = type; - memcpy(buf, ¬e, sizeof(note)); - buf += (sizeof(note) + 3)/4; - memcpy(buf, name, note.n_namesz); - buf += (note.n_namesz + 3)/4; - memcpy(buf, data, note.n_descsz); - buf += (note.n_descsz + 3)/4; - - return buf; -} - -static void fadump_final_note(u32 *buf) -{ - struct elf_note note; - - note.n_namesz = 0; - note.n_descsz = 0; - note.n_type = 0; - memcpy(buf, ¬e, sizeof(note)); -} - static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) { struct elf_prstatus prstatus; @@ -547,8 +534,8 @@ static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) * prstatus.pr_pid = ???? */ elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); - buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, - &prstatus, sizeof(prstatus)); + buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, + &prstatus, sizeof(prstatus)); return buf; } @@ -689,7 +676,7 @@ static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm) note_buf = fadump_regs_to_elf_notes(note_buf, ®s); } } - fadump_final_note(note_buf); + final_note(note_buf); if (fdh) { pr_debug("Updating elfcore header (%llx) with cpu notes\n", diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 1607be7c0ef2..e22734278458 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -735,11 +735,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE) - - .globl mol_trampoline - .set mol_trampoline, i0x2f00 - EXPORT_SYMBOL(mol_trampoline) + EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE) . = 0x3000 @@ -1278,16 +1274,6 @@ EXPORT_SYMBOL(empty_zero_page) swapper_pg_dir: .space PGD_TABLE_SIZE - .globl intercept_table -intercept_table: - .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700 - .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0 - .long 0, 0, 0, i0x1300, 0, 0, 0, 0 - .long 0, 0, 0, 0, 0, 0, 0, 0 - .long 0, 0, 0, 0, 0, 0, 0, 0 - .long 0, 0, 0, 0, 0, 0, 0, 0 -EXPORT_SYMBOL(intercept_table) - /* Room for two PTE pointers, usually the kernel and current user pointers * to their respective root page table. */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1dc5eae2ced3..0ddc602b33a4 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -949,7 +949,8 @@ start_here_multiplatform: LOAD_REG_ADDR(r3,init_thread_union) /* set up a stack pointer */ - addi r1,r3,THREAD_SIZE + LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) + add r1,r3,r1 li r0,0 stdu r0,-STACK_FRAME_OVERHEAD(r1) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 6fd08219248d..07d4e0ad60db 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -20,6 +20,7 @@ #include <asm/kvm_book3s_asm.h> #include <asm/opal.h> #include <asm/cpuidle.h> +#include <asm/exception-64s.h> #include <asm/book3s/64/mmu-hash.h> #include <asm/mmu.h> @@ -94,12 +95,12 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) core_idle_lock_held: HMT_LOW 3: lwz r15,0(r14) - andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT + andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h bne 3b HMT_MEDIUM lwarx r15,0,r14 - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bne core_idle_lock_held + andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + bne- core_idle_lock_held blr /* @@ -113,7 +114,7 @@ core_idle_lock_held: * * Address to 'rfid' to in r5 */ -_GLOBAL(pnv_powersave_common) +pnv_powersave_common: /* Use r3 to pass state nap/sleep/winkle */ /* NAP is a state loss, we create a regs frame on the * stack, fill it up with the state we care about and @@ -188,8 +189,8 @@ pnv_enter_arch207_idle_mode: /* The following store to HSTATE_HWTHREAD_STATE(r13) */ /* MUST occur in real mode, i.e. with the MMU off, */ /* and the MMU must stay off until we clear this flag */ - /* and test HSTATE_HWTHREAD_REQ(r13) in the system */ - /* reset interrupt vector in exceptions-64s.S. */ + /* and test HSTATE_HWTHREAD_REQ(r13) in */ + /* pnv_powersave_wakeup in this file. */ /* The reason is that another thread can switch the */ /* MMU to a guest context whenever this flag is set */ /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ @@ -209,15 +210,20 @@ pnv_enter_arch207_idle_mode: /* Sleep or winkle */ lbz r7,PACA_THREAD_MASK(r13) ld r14,PACA_CORE_IDLE_STATE_PTR(r13) + li r5,0 + beq cr3,3f + lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h +3: lwarx_loop1: lwarx r15,0,r14 - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bnel core_idle_lock_held + andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + bnel- core_idle_lock_held + add r15,r15,r5 /* Add if winkle */ andc r15,r15,r7 /* Clear thread bit */ - andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS /* * If cr0 = 0, then current thread is the last thread of the core entering @@ -240,7 +246,7 @@ common_enter: /* common code for all the threads entering sleep or winkle */ IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) fastsleep_workaround_at_entry: - ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h stwcx. r15,0,r14 bne- lwarx_loop1 isync @@ -250,10 +256,10 @@ fastsleep_workaround_at_entry: li r4,1 bl opal_config_cpu_idle_state - /* Clear Lock bit */ - li r0,0 + /* Unlock */ + xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h lwsync - stw r0,0(r14) + stw r15,0(r14) b common_enter enter_winkle: @@ -301,8 +307,8 @@ power_enter_stop: lwarx_loop_stop: lwarx r15,0,r14 - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bnel core_idle_lock_held + andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + bnel- core_idle_lock_held andc r15,r15,r7 /* Clear thread bit */ stwcx. r15,0,r14 @@ -375,17 +381,113 @@ _GLOBAL(power9_idle_stop) li r4,1 b pnv_powersave_common /* No return */ + /* - * Called from reset vector. Check whether we have woken up with - * hypervisor state loss. If yes, restore hypervisor state and return - * back to reset vector. + * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1, + * HSPRG0 will be set to the HSPRG0 value of one of the + * threads in this core. Thus the value we have in r13 + * may not be this thread's paca pointer. + * + * Fortunately, the TIR remains invariant. Since this thread's + * paca pointer is recorded in all its sibling's paca, we can + * correctly recover this thread's paca pointer if we + * know the index of this thread in the core. + * + * This index can be obtained from the TIR. * - * r13 - Contents of HSPRG0 + * i.e, thread's position in the core = TIR. + * If this value is i, then this thread's paca is + * paca->thread_sibling_pacas[i]. + */ +power9_dd1_recover_paca: + mfspr r4, SPRN_TIR + /* + * Since each entry in thread_sibling_pacas is 8 bytes + * we need to left-shift by 3 bits. Thus r4 = i * 8 + */ + sldi r4, r4, 3 + /* Get &paca->thread_sibling_pacas[0] in r5 */ + ld r5, PACA_SIBLING_PACA_PTRS(r13) + /* Load paca->thread_sibling_pacas[i] into r13 */ + ldx r13, r4, r5 + SET_PACA(r13) + /* + * Indicate that we have lost NVGPR state + * which needs to be restored from the stack. + */ + li r3, 1 + stb r0,PACA_NAPSTATELOST(r13) + blr + +/* + * Called from machine check handler for powersave wakeups. + * Low level machine check processing has already been done. Now just + * go through the wake up path to get everything in order. + * + * r3 - The original SRR1 value. + * Original SRR[01] have been clobbered. + * MSR_RI is clear. + */ +.global pnv_powersave_wakeup_mce +pnv_powersave_wakeup_mce: + /* Set cr3 for pnv_powersave_wakeup */ + rlwinm r11,r3,47-31,30,31 + cmpwi cr3,r11,2 + + /* + * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake + * reason into SRR1, which allows reuse of the system reset wakeup + * code without being mistaken for another type of wakeup. + */ + oris r3,r3,SRR1_WAKEMCE_RESVD@h + mtspr SPRN_SRR1,r3 + + b pnv_powersave_wakeup + +/* + * Called from reset vector for powersave wakeups. * cr3 - set to gt if waking up with partial/complete hypervisor state loss */ -_GLOBAL(pnv_restore_hyp_resource) +.global pnv_powersave_wakeup +pnv_powersave_wakeup: + ld r2, PACATOC(r13) + BEGIN_FTR_SECTION - ld r2,PACATOC(r13); +BEGIN_FTR_SECTION_NESTED(70) + bl power9_dd1_recover_paca +END_FTR_SECTION_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70) + bl pnv_restore_hyp_resource_arch300 +FTR_SECTION_ELSE + bl pnv_restore_hyp_resource_arch207 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) + + li r0,PNV_THREAD_RUNNING + stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + li r0,KVM_HWTHREAD_IN_KERNEL + stb r0,HSTATE_HWTHREAD_STATE(r13) + /* Order setting hwthread_state vs. testing hwthread_req */ + sync + lbz r0,HSTATE_HWTHREAD_REQ(r13) + cmpwi r0,0 + beq 1f + b kvm_start_guest +1: +#endif + + /* Return SRR1 from power7_nap() */ + mfspr r3,SPRN_SRR1 + blt cr3,pnv_wakeup_noloss + b pnv_wakeup_loss + +/* + * Check whether we have woken up with hypervisor state loss. + * If yes, restore hypervisor state and return back to link. + * + * cr3 - set to gt if waking up with partial/complete hypervisor state loss + */ +pnv_restore_hyp_resource_arch300: /* * POWER ISA 3. Use PSSCR to determine if we * are waking up from deep idle state @@ -400,31 +502,19 @@ BEGIN_FTR_SECTION */ rldicl r5,r5,4,60 cmpd cr4,r5,r4 - bge cr4,pnv_wakeup_tb_loss - /* - * Waking up without hypervisor state loss. Return to - * reset vector - */ - blr + bge cr4,pnv_wakeup_tb_loss /* returns to caller */ -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + blr /* Waking up without hypervisor state loss. */ +/* Same calling convention as arch300 */ +pnv_restore_hyp_resource_arch207: /* * POWER ISA 2.07 or less. - * Check if last bit of HSPGR0 is set. This indicates whether we are - * waking up from winkle. + * Check if we slept with sleep or winkle. */ - clrldi r5,r13,63 - clrrdi r13,r13,1 - - /* Now that we are sure r13 is corrected, load TOC */ - ld r2,PACATOC(r13); - cmpwi cr4,r5,1 - mtspr SPRN_HSPRG0,r13 - - lbz r0,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr2,r0,PNV_THREAD_NAP - bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ + lbz r4,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r4,PNV_THREAD_NAP + bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ /* * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking @@ -433,8 +523,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) */ bgt cr3,. - blr /* Return back to System Reset vector from where - pnv_restore_hyp_resource was invoked */ + blr /* Waking up without hypervisor state loss */ /* * Called if waking up from idle state which can cause either partial or @@ -444,9 +533,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) * * r13 - PACA * cr3 - gt if waking up with partial/complete hypervisor state loss + * + * If ISA300: * cr4 - gt or eq if waking up from complete hypervisor state loss. + * + * If ISA207: + * r4 - PACA_THREAD_IDLE_STATE */ -_GLOBAL(pnv_wakeup_tb_loss) +pnv_wakeup_tb_loss: ld r1,PACAR1(r13) /* * Before entering any idle state, the NVGPRs are saved in the stack. @@ -473,18 +567,19 @@ _GLOBAL(pnv_wakeup_tb_loss) * is required to return back to reset vector after hypervisor state * restore is complete. */ + mr r18,r4 mflr r17 mfspr r16,SPRN_SRR1 BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - lbz r7,PACA_THREAD_MASK(r13) ld r14,PACA_CORE_IDLE_STATE_PTR(r13) -lwarx_loop2: - lwarx r15,0,r14 - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + lbz r7,PACA_THREAD_MASK(r13) + /* + * Take the core lock to synchronize against other threads. + * * Lock bit is set in one of the 2 cases- * a. In the sleep/winkle enter path, the last thread is executing * fastsleep workaround code. @@ -492,23 +587,93 @@ lwarx_loop2: * workaround undo code or resyncing timebase or restoring context * In either case loop until the lock bit is cleared. */ - bnel core_idle_lock_held +1: + lwarx r15,0,r14 + andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + bnel- core_idle_lock_held + oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h + stwcx. r15,0,r14 + bne- 1b + isync - cmpwi cr2,r15,0 + andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS + cmpwi cr2,r9,0 /* * At this stage * cr2 - eq if first thread to wakeup in core * cr3- gt if waking up with partial/complete hypervisor state loss + * ISA300: * cr4 - gt or eq if waking up from complete hypervisor state loss. */ - ori r15,r15,PNV_CORE_IDLE_LOCK_BIT - stwcx. r15,0,r14 - bne- lwarx_loop2 - isync - BEGIN_FTR_SECTION + /* + * Were we in winkle? + * If yes, check if all threads were in winkle, decrement our + * winkle count, set all thread winkle bits if all were in winkle. + * Check if our thread has a winkle bit set, and set cr4 accordingly + * (to match ISA300, above). Pseudo-code for core idle state + * transitions for ISA207 is as follows (everything happens atomically + * due to store conditional and/or lock bit): + * + * nap_idle() { } + * nap_wake() { } + * + * sleep_idle() + * { + * core_idle_state &= ~thread_in_core + * } + * + * sleep_wake() + * { + * bool first_in_core, first_in_subcore; + * + * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0; + * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0; + * + * core_idle_state |= thread_in_core; + * } + * + * winkle_idle() + * { + * core_idle_state &= ~thread_in_core; + * core_idle_state += 1 << WINKLE_COUNT_SHIFT; + * } + * + * winkle_wake() + * { + * bool first_in_core, first_in_subcore, winkle_state_lost; + * + * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0; + * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0; + * + * core_idle_state |= thread_in_core; + * + * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT)) + * core_idle_state |= THREAD_WINKLE_BITS; + * core_idle_state -= 1 << WINKLE_COUNT_SHIFT; + * + * winkle_state_lost = core_idle_state & + * (thread_in_core << WINKLE_THREAD_SHIFT); + * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT); + * } + * + */ + cmpwi r18,PNV_THREAD_WINKLE + bne 2f + andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h + subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h + beq 2f + ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */ +2: + /* Shift thread bit to winkle mask, then test if this thread is set, + * and remove it from the winkle bits */ + slwi r8,r7,8 + and r8,r8,r15 + andc r15,r15,r8 + cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */ + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) and r4,r4,r15 cmpwi r4,0 /* Check if first in subcore */ @@ -593,7 +758,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mtspr SPRN_WORC,r4 clear_lock: - andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h lwsync stw r15,0(r14) @@ -651,8 +816,7 @@ hypervisor_state_restored: mtspr SPRN_SRR1,r16 mtlr r17 - blr /* Return back to System Reset vector from where - pnv_restore_hyp_resource was invoked */ + blr /* return to pnv_powersave_wakeup */ fastsleep_workaround_at_exit: li r3,1 @@ -664,7 +828,8 @@ fastsleep_workaround_at_exit: * R3 here contains the value that will be returned to the caller * of power7_nap. */ -_GLOBAL(pnv_wakeup_loss) +.global pnv_wakeup_loss +pnv_wakeup_loss: ld r1,PACAR1(r13) BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT @@ -684,7 +849,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * R3 here contains the value that will be returned to the caller * of power7_nap. */ -_GLOBAL(pnv_wakeup_noloss) +pnv_wakeup_noloss: lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 bne pnv_wakeup_loss diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 5f202a566ec5..f2b724cd9e64 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -711,13 +711,16 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) return tbl; } -void iommu_free_table(struct iommu_table *tbl, const char *node_name) +static void iommu_table_free(struct kref *kref) { unsigned long bitmap_sz; unsigned int order; + struct iommu_table *tbl; - if (!tbl) - return; + tbl = container_of(kref, struct iommu_table, it_kref); + + if (tbl->it_ops->free) + tbl->it_ops->free(tbl); if (!tbl->it_map) { kfree(tbl); @@ -733,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) /* verify that table contains no entries */ if (!bitmap_empty(tbl->it_map, tbl->it_size)) - pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); + pr_warn("%s: Unexpected TCEs\n", __func__); /* calculate bitmap size in bytes */ bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); @@ -746,6 +749,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) kfree(tbl); } +struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl) +{ + if (kref_get_unless_zero(&tbl->it_kref)) + return tbl; + + return NULL; +} +EXPORT_SYMBOL_GPL(iommu_tce_table_get); + +int iommu_tce_table_put(struct iommu_table *tbl) +{ + if (WARN_ON(!tbl)) + return 0; + + return kref_put(&tbl->it_kref, iommu_table_free); +} +EXPORT_SYMBOL_GPL(iommu_tce_table_put); + /* Creates TCEs for a user provided buffer. The user buffer must be * contiguous real kernel storage (not vmalloc). The address passed here * comprises a page address and offset into that page. The dma_addr_t @@ -942,47 +963,36 @@ void iommu_flush_tce(struct iommu_table *tbl) } EXPORT_SYMBOL_GPL(iommu_flush_tce); -int iommu_tce_clear_param_check(struct iommu_table *tbl, - unsigned long ioba, unsigned long tce_value, - unsigned long npages) +int iommu_tce_check_ioba(unsigned long page_shift, + unsigned long offset, unsigned long size, + unsigned long ioba, unsigned long npages) { - /* tbl->it_ops->clear() does not support any value but 0 */ - if (tce_value) - return -EINVAL; + unsigned long mask = (1UL << page_shift) - 1; - if (ioba & ~IOMMU_PAGE_MASK(tbl)) + if (ioba & mask) return -EINVAL; - ioba >>= tbl->it_page_shift; - if (ioba < tbl->it_offset) + ioba >>= page_shift; + if (ioba < offset) return -EINVAL; - if ((ioba + npages) > (tbl->it_offset + tbl->it_size)) + if ((ioba + 1) > (offset + size)) return -EINVAL; return 0; } -EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); +EXPORT_SYMBOL_GPL(iommu_tce_check_ioba); -int iommu_tce_put_param_check(struct iommu_table *tbl, - unsigned long ioba, unsigned long tce) +int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa) { - if (tce & ~IOMMU_PAGE_MASK(tbl)) - return -EINVAL; - - if (ioba & ~IOMMU_PAGE_MASK(tbl)) - return -EINVAL; + unsigned long mask = (1UL << page_shift) - 1; - ioba >>= tbl->it_page_shift; - if (ioba < tbl->it_offset) - return -EINVAL; - - if ((ioba + 1) > (tbl->it_offset + tbl->it_size)) + if (gpa & mask) return -EINVAL; return 0; } -EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); +EXPORT_SYMBOL_GPL(iommu_tce_check_gpa); long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction) @@ -1004,6 +1014,31 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, } EXPORT_SYMBOL_GPL(iommu_tce_xchg); +#ifdef CONFIG_PPC_BOOK3S_64 +long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, + unsigned long *hpa, enum dma_data_direction *direction) +{ + long ret; + + ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + + if (!ret && ((*direction == DMA_FROM_DEVICE) || + (*direction == DMA_BIDIRECTIONAL))) { + struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT); + + if (likely(pg)) { + SetPageDirty(pg); + } else { + tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + ret = -EFAULT; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm); +#endif + int iommu_take_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index a018f5cae899..5c291df30fe3 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -65,7 +65,6 @@ #include <asm/machdep.h> #include <asm/udbg.h> #include <asm/smp.h> -#include <asm/debug.h> #include <asm/livepatch.h> #include <asm/asm-prototypes.h> @@ -442,46 +441,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) return sum; } -#ifdef CONFIG_HOTPLUG_CPU -void migrate_irqs(void) -{ - struct irq_desc *desc; - unsigned int irq; - static int warned; - cpumask_var_t mask; - const struct cpumask *map = cpu_online_mask; - - alloc_cpumask_var(&mask, GFP_KERNEL); - - for_each_irq_desc(irq, desc) { - struct irq_data *data; - struct irq_chip *chip; - - data = irq_desc_get_irq_data(desc); - if (irqd_is_per_cpu(data)) - continue; - - chip = irq_data_get_irq_chip(data); - - cpumask_and(mask, irq_data_get_affinity_mask(data), map); - if (cpumask_any(mask) >= nr_cpu_ids) { - pr_warn("Breaking affinity for irq %i\n", irq); - cpumask_copy(mask, map); - } - if (chip->irq_set_affinity) - chip->irq_set_affinity(data, mask, true); - else if (desc->action && !(warned++)) - pr_err("Cannot set affinity for irq %i\n", irq); - } - - free_cpumask_var(mask); - - local_irq_enable(); - mdelay(1); - local_irq_disable(); -} -#endif - static inline void check_stack_overflow(void) { #ifdef CONFIG_DEBUG_STACKOVERFLOW diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c new file mode 100644 index 000000000000..6c089d9757c9 --- /dev/null +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -0,0 +1,104 @@ +/* + * Dynamic Ftrace based Kprobes Optimization + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) Hitachi Ltd., 2012 + * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> + * IBM Corporation + */ +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/hardirq.h> +#include <linux/preempt.h> +#include <linux/ftrace.h> + +static nokprobe_inline +int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb, unsigned long orig_nip) +{ + /* + * Emulate singlestep (and also recover regs->nip) + * as if there is a nop + */ + regs->nip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); + if (orig_nip) + regs->nip = orig_nip; + return 1; +} + +int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + if (kprobe_ftrace(p)) + return __skip_singlestep(p, regs, kcb, 0); + else + return 0; +} +NOKPROBE_SYMBOL(skip_singlestep); + +/* Ftrace callback handler for kprobes */ +void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + hard_irq_disable(); + + p = get_kprobe((kprobe_opcode_t *)nip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + unsigned long orig_nip = regs->nip; + + /* + * On powerpc, NIP is *before* this instruction for the + * pre handler + */ + regs->nip -= MCOUNT_INSN_SIZE; + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) + __skip_singlestep(p, regs, kcb, orig_nip); + /* + * If pre_handler returns !0, it sets regs->nip and + * resets current kprobe. + */ + } +end: + local_irq_restore(flags); +} +NOKPROBE_SYMBOL(kprobe_ftrace_handler); + +int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index fce05a38851c..160ae0fa7d0d 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -35,6 +35,7 @@ #include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> +#include <asm/sections.h> #include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -42,7 +43,86 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; -int __kprobes arch_prepare_kprobe(struct kprobe *p) +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + return (addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)_stext && + addr < (unsigned long)__head_end); +} + +kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) +{ + kprobe_opcode_t *addr; + +#ifdef PPC64_ELF_ABI_v2 + /* PPC64 ABIv2 needs local entry point */ + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); + if (addr && !offset) { +#ifdef CONFIG_KPROBES_ON_FTRACE + unsigned long faddr; + /* + * Per livepatch.h, ftrace location is always within the first + * 16 bytes of a function on powerpc with -mprofile-kernel. + */ + faddr = ftrace_location_range((unsigned long)addr, + (unsigned long)addr + 16); + if (faddr) + addr = (kprobe_opcode_t *)faddr; + else +#endif + addr = (kprobe_opcode_t *)ppc_function_entry(addr); + } +#elif defined(PPC64_ELF_ABI_v1) + /* + * 64bit powerpc ABIv1 uses function descriptors: + * - Check for the dot variant of the symbol first. + * - If that fails, try looking up the symbol provided. + * + * This ensures we always get to the actual symbol and not + * the descriptor. + * + * Also handle <module:symbol> format. + */ + char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN]; + const char *modsym; + bool dot_appended = false; + if ((modsym = strchr(name, ':')) != NULL) { + modsym++; + if (*modsym != '\0' && *modsym != '.') { + /* Convert to <module:.symbol> */ + strncpy(dot_name, name, modsym - name); + dot_name[modsym - name] = '.'; + dot_name[modsym - name + 1] = '\0'; + strncat(dot_name, modsym, + sizeof(dot_name) - (modsym - name) - 2); + dot_appended = true; + } else { + dot_name[0] = '\0'; + strncat(dot_name, name, sizeof(dot_name) - 1); + } + } else if (name[0] != '.') { + dot_name[0] = '.'; + dot_name[1] = '\0'; + strncat(dot_name, name, KSYM_NAME_LEN - 2); + dot_appended = true; + } else { + dot_name[0] = '\0'; + strncat(dot_name, name, KSYM_NAME_LEN - 1); + } + addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name); + if (!addr && dot_appended) { + /* Let's try the original non-dot symbol lookup */ + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); + } +#else + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); +#endif + + return addr; +} + +int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; kprobe_opcode_t insn = *p->addr; @@ -74,30 +154,34 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) p->ainsn.boostable = 0; return ret; } +NOKPROBE_SYMBOL(arch_prepare_kprobe); -void __kprobes arch_arm_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { *p->addr = BREAKPOINT_INSTRUCTION; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } +NOKPROBE_SYMBOL(arch_arm_kprobe); -void __kprobes arch_disarm_kprobe(struct kprobe *p) +void arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } +NOKPROBE_SYMBOL(arch_disarm_kprobe); -void __kprobes arch_remove_kprobe(struct kprobe *p) +void arch_remove_kprobe(struct kprobe *p) { if (p->ainsn.insn) { free_insn_slot(p->ainsn.insn, 0); p->ainsn.insn = NULL; } } +NOKPROBE_SYMBOL(arch_remove_kprobe); -static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { enable_single_step(regs); @@ -110,37 +194,80 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->nip = (unsigned long)p->ainsn.insn; } -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); kcb->prev_kprobe.status = kcb->kprobe_status; kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr; } -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, +static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_msr = regs->msr; } -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) +bool arch_function_offset_within_entry(unsigned long offset) +{ +#ifdef PPC64_ELF_ABI_v2 +#ifdef CONFIG_KPROBES_ON_FTRACE + return offset <= 16; +#else + return offset <= 8; +#endif +#else + return !offset; +#endif +} + +void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->link; /* Replace the return addr with trampoline addr */ regs->link = (unsigned long)kretprobe_trampoline; } +NOKPROBE_SYMBOL(arch_prepare_kretprobe); -int __kprobes kprobe_handler(struct pt_regs *regs) +int try_to_emulate(struct kprobe *p, struct pt_regs *regs) +{ + int ret; + unsigned int insn = *p->ainsn.insn; + + /* regs->nip is also adjusted if emulate_step returns 1 */ + ret = emulate_step(regs, insn); + if (ret > 0) { + /* + * Once this instruction has been boosted + * successfully, set the boostable flag + */ + if (unlikely(p->ainsn.boostable == 0)) + p->ainsn.boostable = 1; + } else if (ret < 0) { + /* + * We don't allow kprobes on mtmsr(d)/rfi(d), etc. + * So, we should never get here... but, its still + * good to catch them, just in case... + */ + printk("Can't step on instruction %x\n", insn); + BUG(); + } else if (ret == 0) + /* This instruction can't be boosted */ + p->ainsn.boostable = -1; + + return ret; +} +NOKPROBE_SYMBOL(try_to_emulate); + +int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; @@ -177,10 +304,17 @@ int __kprobes kprobe_handler(struct pt_regs *regs) */ save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); - kcb->kprobe_saved_msr = regs->msr; kprobes_inc_nmissed_count(p); prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_REENTER; + if (p->ainsn.boostable >= 0) { + ret = try_to_emulate(p, regs); + + if (ret > 0) { + restore_previous_kprobe(kcb); + return 1; + } + } return 1; } else { if (*addr != BREAKPOINT_INSTRUCTION) { @@ -197,7 +331,9 @@ int __kprobes kprobe_handler(struct pt_regs *regs) } p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { - goto ss_probe; + if (!skip_singlestep(p, regs, kcb)) + goto ss_probe; + ret = 1; } } goto no_kprobe; @@ -235,18 +371,9 @@ int __kprobes kprobe_handler(struct pt_regs *regs) ss_probe: if (p->ainsn.boostable >= 0) { - unsigned int insn = *p->ainsn.insn; + ret = try_to_emulate(p, regs); - /* regs->nip is also adjusted if emulate_step returns 1 */ - ret = emulate_step(regs, insn); if (ret > 0) { - /* - * Once this instruction has been boosted - * successfully, set the boostable flag - */ - if (unlikely(p->ainsn.boostable == 0)) - p->ainsn.boostable = 1; - if (p->post_handler) p->post_handler(p, regs, 0); @@ -254,17 +381,7 @@ ss_probe: reset_current_kprobe(); preempt_enable_no_resched(); return 1; - } else if (ret < 0) { - /* - * We don't allow kprobes on mtmsr(d)/rfi(d), etc. - * So, we should never get here... but, its still - * good to catch them, just in case... - */ - printk("Can't step on instruction %x\n", insn); - BUG(); - } else if (ret == 0) - /* This instruction can't be boosted */ - p->ainsn.boostable = -1; + } } prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; @@ -274,6 +391,7 @@ no_kprobe: preempt_enable_no_resched(); return ret; } +NOKPROBE_SYMBOL(kprobe_handler); /* * Function return probe trampoline: @@ -291,8 +409,7 @@ asm(".global kretprobe_trampoline\n" /* * Called when the probe at kretprobe trampoline is hit */ -static int __kprobes trampoline_probe_handler(struct kprobe *p, - struct pt_regs *regs) +static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; @@ -361,6 +478,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, */ return 1; } +NOKPROBE_SYMBOL(trampoline_probe_handler); /* * Called after single-stepping. p->addr is the address of the @@ -370,7 +488,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * single-stepped a copy of the instruction. The address of this * copy is p->ainsn.insn. */ -int __kprobes kprobe_post_handler(struct pt_regs *regs) +int kprobe_post_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -410,8 +528,9 @@ out: return 1; } +NOKPROBE_SYMBOL(kprobe_post_handler); -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -474,13 +593,15 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) } return 0; } +NOKPROBE_SYMBOL(kprobe_fault_handler); unsigned long arch_deref_entry_point(void *entry) { return ppc_global_function_entry(entry); } +NOKPROBE_SYMBOL(arch_deref_entry_point); -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -497,17 +618,20 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) return 1; } +NOKPROBE_SYMBOL(setjmp_pre_handler); -void __used __kprobes jprobe_return(void) +void __used jprobe_return(void) { asm volatile("trap" ::: "memory"); } +NOKPROBE_SYMBOL(jprobe_return); -static void __used __kprobes jprobe_return_end(void) +static void __used jprobe_return_end(void) { -}; +} +NOKPROBE_SYMBOL(jprobe_return_end); -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -520,6 +644,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) preempt_enable_no_resched(); return 1; } +NOKPROBE_SYMBOL(longjmp_break_handler); static struct kprobe trampoline_p = { .addr = (kprobe_opcode_t *) &kretprobe_trampoline, @@ -531,10 +656,11 @@ int __init arch_init_kprobes(void) return register_kprobe(&trampoline_p); } -int __kprobes arch_trampoline_kprobe(struct kprobe *p) +int arch_trampoline_kprobe(struct kprobe *p) { if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) return 1; return 0; } +NOKPROBE_SYMBOL(arch_trampoline_kprobe); diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index a1475e6aef3a..5f9eada3519b 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -221,6 +221,8 @@ static void machine_check_process_queued_event(struct irq_work *work) { int index; + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + /* * For now just print it to console. * TODO: log this error event to FSP or nvram. @@ -228,12 +230,13 @@ static void machine_check_process_queued_event(struct irq_work *work) while (__this_cpu_read(mce_queue_count) > 0) { index = __this_cpu_read(mce_queue_count) - 1; machine_check_print_event_info( - this_cpu_ptr(&mce_event_queue[index])); + this_cpu_ptr(&mce_event_queue[index]), false); __this_cpu_dec(mce_queue_count); } } -void machine_check_print_event_info(struct machine_check_event *evt) +void machine_check_print_event_info(struct machine_check_event *evt, + bool user_mode) { const char *level, *sevstr, *subtype; static const char *mc_ue_types[] = { @@ -310,7 +313,16 @@ void machine_check_print_event_info(struct machine_check_event *evt) printk("%s%s Machine check interrupt [%s]\n", level, sevstr, evt->disposition == MCE_DISPOSITION_RECOVERED ? - "Recovered" : "[Not recovered"); + "Recovered" : "Not recovered"); + + if (user_mode) { + printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, + evt->srr0, current->pid, current->comm); + } else { + printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, + (void *)evt->srr0); + } + printk("%s Initiator: %s\n", level, evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); switch (evt->error_type) { diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 763d6f58caa8..f913139bb0c2 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -72,10 +72,14 @@ void __flush_tlb_power8(unsigned int action) void __flush_tlb_power9(unsigned int action) { + unsigned int num_sets; + if (radix_enabled()) - flush_tlb_206(POWER9_TLB_SETS_RADIX, action); + num_sets = POWER9_TLB_SETS_RADIX; + else + num_sets = POWER9_TLB_SETS_HASH; - flush_tlb_206(POWER9_TLB_SETS_HASH, action); + flush_tlb_206(num_sets, action); } @@ -147,159 +151,365 @@ static int mce_flush(int what) return 0; } -static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat) -{ - if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB)) - dsisr &= ~slb; - if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT)) - dsisr &= ~erat; - if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB)) - dsisr &= ~tlb; - /* Any other errors we don't understand? */ - if (dsisr) - return 0; - return 1; -} - -static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) + +struct mce_ierror_table { + unsigned long srr1_mask; + unsigned long srr1_value; + bool nip_valid; /* nip is a valid indicator of faulting address */ + unsigned int error_type; + unsigned int error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct mce_ierror_table mce_p7_ierror_table[] = { +{ 0x00000000001c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x00000000001c0000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, 0, 0, 0, 0, 0 } }; + +static const struct mce_ierror_table mce_p8_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000001c0000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008000000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008040000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, 0, 0, 0, 0, 0 } }; + +static const struct mce_ierror_table mce_p9_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008000000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008040000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000080c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008100000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008140000, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x0000000008180000, false, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x00000000081c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, 0, 0, 0, 0, 0 } }; + +struct mce_derror_table { + unsigned long dsisr_value; + bool dar_valid; /* dar is a valid indicator of faulting address */ + unsigned int error_type; + unsigned int error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct mce_derror_table mce_p7_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000040, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, false, 0, 0, 0, 0 } }; + +static const struct mce_derror_table mce_p8_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00002000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00001000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000200, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, false, 0, 0, 0, 0 } }; + +static const struct mce_derror_table mce_p9_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00002000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00001000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000200, false, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000040, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000020, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000010, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000008, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, false, 0, 0, 0, 0 } }; + +static int mce_handle_ierror(struct pt_regs *regs, + const struct mce_ierror_table table[], + struct mce_error_info *mce_err, uint64_t *addr) { - long handled = 1; + uint64_t srr1 = regs->msr; + int handled = 0; + int i; + + *addr = 0; + + for (i = 0; table[i].srr1_mask; i++) { + if ((srr1 & table[i].srr1_mask) != table[i].srr1_value) + continue; + + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + handled = mce_flush(MCE_FLUSH_SLB); + break; + case MCE_ERROR_TYPE_ERAT: + handled = mce_flush(MCE_FLUSH_ERAT); + break; + case MCE_ERROR_TYPE_TLB: + handled = mce_flush(MCE_FLUSH_TLB); + break; + } - /* - * flush and reload SLBs for SLB errors and flush TLBs for TLB errors. - * reset the error bits whenever we handle them so that at the end - * we can check whether we handled all of them or not. - * */ -#ifdef CONFIG_PPC_STD_MMU_64 - if (dsisr & slb_error_bits) { - flush_and_reload_slb(); - /* reset error bits */ - dsisr &= ~(slb_error_bits); - } - if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); - /* reset error bits */ - dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB; + /* now fill in mce_error_info */ + mce_err->error_type = table[i].error_type; + switch (table[i].error_type) { + case MCE_ERROR_TYPE_UE: + mce_err->u.ue_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_SLB: + mce_err->u.slb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_ERAT: + mce_err->u.erat_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_TLB: + mce_err->u.tlb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_USER: + mce_err->u.user_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_RA: + mce_err->u.ra_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_LINK: + mce_err->u.link_error_type = table[i].error_subtype; + break; + } + mce_err->severity = table[i].severity; + mce_err->initiator = table[i].initiator; + if (table[i].nip_valid) + *addr = regs->nip; + return handled; } -#endif - /* Any other errors we don't understand? */ - if (dsisr & 0xffffffffUL) - handled = 0; - return handled; -} + mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; + mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->initiator = MCE_INITIATOR_CPU; -static long mce_handle_derror_p7(uint64_t dsisr) -{ - return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS); + return 0; } -static long mce_handle_common_ierror(uint64_t srr1) +static int mce_handle_derror(struct pt_regs *regs, + const struct mce_derror_table table[], + struct mce_error_info *mce_err, uint64_t *addr) { - long handled = 0; - - switch (P7_SRR1_MC_IFETCH(srr1)) { - case 0: - break; -#ifdef CONFIG_PPC_STD_MMU_64 - case P7_SRR1_MC_IFETCH_SLB_PARITY: - case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: - /* flush and reload SLBs for SLB errors. */ - flush_and_reload_slb(); - handled = 1; - break; - case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); - handled = 1; + uint64_t dsisr = regs->dsisr; + int handled = 0; + int found = 0; + int i; + + *addr = 0; + + for (i = 0; table[i].dsisr_value; i++) { + if (!(dsisr & table[i].dsisr_value)) + continue; + + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + if (mce_flush(MCE_FLUSH_SLB)) + handled = 1; + break; + case MCE_ERROR_TYPE_ERAT: + if (mce_flush(MCE_FLUSH_ERAT)) + handled = 1; + break; + case MCE_ERROR_TYPE_TLB: + if (mce_flush(MCE_FLUSH_TLB)) + handled = 1; + break; } - break; -#endif - default: - break; - } - return handled; -} - -static long mce_handle_ierror_p7(uint64_t srr1) -{ - long handled = 0; - - handled = mce_handle_common_ierror(srr1); + /* + * Attempt to handle multiple conditions, but only return + * one. Ensure uncorrectable errors are first in the table + * to match. + */ + if (found) + continue; + + /* now fill in mce_error_info */ + mce_err->error_type = table[i].error_type; + switch (table[i].error_type) { + case MCE_ERROR_TYPE_UE: + mce_err->u.ue_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_SLB: + mce_err->u.slb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_ERAT: + mce_err->u.erat_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_TLB: + mce_err->u.tlb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_USER: + mce_err->u.user_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_RA: + mce_err->u.ra_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_LINK: + mce_err->u.link_error_type = table[i].error_subtype; + break; + } + mce_err->severity = table[i].severity; + mce_err->initiator = table[i].initiator; + if (table[i].dar_valid) + *addr = regs->dar; -#ifdef CONFIG_PPC_STD_MMU_64 - if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { - flush_and_reload_slb(); - handled = 1; + found = 1; } -#endif - return handled; -} -static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1) -{ - switch (P7_SRR1_MC_IFETCH(srr1)) { - case P7_SRR1_MC_IFETCH_SLB_PARITY: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - break; - case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - break; - case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - break; - case P7_SRR1_MC_IFETCH_UE: - case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; - break; - case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = - MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; - break; - } -} + if (found) + return handled; -static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1) -{ - mce_get_common_ierror(mce_err, srr1); - if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; - } -} + mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; + mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->initiator = MCE_INITIATOR_CPU; -static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr) -{ - if (dsisr & P7_DSISR_MC_UE) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; - } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = - MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; - } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; - } + return 0; } static long mce_handle_ue_error(struct pt_regs *regs) @@ -320,292 +530,42 @@ static long mce_handle_ue_error(struct pt_regs *regs) return handled; } -long __machine_check_early_realmode_p7(struct pt_regs *regs) +static long mce_handle_error(struct pt_regs *regs, + const struct mce_derror_table dtable[], + const struct mce_ierror_table itable[]) { - uint64_t srr1, nip, addr; - long handled = 1; - struct mce_error_info mce_error_info = { 0 }; - - mce_error_info.severity = MCE_SEV_ERROR_SYNC; - mce_error_info.initiator = MCE_INITIATOR_CPU; - - srr1 = regs->msr; - nip = regs->nip; + struct mce_error_info mce_err = { 0 }; + uint64_t addr; + uint64_t srr1 = regs->msr; + long handled; - /* - * Handle memory errors depending whether this was a load/store or - * ifetch exception. Also, populate the mce error_type and - * type-specific error_type from either SRR1 or DSISR, depending - * whether this was a load/store or ifetch exception - */ - if (P7_SRR1_MC_LOADSTORE(srr1)) { - handled = mce_handle_derror_p7(regs->dsisr); - mce_get_derror_p7(&mce_error_info, regs->dsisr); - addr = regs->dar; - } else { - handled = mce_handle_ierror_p7(srr1); - mce_get_ierror_p7(&mce_error_info, srr1); - addr = regs->nip; - } + if (SRR1_MC_LOADSTORE(srr1)) + handled = mce_handle_derror(regs, dtable, &mce_err, &addr); + else + handled = mce_handle_ierror(regs, itable, &mce_err, &addr); - /* Handle UE error. */ - if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) + if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) handled = mce_handle_ue_error(regs); - save_mce_event(regs, handled, &mce_error_info, nip, addr); - return handled; -} - -static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1) -{ - mce_get_common_ierror(mce_err, srr1); - if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } -} - -static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr) -{ - mce_get_derror_p7(mce_err, dsisr); - if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } -} - -static long mce_handle_ierror_p8(uint64_t srr1) -{ - long handled = 0; + save_mce_event(regs, handled, &mce_err, regs->nip, addr); - handled = mce_handle_common_ierror(srr1); - -#ifdef CONFIG_PPC_STD_MMU_64 - if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { - flush_and_reload_slb(); - handled = 1; - } -#endif return handled; } -static long mce_handle_derror_p8(uint64_t dsisr) -{ - return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS); -} - -long __machine_check_early_realmode_p8(struct pt_regs *regs) -{ - uint64_t srr1, nip, addr; - long handled = 1; - struct mce_error_info mce_error_info = { 0 }; - - mce_error_info.severity = MCE_SEV_ERROR_SYNC; - mce_error_info.initiator = MCE_INITIATOR_CPU; - - srr1 = regs->msr; - nip = regs->nip; - - if (P7_SRR1_MC_LOADSTORE(srr1)) { - handled = mce_handle_derror_p8(regs->dsisr); - mce_get_derror_p8(&mce_error_info, regs->dsisr); - addr = regs->dar; - } else { - handled = mce_handle_ierror_p8(srr1); - mce_get_ierror_p8(&mce_error_info, srr1); - addr = regs->nip; - } - - /* Handle UE error. */ - if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) - handled = mce_handle_ue_error(regs); - - save_mce_event(regs, handled, &mce_error_info, nip, addr); - return handled; -} - -static int mce_handle_derror_p9(struct pt_regs *regs) -{ - uint64_t dsisr = regs->dsisr; - - return mce_handle_flush_derrors(dsisr, - P9_DSISR_MC_SLB_PARITY_MFSLB | - P9_DSISR_MC_SLB_MULTIHIT_MFSLB, - - P9_DSISR_MC_TLB_MULTIHIT_MFTLB, - - P9_DSISR_MC_ERAT_MULTIHIT); -} - -static int mce_handle_ierror_p9(struct pt_regs *regs) +long __machine_check_early_realmode_p7(struct pt_regs *regs) { - uint64_t srr1 = regs->msr; + /* P7 DD1 leaves top bits of DSISR undefined */ + regs->dsisr &= 0x0000ffff; - switch (P9_SRR1_MC_IFETCH(srr1)) { - case P9_SRR1_MC_IFETCH_SLB_PARITY: - case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: - return mce_flush(MCE_FLUSH_SLB); - case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: - return mce_flush(MCE_FLUSH_TLB); - case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: - return mce_flush(MCE_FLUSH_ERAT); - default: - return 0; - } + return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table); } -static void mce_get_derror_p9(struct pt_regs *regs, - struct mce_error_info *mce_err, uint64_t *addr) -{ - uint64_t dsisr = regs->dsisr; - - mce_err->severity = MCE_SEV_ERROR_SYNC; - mce_err->initiator = MCE_INITIATOR_CPU; - - if (dsisr & P9_DSISR_MC_USER_TLBIE) - *addr = regs->nip; - else - *addr = regs->dar; - - if (dsisr & P9_DSISR_MC_UE) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; - } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; - } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) { - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT; - } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) { - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT; - } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) { - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - } else if (dsisr & P9_DSISR_MC_USER_TLBIE) { - mce_err->error_type = MCE_ERROR_TYPE_USER; - mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE; - } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - } else if (dsisr & P9_DSISR_MC_RA_LOAD) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD; - } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE; - } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; - } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN; - } -} - -static void mce_get_ierror_p9(struct pt_regs *regs, - struct mce_error_info *mce_err, uint64_t *addr) +long __machine_check_early_realmode_p8(struct pt_regs *regs) { - uint64_t srr1 = regs->msr; - - switch (P9_SRR1_MC_IFETCH(srr1)) { - case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: - case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: - mce_err->severity = MCE_SEV_FATAL; - break; - default: - mce_err->severity = MCE_SEV_ERROR_SYNC; - break; - } - - mce_err->initiator = MCE_INITIATOR_CPU; - - *addr = regs->nip; - - switch (P9_SRR1_MC_IFETCH(srr1)) { - case P9_SRR1_MC_IFETCH_UE: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; - break; - case P9_SRR1_MC_IFETCH_SLB_PARITY: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - break; - case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - break; - case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - break; - case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - break; - case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; - break; - case P9_SRR1_MC_IFETCH_LINK_TIMEOUT: - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT; - break; - case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT: - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT; - break; - case P9_SRR1_MC_IFETCH_RA: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH; - break; - case P9_SRR1_MC_IFETCH_RA_TABLEWALK: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH; - break; - case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_STORE; - break; - case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT; - break; - case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN; - break; - default: - break; - } + return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table); } long __machine_check_early_realmode_p9(struct pt_regs *regs) { - uint64_t nip, addr; - long handled; - struct mce_error_info mce_error_info = { 0 }; - - nip = regs->nip; - - if (P9_SRR1_MC_LOADSTORE(regs->msr)) { - handled = mce_handle_derror_p9(regs); - mce_get_derror_p9(regs, &mce_error_info, &addr); - } else { - handled = mce_handle_ierror_p9(regs); - mce_get_ierror_p9(regs, &mce_error_info, &addr); - } - - /* Handle UE error. */ - if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) - handled = mce_handle_ue_error(regs); - - save_mce_event(regs, handled, &mce_error_info, nip, addr); - return handled; + return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); } diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index d5e2b8309939..eae61b044e9e 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -389,51 +389,40 @@ static int nvram_pstore_open(struct pstore_info *psi) /** * nvram_pstore_write - pstore write callback for nvram - * @type: Type of message logged - * @reason: reason behind dump (oops/panic) - * @id: identifier to indicate the write performed - * @part: pstore writes data to registered buffer in parts, - * part number will indicate the same. - * @count: Indicates oops count - * @compressed: Flag to indicate the log is compressed - * @size: number of bytes written to the registered buffer - * @psi: registered pstore_info structure + * @record: pstore record to write, with @id to be set * * Called by pstore_dump() when an oops or panic report is logged in the * printk buffer. * Returns 0 on successful write. */ -static int nvram_pstore_write(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, int count, - bool compressed, size_t size, - struct pstore_info *psi) +static int nvram_pstore_write(struct pstore_record *record) { int rc; unsigned int err_type = ERR_TYPE_KERNEL_PANIC; struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf; /* part 1 has the recent messages from printk buffer */ - if (part > 1 || (type != PSTORE_TYPE_DMESG)) + if (record->part > 1 || (record->type != PSTORE_TYPE_DMESG)) return -1; if (clobbering_unread_rtas_event()) return -1; oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); - oops_hdr->report_length = cpu_to_be16(size); + oops_hdr->report_length = cpu_to_be16(record->size); oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds()); - if (compressed) + if (record->compressed) err_type = ERR_TYPE_KERNEL_PANIC_GZ; rc = nvram_write_os_partition(&oops_log_partition, oops_buf, - (int) (sizeof(*oops_hdr) + size), err_type, count); + (int) (sizeof(*oops_hdr) + record->size), err_type, + record->count); if (rc != 0) return rc; - *id = part; + record->id = record->part; return 0; } @@ -442,10 +431,7 @@ static int nvram_pstore_write(enum pstore_type_id type, * Returns the length of the data we read from each partition. * Returns 0 if we've been called before. */ -static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, - int *count, struct timespec *time, char **buf, - bool *compressed, ssize_t *ecc_notice_size, - struct pstore_info *psi) +static ssize_t nvram_pstore_read(struct pstore_record *record) { struct oops_log_info *oops_hdr; unsigned int err_type, id_no, size = 0; @@ -459,40 +445,40 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, switch (nvram_type_ids[read_type]) { case PSTORE_TYPE_DMESG: part = &oops_log_partition; - *type = PSTORE_TYPE_DMESG; + record->type = PSTORE_TYPE_DMESG; break; case PSTORE_TYPE_PPC_COMMON: sig = NVRAM_SIG_SYS; part = &common_partition; - *type = PSTORE_TYPE_PPC_COMMON; - *id = PSTORE_TYPE_PPC_COMMON; - time->tv_sec = 0; - time->tv_nsec = 0; + record->type = PSTORE_TYPE_PPC_COMMON; + record->id = PSTORE_TYPE_PPC_COMMON; + record->time.tv_sec = 0; + record->time.tv_nsec = 0; break; #ifdef CONFIG_PPC_PSERIES case PSTORE_TYPE_PPC_RTAS: part = &rtas_log_partition; - *type = PSTORE_TYPE_PPC_RTAS; - time->tv_sec = last_rtas_event; - time->tv_nsec = 0; + record->type = PSTORE_TYPE_PPC_RTAS; + record->time.tv_sec = last_rtas_event; + record->time.tv_nsec = 0; break; case PSTORE_TYPE_PPC_OF: sig = NVRAM_SIG_OF; part = &of_config_partition; - *type = PSTORE_TYPE_PPC_OF; - *id = PSTORE_TYPE_PPC_OF; - time->tv_sec = 0; - time->tv_nsec = 0; + record->type = PSTORE_TYPE_PPC_OF; + record->id = PSTORE_TYPE_PPC_OF; + record->time.tv_sec = 0; + record->time.tv_nsec = 0; break; #endif #ifdef CONFIG_PPC_POWERNV case PSTORE_TYPE_PPC_OPAL: sig = NVRAM_SIG_FW; part = &skiboot_partition; - *type = PSTORE_TYPE_PPC_OPAL; - *id = PSTORE_TYPE_PPC_OPAL; - time->tv_sec = 0; - time->tv_nsec = 0; + record->type = PSTORE_TYPE_PPC_OPAL; + record->id = PSTORE_TYPE_PPC_OPAL; + record->time.tv_sec = 0; + record->time.tv_nsec = 0; break; #endif default: @@ -520,10 +506,10 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, return 0; } - *count = 0; + record->count = 0; if (part->os_partition) - *id = id_no; + record->id = id_no; if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) { size_t length, hdr_size; @@ -533,34 +519,35 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, /* Old format oops header had 2-byte record size */ hdr_size = sizeof(u16); length = be16_to_cpu(oops_hdr->version); - time->tv_sec = 0; - time->tv_nsec = 0; + record->time.tv_sec = 0; + record->time.tv_nsec = 0; } else { hdr_size = sizeof(*oops_hdr); length = be16_to_cpu(oops_hdr->report_length); - time->tv_sec = be64_to_cpu(oops_hdr->timestamp); - time->tv_nsec = 0; + record->time.tv_sec = be64_to_cpu(oops_hdr->timestamp); + record->time.tv_nsec = 0; } - *buf = kmemdup(buff + hdr_size, length, GFP_KERNEL); + record->buf = kmemdup(buff + hdr_size, length, GFP_KERNEL); kfree(buff); - if (*buf == NULL) + if (record->buf == NULL) return -ENOMEM; - *ecc_notice_size = 0; + record->ecc_notice_size = 0; if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) - *compressed = true; + record->compressed = true; else - *compressed = false; + record->compressed = false; return length; } - *buf = buff; + record->buf = buff; return part->size; } static struct pstore_info nvram_pstore_info = { .owner = THIS_MODULE, .name = "nvram", + .flags = PSTORE_FLAGS_DMESG, .open = nvram_pstore_open, .read = nvram_pstore_read, .write = nvram_pstore_write, diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 2282bf4e63cd..ec60ed0d4aad 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -243,10 +243,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 2. branch to optimized_callback() and emulate_step() */ - kprobe_lookup_name("optimized_callback", op_callback_addr); - kprobe_lookup_name("emulate_step", emulate_step_addr); + op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback"); + emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step"); if (!op_callback_addr || !emulate_step_addr) { - WARN(1, "kprobe_lookup_name() failed\n"); + WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n"); goto error; } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index dfc479df9634..8d63627e067f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -245,3 +245,24 @@ void __init free_unused_pacas(void) free_lppacas(); } + +void copy_mm_to_paca(struct mm_struct *mm) +{ +#ifdef CONFIG_PPC_BOOK3S + mm_context_t *context = &mm->context; + + get_paca()->mm_ctx_id = context->id; +#ifdef CONFIG_PPC_MM_SLICES + VM_BUG_ON(!mm->context.addr_limit); + get_paca()->addr_limit = mm->context.addr_limit; + get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize; + memcpy(&get_paca()->mm_ctx_high_slices_psize, + &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); +#else /* CONFIG_PPC_MM_SLICES */ + get_paca()->mm_ctx_user_psize = context->user_psize; + get_paca()->mm_ctx_sllp = context->sllp; +#endif +#else /* CONFIG_PPC_BOOK3S */ + return; +#endif +} diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index ffda24a38dda..341a7469cab8 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -233,6 +233,14 @@ void pcibios_reset_secondary_bus(struct pci_dev *dev) pci_reset_secondary_bus(dev); } +resource_size_t pcibios_default_alignment(void) +{ + if (ppc_md.pcibios_default_alignment) + return ppc_md.pcibios_default_alignment(); + + return 0; +} + #ifdef CONFIG_PCI_IOV resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno) { @@ -513,7 +521,8 @@ pgprot_t pci_phys_mem_access_prot(struct file *file, * * Returns a negative error code on failure, zero on success. */ -int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, +int pci_mmap_page_range(struct pci_dev *dev, int bar, + struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { resource_size_t offset = diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f5d399e46193..40c4887c27b6 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -55,9 +55,9 @@ #include <asm/kexec.h> #include <asm/opal.h> #include <asm/fadump.h> -#include <asm/debug.h> #include <asm/epapr_hcalls.h> #include <asm/firmware.h> +#include <asm/dt_cpu_ftrs.h> #include <mm/mmu_decl.h> @@ -376,23 +376,31 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * A POWER6 partition in "POWER6 architected" mode * uses the 0x0f000002 PVR value; in POWER5+ mode * it uses 0x0f000001. + * + * If we're using device tree CPU feature discovery then we don't + * support the cpu-version property, and it's the responsibility of the + * firmware/hypervisor to provide the correct feature set for the + * architecture level via the ibm,powerpc-cpu-features binding. */ - prop = of_get_flat_dt_prop(node, "cpu-version", NULL); - if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) - identify_cpu(0, be32_to_cpup(prop)); + if (!dt_cpu_ftrs_in_use()) { + prop = of_get_flat_dt_prop(node, "cpu-version", NULL); + if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) + identify_cpu(0, be32_to_cpup(prop)); - identical_pvr_fixup(node); + check_cpu_feature_properties(node); + check_cpu_pa_features(node); + } - check_cpu_feature_properties(node); - check_cpu_pa_features(node); + identical_pvr_fixup(node); init_mmu_slb_size(node); #ifdef CONFIG_PPC64 - if (nthreads > 1) - cur_cpu_spec->cpu_features |= CPU_FTR_SMT; - else + if (nthreads == 1) cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; + else if (!dt_cpu_ftrs_in_use()) + cur_cpu_spec->cpu_features |= CPU_FTR_SMT; #endif + return 0; } @@ -722,6 +730,8 @@ void __init early_init_devtree(void *params) DBG("Scanning CPUs ...\n"); + dt_cpu_ftrs_scan(); + /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 1c1b44ec7642..dd8a04f3053a 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -815,7 +815,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .virt_base = cpu_to_be32(0xffffffff), .virt_size = cpu_to_be32(0xffffffff), .load_base = cpu_to_be32(0xffffffff), - .min_rma = cpu_to_be32(256), /* 256MB min RMA */ + .min_rma = cpu_to_be32(512), /* 512MB min RMA */ .min_load = cpu_to_be32(0xffffffff), /* full client load */ .min_rma_percent = 0, /* min RMA percentage of total RAM */ .max_pft_size = 48, /* max log_2(hash table size) */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4697da895133..71dcda91755d 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -31,11 +31,11 @@ #include <linux/unistd.h> #include <linux/serial.h> #include <linux/serial_8250.h> -#include <linux/debugfs.h> #include <linux/percpu.h> #include <linux/memblock.h> #include <linux/of_platform.h> #include <linux/hugetlb.h> +#include <asm/debugfs.h> #include <asm/io.h> #include <asm/paca.h> #include <asm/prom.h> @@ -125,6 +125,11 @@ int ppc_do_canonicalize_irqs; EXPORT_SYMBOL(ppc_do_canonicalize_irqs); #endif +#ifdef CONFIG_CRASH_CORE +/* This keeps a track of which one is the crashing cpu. */ +int crashing_cpu = -1; +#endif + /* also used by kexec */ void machine_shutdown(void) { @@ -256,7 +261,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "processor\t: %lu\n", cpu_id); seq_printf(m, "cpu\t\t: "); - if (cur_cpu_spec->pvr_mask) + if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name) seq_printf(m, "%s", cur_cpu_spec->cpu_name); else seq_printf(m, "unknown (%08x)", pvr); @@ -920,6 +925,15 @@ void __init setup_arch(char **cmdline_p) init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; init_mm.brk = klimit; + +#ifdef CONFIG_PPC_MM_SLICES +#ifdef CONFIG_PPC64 + init_mm.context.addr_limit = TASK_SIZE_128TB; +#else +#error "context.addr_limit not initialized." +#endif +#endif + #ifdef CONFIG_PPC_64K_PAGES init_mm.context.pte_frag = NULL; #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index f997154dfc41..f35ff9dea4fb 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -49,6 +49,7 @@ #include <asm/paca.h> #include <asm/time.h> #include <asm/cputable.h> +#include <asm/dt_cpu_ftrs.h> #include <asm/sections.h> #include <asm/btext.h> #include <asm/nvram.h> @@ -230,8 +231,8 @@ static void cpu_ready_for_interrupts(void) * If we are not in hypervisor mode the job is done once for * the whole partition in configure_exceptions(). */ - if (early_cpu_has_feature(CPU_FTR_HVMODE) && - early_cpu_has_feature(CPU_FTR_ARCH_207S)) { + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) { unsigned long lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); } @@ -274,8 +275,10 @@ void __init early_setup(unsigned long dt_ptr) /* -------- printk is _NOT_ safe to use here ! ------- */ - /* Identify CPU type */ - identify_cpu(0, mfspr(SPRN_PVR)); + /* Try new device tree based feature discovery ... */ + if (!dt_cpu_ftrs_init(__va(dt_ptr))) + /* Otherwise use the old style CPU table */ + identify_cpu(0, mfspr(SPRN_PVR)); /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ initialise_paca(&boot_paca, 0); @@ -541,6 +544,9 @@ void __init initialize_cache_info(void) dcache_bsize = ppc64_caches.l1d.block_size; icache_bsize = ppc64_caches.l1i.block_size; + cur_cpu_spec->dcache_bsize = dcache_bsize; + cur_cpu_spec->icache_bsize = icache_bsize; + DBG(" <- initialize_cache_info()\n"); } @@ -637,6 +643,11 @@ void __init emergency_stack_init(void) paca[i].emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 + /* emergency stack for NMI exception handling. */ + ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + klp_init_thread_info(ti); + paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; + /* emergency stack for machine check exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); klp_init_thread_info(ti); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 3a3671172436..e9436c5e1e09 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -14,6 +14,7 @@ #include <linux/uprobes.h> #include <linux/key.h> #include <linux/context_tracking.h> +#include <linux/livepatch.h> #include <asm/hw_breakpoint.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -162,6 +163,9 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) tracehook_notify_resume(regs); } + if (thread_info_flags & _TIF_PATCH_PENDING) + klp_update_patch_state(current); + user_enter(); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 46f89e66a273..df2a41647d8e 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/kvm_ppc.h> +#include <asm/dbell.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/prom.h> @@ -86,8 +87,6 @@ volatile unsigned int cpu_callin_map[NR_CPUS]; int smt_enabled_at_boot = 1; -static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; - /* * Returns 1 if the specified cpu should be brought up during boot. * Used to inhibit booting threads if they've been disabled or @@ -158,32 +157,33 @@ static irqreturn_t tick_broadcast_ipi_action(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t debug_ipi_action(int irq, void *data) +#ifdef CONFIG_NMI_IPI +static irqreturn_t nmi_ipi_action(int irq, void *data) { - if (crash_ipi_function_ptr) { - crash_ipi_function_ptr(get_irq_regs()); - return IRQ_HANDLED; - } - -#ifdef CONFIG_DEBUGGER - debugger_ipi(get_irq_regs()); -#endif /* CONFIG_DEBUGGER */ - + smp_handle_nmi_ipi(get_irq_regs()); return IRQ_HANDLED; } +#endif static irq_handler_t smp_ipi_action[] = { [PPC_MSG_CALL_FUNCTION] = call_function_action, [PPC_MSG_RESCHEDULE] = reschedule_action, [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action, - [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action, +#ifdef CONFIG_NMI_IPI + [PPC_MSG_NMI_IPI] = nmi_ipi_action, +#endif }; +/* + * The NMI IPI is a fallback and not truly non-maskable. It is simpler + * than going through the call function infrastructure, and strongly + * serialized, so it is more appropriate for debugging. + */ const char *smp_ipi_name[] = { [PPC_MSG_CALL_FUNCTION] = "ipi call function", [PPC_MSG_RESCHEDULE] = "ipi reschedule", [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast", - [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger", + [PPC_MSG_NMI_IPI] = "nmi ipi", }; /* optional function to request ipi, for controllers with >= 4 ipis */ @@ -191,14 +191,13 @@ int smp_request_message_ipi(int virq, int msg) { int err; - if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) { + if (msg < 0 || msg > PPC_MSG_NMI_IPI) return -EINVAL; - } -#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE) - if (msg == PPC_MSG_DEBUGGER_BREAK) { +#ifndef CONFIG_NMI_IPI + if (msg == PPC_MSG_NMI_IPI) return 1; - } #endif + err = request_irq(virq, smp_ipi_action[msg], IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND, smp_ipi_name[msg], NULL); @@ -211,17 +210,9 @@ int smp_request_message_ipi(int virq, int msg) #ifdef CONFIG_PPC_SMP_MUXED_IPI struct cpu_messages { long messages; /* current messages */ - unsigned long data; /* data for cause ipi */ }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); -void smp_muxed_ipi_set_data(int cpu, unsigned long data) -{ - struct cpu_messages *info = &per_cpu(ipi_message, cpu); - - info->data = data; -} - void smp_muxed_ipi_set_message(int cpu, int msg) { struct cpu_messages *info = &per_cpu(ipi_message, cpu); @@ -236,14 +227,13 @@ void smp_muxed_ipi_set_message(int cpu, int msg) void smp_muxed_ipi_message_pass(int cpu, int msg) { - struct cpu_messages *info = &per_cpu(ipi_message, cpu); - smp_muxed_ipi_set_message(cpu, msg); + /* * cause_ipi functions are required to include a full barrier * before doing whatever causes the IPI. */ - smp_ops->cause_ipi(cpu, info->data); + smp_ops->cause_ipi(cpu); } #ifdef __BIG_ENDIAN__ @@ -254,11 +244,18 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) irqreturn_t smp_ipi_demux(void) { - struct cpu_messages *info = this_cpu_ptr(&ipi_message); - unsigned long all; - mb(); /* order any irq clear */ + return smp_ipi_demux_relaxed(); +} + +/* sync-free variant. Callers should ensure synchronization */ +irqreturn_t smp_ipi_demux_relaxed(void) +{ + struct cpu_messages *info; + unsigned long all; + + info = this_cpu_ptr(&ipi_message); do { all = xchg(&info->messages, 0); #if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) @@ -278,8 +275,10 @@ irqreturn_t smp_ipi_demux(void) scheduler_ipi(); if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)) tick_broadcast_ipi_handler(); - if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK)) - debug_ipi_action(0, NULL); +#ifdef CONFIG_NMI_IPI + if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI)) + nmi_ipi_action(0, NULL); +#endif } while (info->messages); return IRQ_HANDLED; @@ -316,6 +315,187 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) do_message_pass(cpu, PPC_MSG_CALL_FUNCTION); } +#ifdef CONFIG_NMI_IPI + +/* + * "NMI IPI" system. + * + * NMI IPIs may not be recoverable, so should not be used as ongoing part of + * a running system. They can be used for crash, debug, halt/reboot, etc. + * + * NMI IPIs are globally single threaded. No more than one in progress at + * any time. + * + * The IPI call waits with interrupts disabled until all targets enter the + * NMI handler, then the call returns. + * + * No new NMI can be initiated until targets exit the handler. + * + * The IPI call may time out without all targets entering the NMI handler. + * In that case, there is some logic to recover (and ignore subsequent + * NMI interrupts that may eventually be raised), but the platform interrupt + * handler may not be able to distinguish this from other exception causes, + * which may cause a crash. + */ + +static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0); +static struct cpumask nmi_ipi_pending_mask; +static int nmi_ipi_busy_count = 0; +static void (*nmi_ipi_function)(struct pt_regs *) = NULL; + +static void nmi_ipi_lock_start(unsigned long *flags) +{ + raw_local_irq_save(*flags); + hard_irq_disable(); + while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { + raw_local_irq_restore(*flags); + cpu_relax(); + raw_local_irq_save(*flags); + hard_irq_disable(); + } +} + +static void nmi_ipi_lock(void) +{ + while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) + cpu_relax(); +} + +static void nmi_ipi_unlock(void) +{ + smp_mb(); + WARN_ON(atomic_read(&__nmi_ipi_lock) != 1); + atomic_set(&__nmi_ipi_lock, 0); +} + +static void nmi_ipi_unlock_end(unsigned long *flags) +{ + nmi_ipi_unlock(); + raw_local_irq_restore(*flags); +} + +/* + * Platform NMI handler calls this to ack + */ +int smp_handle_nmi_ipi(struct pt_regs *regs) +{ + void (*fn)(struct pt_regs *); + unsigned long flags; + int me = raw_smp_processor_id(); + int ret = 0; + + /* + * Unexpected NMIs are possible here because the interrupt may not + * be able to distinguish NMI IPIs from other types of NMIs, or + * because the caller may have timed out. + */ + nmi_ipi_lock_start(&flags); + if (!nmi_ipi_busy_count) + goto out; + if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask)) + goto out; + + fn = nmi_ipi_function; + if (!fn) + goto out; + + cpumask_clear_cpu(me, &nmi_ipi_pending_mask); + nmi_ipi_busy_count++; + nmi_ipi_unlock(); + + ret = 1; + + fn(regs); + + nmi_ipi_lock(); + nmi_ipi_busy_count--; +out: + nmi_ipi_unlock_end(&flags); + + return ret; +} + +static void do_smp_send_nmi_ipi(int cpu) +{ + if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu)) + return; + + if (cpu >= 0) { + do_message_pass(cpu, PPC_MSG_NMI_IPI); + } else { + int c; + + for_each_online_cpu(c) { + if (c == raw_smp_processor_id()) + continue; + do_message_pass(c, PPC_MSG_NMI_IPI); + } + } +} + +/* + * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS. + * - fn is the target callback function. + * - delay_us > 0 is the delay before giving up waiting for targets to + * enter the handler, == 0 specifies indefinite delay. + */ +static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +{ + unsigned long flags; + int me = raw_smp_processor_id(); + int ret = 1; + + BUG_ON(cpu == me); + BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS); + + if (unlikely(!smp_ops)) + return 0; + + /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */ + nmi_ipi_lock_start(&flags); + while (nmi_ipi_busy_count) { + nmi_ipi_unlock_end(&flags); + cpu_relax(); + nmi_ipi_lock_start(&flags); + } + + nmi_ipi_function = fn; + + if (cpu < 0) { + /* ALL_OTHERS */ + cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask); + cpumask_clear_cpu(me, &nmi_ipi_pending_mask); + } else { + /* cpumask starts clear */ + cpumask_set_cpu(cpu, &nmi_ipi_pending_mask); + } + nmi_ipi_busy_count++; + nmi_ipi_unlock(); + + do_smp_send_nmi_ipi(cpu); + + while (!cpumask_empty(&nmi_ipi_pending_mask)) { + udelay(1); + if (delay_us) { + delay_us--; + if (!delay_us) + break; + } + } + + nmi_ipi_lock(); + if (!cpumask_empty(&nmi_ipi_pending_mask)) { + /* Could not gather all CPUs */ + ret = 0; + cpumask_clear(&nmi_ipi_pending_mask); + } + nmi_ipi_busy_count--; + nmi_ipi_unlock_end(&flags); + + return ret; +} +#endif /* CONFIG_NMI_IPI */ + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void tick_broadcast(const struct cpumask *mask) { @@ -326,29 +506,22 @@ void tick_broadcast(const struct cpumask *mask) } #endif -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) -void smp_send_debugger_break(void) +#ifdef CONFIG_DEBUGGER +void debugger_ipi_callback(struct pt_regs *regs) { - int cpu; - int me = raw_smp_processor_id(); - - if (unlikely(!smp_ops)) - return; + debugger_ipi(regs); +} - for_each_online_cpu(cpu) - if (cpu != me) - do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); +void smp_send_debugger_break(void) +{ + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000); } #endif #ifdef CONFIG_KEXEC_CORE void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { - crash_ipi_function_ptr = crash_ipi_callback; - if (crash_ipi_callback) { - mb(); - smp_send_debugger_break(); - } + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000); } #endif @@ -439,7 +612,21 @@ int generic_cpu_disable(void) #ifdef CONFIG_PPC64 vdso_data->processorCount--; #endif - migrate_irqs(); + /* Update affinity of all IRQs previously aimed at this CPU */ + irq_migrate_all_off_this_cpu(); + + /* + * Depending on the details of the interrupt controller, it's possible + * that one of the interrupts we just migrated away from this CPU is + * actually already pending on this CPU. If we leave it in that state + * the interrupt will never be EOI'ed, and will never fire again. So + * temporarily enable interrupts here, to allow any pending interrupt to + * be received (and EOI'ed), before we take this CPU offline. + */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); + return 0; } @@ -521,6 +708,16 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) cpu_idle_thread_init(cpu, tidle); + /* + * The platform might need to allocate resources prior to bringing + * up the CPU + */ + if (smp_ops->prepare_cpu) { + rc = smp_ops->prepare_cpu(cpu); + if (rc) + return rc; + } + /* Make sure callin-map entry is 0 (can be leftover a CPU * hotplug */ @@ -787,24 +984,21 @@ static struct sched_domain_topology_level powerpc_topology[] = { { NULL, }, }; -void __init smp_cpus_done(unsigned int max_cpus) +static __init long smp_setup_cpu_workfn(void *data __always_unused) { - cpumask_var_t old_mask; + smp_ops->setup_cpu(boot_cpuid); + return 0; +} - /* We want the setup_cpu() here to be called from CPU 0, but our - * init thread may have been "borrowed" by another CPU in the meantime - * se we pin us down to CPU 0 for a short while +void __init smp_cpus_done(unsigned int max_cpus) +{ + /* + * We want the setup_cpu() here to be called on the boot CPU, but + * init might run on any CPU, so make sure it's invoked on the boot + * CPU. */ - alloc_cpumask_var(&old_mask, GFP_NOWAIT); - cpumask_copy(old_mask, ¤t->cpus_allowed); - set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); - if (smp_ops && smp_ops->setup_cpu) - smp_ops->setup_cpu(boot_cpuid); - - set_cpus_allowed_ptr(current, old_mask); - - free_cpumask_var(old_mask); + work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL); if (smp_ops && smp_ops->bringup_done) smp_ops->bringup_done(); @@ -812,7 +1006,6 @@ void __init smp_cpus_done(unsigned int max_cpus) dump_numa_cpu_topology(); set_sched_topology(powerpc_topology); - } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 66711958493c..d534ed901538 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -59,7 +59,14 @@ EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - save_context_stack(trace, tsk->thread.ksp, tsk, 0); + unsigned long sp; + + if (tsk == current) + sp = current_stack_pointer(); + else + sp = tsk->thread.ksp; + + save_context_stack(trace, sp, tsk, 0); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c index 6ae9bd5086a4..0050b2d2ff7a 100644 --- a/arch/powerpc/kernel/swsusp.c +++ b/arch/powerpc/kernel/swsusp.c @@ -10,6 +10,7 @@ */ #include <linux/sched.h> +#include <linux/suspend.h> #include <asm/current.h> #include <asm/mmu_context.h> #include <asm/switch_to.h> diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index de04c9fbb5cd..a877bf8269fe 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -42,11 +42,11 @@ #include <asm/unistd.h> #include <asm/asm-prototypes.h> -static inline unsigned long do_mmap2(unsigned long addr, size_t len, +static inline long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) { - unsigned long ret = -EINVAL; + long ret = -EINVAL; if (!arch_validate_prot(prot)) goto out; @@ -62,16 +62,16 @@ out: return ret; } -unsigned long sys_mmap2(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) +SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, pgoff) { return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12); } -unsigned long sys_mmap(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, off_t offset) +SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, off_t, offset) { return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT); } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c1fb255a60d6..4437c70c7c2b 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -710,6 +710,10 @@ static int register_cpu_online(unsigned int cpu) struct device_attribute *attrs, *pmc_attrs; int i, nattrs; + /* For cpus present at boot a reference was already grabbed in register_cpu() */ + if (!s->of_node) + s->of_node = of_get_cpu_node(cpu, NULL); + #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_SMT)) device_create_file(s, &dev_attr_smt_snooze_delay); @@ -785,9 +789,9 @@ static int register_cpu_online(unsigned int cpu) return 0; } +#ifdef CONFIG_HOTPLUG_CPU static int unregister_cpu_online(unsigned int cpu) { -#ifdef CONFIG_HOTPLUG_CPU struct cpu *c = &per_cpu(cpu_devices, cpu); struct device *s = &c->dev; struct device_attribute *attrs, *pmc_attrs; @@ -864,9 +868,13 @@ static int unregister_cpu_online(unsigned int cpu) } #endif cacheinfo_cpu_offline(cpu); -#endif /* CONFIG_HOTPLUG_CPU */ + of_node_put(s->of_node); + s->of_node = NULL; return 0; } +#else /* !CONFIG_HOTPLUG_CPU */ +#define unregister_cpu_online NULL +#endif #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE ssize_t arch_cpu_probe(const char *buf, size_t count) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 07b90725855e..2b33cfaac7b8 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -995,8 +995,10 @@ static void __init init_decrementer_clockevent(void) decrementer_clockevent.max_delta_ns = clockevent_delta2ns(decrementer_max, &decrementer_clockevent); + decrementer_clockevent.max_delta_ticks = decrementer_max; decrementer_clockevent.min_delta_ns = clockevent_delta2ns(2, &decrementer_clockevent); + decrementer_clockevent.min_delta_ticks = 2; register_decrementer_clockevent(cpu); } diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile new file mode 100644 index 000000000000..729dffc5f7bc --- /dev/null +++ b/arch/powerpc/kernel/trace/Makefile @@ -0,0 +1,29 @@ +# +# Makefile for the powerpc trace subsystem +# + +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror + +ifdef CONFIG_FUNCTION_TRACER +# do not trace tracer code +CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +endif + +obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64.o +ifdef CONFIG_MPROFILE_KERNEL +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_mprofile.o +else +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o +endif +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_TRACING) += trace_clock.o + +obj-$(CONFIG_PPC64) += $(obj64-y) +obj-$(CONFIG_PPC32) += $(obj32-y) + +# Disable GCOV & sanitizers in odd or sensitive code +GCOV_PROFILE_ftrace.o := n +UBSAN_SANITIZE_ftrace.o := n diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 5c9f50c1aa99..32509de6ce4c 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -21,6 +21,7 @@ #include <linux/init.h> #include <linux/list.h> +#include <asm/asm-prototypes.h> #include <asm/cacheflush.h> #include <asm/code-patching.h> #include <asm/ftrace.h> diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S new file mode 100644 index 000000000000..afef2c076282 --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -0,0 +1,118 @@ +/* + * Split from entry_32.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/magic.h> +#include <asm/reg.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> +#include <asm/export.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL(mcount) +_GLOBAL(_mcount) + /* + * It is required that _mcount on PPC32 must preserve the + * link register. But we have r0 to play with. We use r0 + * to push the return address back to the caller of mcount + * into the ctr register, restore the link register and + * then jump back using the ctr register. + */ + mflr r0 + mtctr r0 + lwz r0, 4(r1) + mtlr r0 + bctr + +_GLOBAL(ftrace_caller) + MCOUNT_SAVE_FRAME + /* r3 ends up with link register */ + subi r3, r3, MCOUNT_INSN_SIZE +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) +#endif + MCOUNT_RESTORE_FRAME + /* old link register ends up in ctr reg */ + bctr +#else +_GLOBAL(mcount) +_GLOBAL(_mcount) + + MCOUNT_SAVE_FRAME + + subi r3, r3, MCOUNT_INSN_SIZE + LOAD_REG_ADDR(r5, ftrace_trace_function) + lwz r5,0(r5) + + mtctr r5 + bctrl + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + b ftrace_graph_caller +#endif + MCOUNT_RESTORE_FRAME + bctr +#endif +EXPORT_SYMBOL(_mcount) + +_GLOBAL(ftrace_stub) + blr + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(ftrace_graph_caller) + /* load r4 with local address */ + lwz r4, 44(r1) + subi r4, r4, MCOUNT_INSN_SIZE + + /* Grab the LR out of the caller stack frame */ + lwz r3,52(r1) + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + stw r3,52(r1) + + MCOUNT_RESTORE_FRAME + /* old link register ends up in ctr reg */ + bctr + +_GLOBAL(return_to_handler) + /* need to save return values */ + stwu r1, -32(r1) + stw r3, 20(r1) + stw r4, 16(r1) + stw r31, 12(r1) + mr r31, r1 + + bl ftrace_return_to_handler + nop + + /* return value has real return address */ + mtlr r3 + + lwz r3, 20(r1) + lwz r4, 16(r1) + lwz r31,12(r1) + lwz r1, 0(r1) + + /* Jump back to real return address */ + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S new file mode 100644 index 000000000000..e5ccea19821e --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_64.S @@ -0,0 +1,85 @@ +/* + * Split from entry_64.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/magic.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> +#include <asm/ppc-opcode.h> +#include <asm/export.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL(mcount) +_GLOBAL(_mcount) +EXPORT_SYMBOL(_mcount) + mflr r12 + mtctr r12 + mtlr r0 + bctr + +#else /* CONFIG_DYNAMIC_FTRACE */ +_GLOBAL_TOC(_mcount) +EXPORT_SYMBOL(_mcount) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) + + subi r3, r3, MCOUNT_INSN_SIZE + LOAD_REG_ADDR(r5,ftrace_trace_function) + ld r5,0(r5) + ld r5,0(r5) + mtctr r5 + bctrl + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + b ftrace_graph_caller +#endif + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 +_GLOBAL(ftrace_stub) + blr +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(return_to_handler) + /* need to save return values */ + std r4, -32(r1) + std r3, -24(r1) + /* save TOC */ + std r2, -16(r1) + std r31, -8(r1) + mr r31, r1 + stdu r1, -112(r1) + + /* + * We might be called from a module. + * Switch to our TOC to run inside the core kernel. + */ + ld r2, PACATOC(r13) + + bl ftrace_return_to_handler + nop + + /* return value has real return address */ + mtlr r3 + + ld r1, 0(r1) + ld r4, -32(r1) + ld r3, -24(r1) + ld r2, -16(r1) + ld r31, -8(r1) + + /* Jump back to real return address */ + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S new file mode 100644 index 000000000000..7c933a99f5d5 --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -0,0 +1,272 @@ +/* + * Split from ftrace_64.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/magic.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> +#include <asm/ppc-opcode.h> +#include <asm/export.h> +#include <asm/thread_info.h> +#include <asm/bug.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * + * ftrace_caller() is the function that replaces _mcount() when ftrace is + * active. + * + * We arrive here after a function A calls function B, and we are the trace + * function for B. When we enter r1 points to A's stack frame, B has not yet + * had a chance to allocate one yet. + * + * Additionally r2 may point either to the TOC for A, or B, depending on + * whether B did a TOC setup sequence before calling us. + * + * On entry the LR points back to the _mcount() call site, and r0 holds the + * saved LR as it was on entry to B, ie. the original return address at the + * call site in A. + * + * Our job is to save the register state into a struct pt_regs (on the stack) + * and then arrange for the ftrace function to be called. + */ +_GLOBAL(ftrace_caller) + /* Save the original return address in A's stack frame */ + std r0,LRSAVE(r1) + + /* Create our stack frame + pt_regs */ + stdu r1,-SWITCH_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ + SAVE_8GPRS(0,r1) + SAVE_8GPRS(8,r1) + SAVE_8GPRS(16,r1) + SAVE_8GPRS(24,r1) + + /* Load special regs for save below */ + mfmsr r8 + mfctr r9 + mfxer r10 + mfcr r11 + + /* Get the _mcount() call site out of LR */ + mflr r7 + /* Save it as pt_regs->nip */ + std r7, _NIP(r1) + /* Save the read LR in pt_regs->link */ + std r0, _LINK(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2,PACATOC(r13) /* get kernel TOC in r2 */ + + addis r3,r2,function_trace_op@toc@ha + addi r3,r3,function_trace_op@toc@l + ld r5,0(r3) + +#ifdef CONFIG_LIVEPATCH + mr r14,r7 /* remember old NIP */ +#endif + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Save special regs */ + std r8, _MSR(r1) + std r9, _CTR(r1) + std r10, _XER(r1) + std r11, _CCR(r1) + + /* Load &pt_regs in r6 for call below */ + addi r6, r1 ,STACK_FRAME_OVERHEAD + + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + + /* Load ctr with the possibly modified NIP */ + ld r3, _NIP(r1) + mtctr r3 +#ifdef CONFIG_LIVEPATCH + cmpd r14,r3 /* has NIP been altered? */ +#endif + + /* Restore gprs */ + REST_8GPRS(0,r1) + REST_8GPRS(8,r1) + REST_8GPRS(16,r1) + REST_8GPRS(24,r1) + + /* Restore possibly modified LR */ + ld r0, _LINK(r1) + mtlr r0 + + /* Restore callee's TOC */ + ld r2, 24(r1) + + /* Pop our stack frame */ + addi r1, r1, SWITCH_FRAME_SIZE + +#ifdef CONFIG_LIVEPATCH + /* Based on the cmpd above, if the NIP was altered handle livepatch */ + bne- livepatch_handler +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) +#endif + + bctr /* jump after _mcount site */ + +_GLOBAL(ftrace_stub) + blr + +#ifdef CONFIG_LIVEPATCH + /* + * This function runs in the mcount context, between two functions. As + * such it can only clobber registers which are volatile and used in + * function linkage. + * + * We get here when a function A, calls another function B, but B has + * been live patched with a new function C. + * + * On entry: + * - we have no stack frame and can not allocate one + * - LR points back to the original caller (in A) + * - CTR holds the new NIP in C + * - r0 & r12 are free + * + * r0 can't be used as the base register for a DS-form load or store, so + * we temporarily shuffle r1 (stack pointer) into r0 and then put it back. + */ +livepatch_handler: + CURRENT_THREAD_INFO(r12, r1) + + /* Save stack pointer into r0 */ + mr r0, r1 + + /* Allocate 3 x 8 bytes */ + ld r1, TI_livepatch_sp(r12) + addi r1, r1, 24 + std r1, TI_livepatch_sp(r12) + + /* Save toc & real LR on livepatch stack */ + std r2, -24(r1) + mflr r12 + std r12, -16(r1) + + /* Store stack end marker */ + lis r12, STACK_END_MAGIC@h + ori r12, r12, STACK_END_MAGIC@l + std r12, -8(r1) + + /* Restore real stack pointer */ + mr r1, r0 + + /* Put ctr in r12 for global entry and branch there */ + mfctr r12 + bctrl + + /* + * Now we are returning from the patched function to the original + * caller A. We are free to use r0 and r12, and we can use r2 until we + * restore it. + */ + + CURRENT_THREAD_INFO(r12, r1) + + /* Save stack pointer into r0 */ + mr r0, r1 + + ld r1, TI_livepatch_sp(r12) + + /* Check stack marker hasn't been trashed */ + lis r2, STACK_END_MAGIC@h + ori r2, r2, STACK_END_MAGIC@l + ld r12, -8(r1) +1: tdne r12, r2 + EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 + + /* Restore LR & toc from livepatch stack */ + ld r12, -16(r1) + mtlr r12 + ld r2, -24(r1) + + /* Pop livepatch stack frame */ + CURRENT_THREAD_INFO(r12, r0) + subi r1, r1, 24 + std r1, TI_livepatch_sp(r12) + + /* Restore real stack pointer */ + mr r1, r0 + + /* Return to original caller of live patched function */ + blr +#endif /* CONFIG_LIVEPATCH */ + +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(ftrace_graph_caller) + stdu r1, -112(r1) + /* with -mprofile-kernel, parameter regs are still alive at _mcount */ + std r10, 104(r1) + std r9, 96(r1) + std r8, 88(r1) + std r7, 80(r1) + std r6, 72(r1) + std r5, 64(r1) + std r4, 56(r1) + std r3, 48(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2, PACATOC(r13) /* get kernel TOC in r2 */ + + mfctr r4 /* ftrace_caller has moved local addr here */ + std r4, 40(r1) + mflr r3 /* ftrace_caller has restored LR from stack */ + subi r4, r4, MCOUNT_INSN_SIZE + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR to this. + */ + mtlr r3 + + ld r0, 40(r1) + mtctr r0 + ld r10, 104(r1) + ld r9, 96(r1) + ld r8, 88(r1) + ld r7, 80(r1) + ld r6, 72(r1) + ld r5, 64(r1) + ld r4, 56(r1) + ld r3, 48(r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + addi r1, r1, 112 + mflr r0 + std r0, LRSAVE(r1) + bctr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S new file mode 100644 index 000000000000..f095358da96e --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S @@ -0,0 +1,68 @@ +/* + * Split from ftrace_64.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/magic.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> +#include <asm/ppc-opcode.h> +#include <asm/export.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL_TOC(ftrace_caller) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) + subi r3, r3, MCOUNT_INSN_SIZE +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) +#endif + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 + +_GLOBAL(ftrace_stub) + blr +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(ftrace_graph_caller) + /* load r4 with local address */ + ld r4, 128(r1) + subi r4, r4, MCOUNT_INSN_SIZE + + /* Grab the LR out of the caller stack frame */ + ld r11, 112(r1) + ld r3, 16(r11) + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + ld r11, 112(r1) + std r3, 16(r11) + + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace_clock.c b/arch/powerpc/kernel/trace/trace_clock.c index 49170690946d..49170690946d 100644 --- a/arch/powerpc/kernel/trace_clock.c +++ b/arch/powerpc/kernel/trace/trace_clock.c diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index ff365f9de27a..d4e545d27ef9 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -35,13 +35,13 @@ #include <linux/backlight.h> #include <linux/bug.h> #include <linux/kdebug.h> -#include <linux/debugfs.h> #include <linux/ratelimit.h> #include <linux/context_tracking.h> #include <asm/emulated_ops.h> #include <asm/pgtable.h> #include <linux/uaccess.h> +#include <asm/debugfs.h> #include <asm/io.h> #include <asm/machdep.h> #include <asm/rtas.h> @@ -279,18 +279,35 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) void system_reset_exception(struct pt_regs *regs) { + /* + * Avoid crashes in case of nested NMI exceptions. Recoverability + * is determined by RI and in_nmi + */ + bool nested = in_nmi(); + if (!nested) + nmi_enter(); + /* See if any machine dependent calls */ if (ppc_md.system_reset_exception) { if (ppc_md.system_reset_exception(regs)) - return; + goto out; } die("System Reset", regs, SIGABRT); +out: +#ifdef CONFIG_PPC_BOOK3S_64 + BUG_ON(get_paca()->in_nmi == 0); + if (get_paca()->in_nmi > 1) + panic("Unrecoverable nested System Reset"); +#endif /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable System Reset"); + if (!nested) + nmi_exit(); + /* What should we do here? We could issue a shutdown or hard reset. */ } @@ -306,8 +323,6 @@ long machine_check_early(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); - add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; @@ -741,6 +756,8 @@ void machine_check_exception(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU * one returns a positive number. However there is existing code @@ -1440,6 +1457,8 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TM_LG] = "TM", [FSCR_EBB_LG] = "EBB", [FSCR_TAR_LG] = "TAR", + [FSCR_MSGP_LG] = "MSGP", + [FSCR_SCV_LG] = "SCV", }; char *facility = "unknown"; u64 value; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7394b770ae1f..2f793be3d2b1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -77,6 +77,8 @@ SECTIONS #endif } :kernel + __head_end = .; + /* * If the build dies here, it's likely code in head_64.S is referencing * labels it can't reach, and the linker inserting stubs without the @@ -312,6 +314,8 @@ SECTIONS NOSAVE_DATA } + BUG_TABLE + . = ALIGN(PAGE_SIZE); _edata = .; PROVIDE32 (edata = .); |