aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile13
-rw-r--r--arch/powerpc/kernel/asm-offsets.c19
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S36
-rw-r--r--arch/powerpc/kernel/cputable.c37
-rw-r--r--arch/powerpc/kernel/crash.c2
-rw-r--r--arch/powerpc/kernel/dbell.c58
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c1031
-rw-r--r--arch/powerpc/kernel/eeh.c3
-rw-r--r--arch/powerpc/kernel/eeh_driver.c55
-rw-r--r--arch/powerpc/kernel/entry_32.S107
-rw-r--r--arch/powerpc/kernel/entry_64.S380
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S12
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S186
-rw-r--r--arch/powerpc/kernel/fadump.c93
-rw-r--r--arch/powerpc/kernel/head_32.S16
-rw-r--r--arch/powerpc/kernel/head_64.S3
-rw-r--r--arch/powerpc/kernel/idle_book3s.S285
-rw-r--r--arch/powerpc/kernel/iommu.c91
-rw-r--r--arch/powerpc/kernel/irq.c41
-rw-r--r--arch/powerpc/kernel/kprobes-ftrace.c104
-rw-r--r--arch/powerpc/kernel/kprobes.c214
-rw-r--r--arch/powerpc/kernel/mce.c18
-rw-r--r--arch/powerpc/kernel/mce_power.c780
-rw-r--r--arch/powerpc/kernel/nvram_64.c89
-rw-r--r--arch/powerpc/kernel/optprobes.c6
-rw-r--r--arch/powerpc/kernel/paca.c21
-rw-r--r--arch/powerpc/kernel/pci-common.c11
-rw-r--r--arch/powerpc/kernel/prom.c30
-rw-r--r--arch/powerpc/kernel/prom_init.c2
-rw-r--r--arch/powerpc/kernel/setup-common.c18
-rw-r--r--arch/powerpc/kernel/setup_64.c19
-rw-r--r--arch/powerpc/kernel/signal.c4
-rw-r--r--arch/powerpc/kernel/smp.c325
-rw-r--r--arch/powerpc/kernel/stacktrace.c9
-rw-r--r--arch/powerpc/kernel/swsusp.c1
-rw-r--r--arch/powerpc/kernel/syscalls.c16
-rw-r--r--arch/powerpc/kernel/sysfs.c12
-rw-r--r--arch/powerpc/kernel/time.c2
-rw-r--r--arch/powerpc/kernel/trace/Makefile29
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c (renamed from arch/powerpc/kernel/ftrace.c)1
-rw-r--r--arch/powerpc/kernel/trace/ftrace_32.S118
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64.S85
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S272
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_pg.S68
-rw-r--r--arch/powerpc/kernel/trace/trace_clock.c (renamed from arch/powerpc/kernel/trace_clock.c)0
-rw-r--r--arch/powerpc/kernel/traps.c27
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S4
47 files changed, 3305 insertions, 1448 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 811f441a125f..e132902e1f14 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -25,8 +25,6 @@ CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-# do not trace tracer code
-CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
# timers used by tracing
CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
endif
@@ -58,6 +56,7 @@ obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y)
obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
+obj-$(CONFIG_PPC_DT_CPU_FTRS) += dt_cpu_ftrs.o
obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
@@ -97,6 +96,7 @@ obj-$(CONFIG_BOOTX_TEXT) += btext.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o
+obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
@@ -118,10 +118,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o
-obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
-obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
-obj-$(CONFIG_TRACING) += trace_clock.o
+obj-y += trace/
ifneq ($(CONFIG_PPC_INDIRECT_PIO),y)
obj-y += iomap.o
@@ -142,14 +139,14 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
# Disable GCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
UBSAN_SANITIZE_prom_init.o := n
-GCOV_PROFILE_ftrace.o := n
-UBSAN_SANITIZE_ftrace.o := n
GCOV_PROFILE_machine_kexec_64.o := n
UBSAN_SANITIZE_machine_kexec_64.o := n
GCOV_PROFILE_machine_kexec_32.o := n
UBSAN_SANITIZE_machine_kexec_32.o := n
GCOV_PROFILE_kprobes.o := n
UBSAN_SANITIZE_kprobes.o := n
+GCOV_PROFILE_kprobes-ftrace.o := n
+UBSAN_SANITIZE_kprobes-ftrace.o := n
UBSAN_SANITIZE_vdso.o := n
extra-$(CONFIG_PPC_FPU) += fpu.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4367e7df51a1..709e23425317 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,6 +185,7 @@ int main(void)
#ifdef CONFIG_PPC_MM_SLICES
OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
+ DEFINE(PACA_ADDR_LIMIT, offsetof(struct paca_struct, addr_limit));
DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
#endif /* CONFIG_PPC_MM_SLICES */
#endif
@@ -219,6 +220,7 @@ int main(void)
OFFSET(PACA_EXGEN, paca_struct, exgen);
OFFSET(PACA_EXMC, paca_struct, exmc);
OFFSET(PACA_EXSLB, paca_struct, exslb);
+ OFFSET(PACA_EXNMI, paca_struct, exnmi);
OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
@@ -232,7 +234,9 @@ int main(void)
OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
#ifdef CONFIG_PPC_BOOK3S_64
OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp);
+ OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
OFFSET(PACA_IN_MCE, paca_struct, in_mce);
+ OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
#endif
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
@@ -399,8 +403,8 @@ int main(void)
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
#endif
-#ifdef MAX_PGD_TABLE_SIZE
- DEFINE(PGD_TABLE_SIZE, MAX_PGD_TABLE_SIZE);
+#ifdef CONFIG_PPC_BOOK3S_64
+ DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE)));
#else
DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
#endif
@@ -630,6 +634,8 @@ int main(void)
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+ HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
+ HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
@@ -715,6 +721,14 @@ int main(void)
OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6);
#endif
+#ifdef CONFIG_KVM_XICS
+ DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu,
+ arch.xive_saved_state));
+ DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
+ arch.xive_cam_word));
+ DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+#endif
+
#ifdef CONFIG_KVM_EXIT_TIMING
OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu);
OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl);
@@ -727,6 +741,7 @@ int main(void)
OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
+ OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 7fe8c79e6937..10cb2896b2ae 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -29,7 +29,8 @@ _GLOBAL(__setup_cpu_power7)
li r0,0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- bl __init_LPCR
+ li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
+ bl __init_LPCR_ISA206
bl __init_tlb_power7
mtlr r11
blr
@@ -42,7 +43,8 @@ _GLOBAL(__restore_cpu_power7)
li r0,0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- bl __init_LPCR
+ li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
+ bl __init_LPCR_ISA206
bl __init_tlb_power7
mtlr r11
blr
@@ -59,7 +61,8 @@ _GLOBAL(__setup_cpu_power8)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
- bl __init_LPCR
+ li r4,0 /* LPES = 0 */
+ bl __init_LPCR_ISA206
bl __init_HFSCR
bl __init_tlb_power8
bl __init_PMU_HV
@@ -80,7 +83,8 @@ _GLOBAL(__restore_cpu_power8)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
- bl __init_LPCR
+ li r4,0 /* LPES = 0 */
+ bl __init_LPCR_ISA206
bl __init_HFSCR
bl __init_tlb_power8
bl __init_PMU_HV
@@ -99,11 +103,12 @@ _GLOBAL(__setup_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
+ LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
andc r3, r3, r4
- bl __init_LPCR
+ li r4,0 /* LPES = 0 */
+ bl __init_LPCR_ISA300
bl __init_HFSCR
bl __init_tlb_power9
bl __init_PMU_HV
@@ -122,11 +127,12 @@ _GLOBAL(__restore_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
+ LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
andc r3, r3, r4
- bl __init_LPCR
+ li r4,0 /* LPES = 0 */
+ bl __init_LPCR_ISA300
bl __init_HFSCR
bl __init_tlb_power9
bl __init_PMU_HV
@@ -144,9 +150,9 @@ __init_hvmode_206:
std r5,CPU_SPEC_FEATURES(r4)
blr
-__init_LPCR:
+__init_LPCR_ISA206:
/* Setup a sane LPCR:
- * Called with initial LPCR in R3
+ * Called with initial LPCR in R3 and desired LPES 2-bit value in R4
*
* LPES = 0b01 (HSRR0/1 used for 0x500)
* PECE = 0b111
@@ -157,16 +163,18 @@ __init_LPCR:
*
* Other bits untouched for now
*/
- li r5,1
- rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
+ li r5,0x10
+ rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5
+
+ /* POWER9 has no VRMASD */
+__init_LPCR_ISA300:
+ rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
li r5,4
rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3
clrrdi r3,r3,1 /* clear HDICE */
li r5,4
rldimi r3,r5, LPCR_VC_SH, 0
- li r5,0x10
- rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5
mtspr SPRN_LPCR,r3
isync
blr
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index e79b9daa873c..9b3e88b1a9c8 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -23,7 +23,9 @@
#include <asm/mmu.h>
#include <asm/setup.h>
-struct cpu_spec* cur_cpu_spec = NULL;
+static struct cpu_spec the_cpu_spec __read_mostly;
+
+struct cpu_spec* cur_cpu_spec __read_mostly = NULL;
EXPORT_SYMBOL(cur_cpu_spec);
/* The platform string corresponding to the real PVR */
@@ -2179,7 +2181,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_E500 */
};
-static struct cpu_spec the_cpu_spec;
+void __init set_cur_cpu_spec(struct cpu_spec *s)
+{
+ struct cpu_spec *t = &the_cpu_spec;
+
+ t = PTRRELOC(t);
+ *t = *s;
+
+ *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+}
static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
struct cpu_spec *s)
@@ -2266,6 +2276,29 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
return NULL;
}
+/*
+ * Used by cpufeatures to get the name for CPUs with a PVR table.
+ * If they don't hae a PVR table, cpufeatures gets the name from
+ * cpu device-tree node.
+ */
+void __init identify_cpu_name(unsigned int pvr)
+{
+ struct cpu_spec *s = cpu_specs;
+ struct cpu_spec *t = &the_cpu_spec;
+ int i;
+
+ s = PTRRELOC(s);
+ t = PTRRELOC(t);
+
+ for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
+ if ((pvr & s->pvr_mask) == s->pvr_value) {
+ t->cpu_name = s->cpu_name;
+ return;
+ }
+ }
+}
+
+
#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = {
[0 ... NUM_CPU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 47b63de81f9b..cbabb5adccd9 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -43,8 +43,6 @@
#define IPI_TIMEOUT 10000
#define REAL_MODE_TIMEOUT 10000
-/* This keeps a track of which one is the crashing cpu. */
-int crashing_cpu = -1;
static int time_to_dump;
#define CRASH_HANDLER_MAX 3
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 2128f3a96c32..b6fe883b1016 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -20,18 +20,60 @@
#include <asm/kvm_ppc.h>
#ifdef CONFIG_SMP
-void doorbell_setup_this_cpu(void)
+
+/*
+ * Doorbells must only be used if CPU_FTR_DBELL is available.
+ * msgsnd is used in HV, and msgsndp is used in !HV.
+ *
+ * These should be used by platform code that is aware of restrictions.
+ * Other arch code should use ->cause_ipi.
+ *
+ * doorbell_global_ipi() sends a dbell to any target CPU.
+ * Must be used only by architectures that address msgsnd target
+ * by PIR/get_hard_smp_processor_id.
+ */
+void doorbell_global_ipi(int cpu)
{
- unsigned long tag = mfspr(SPRN_DOORBELL_CPUTAG) & PPC_DBELL_TAG_MASK;
+ u32 tag = get_hard_smp_processor_id(cpu);
- smp_muxed_ipi_set_data(smp_processor_id(), tag);
+ kvmppc_set_host_ipi(cpu, 1);
+ /* Order previous accesses vs. msgsnd, which is treated as a store */
+ ppc_msgsnd_sync();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
}
-void doorbell_cause_ipi(int cpu, unsigned long data)
+/*
+ * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
+ * Must be used only by architectures that address msgsnd target
+ * by TIR/cpu_thread_in_core.
+ */
+void doorbell_core_ipi(int cpu)
{
+ u32 tag = cpu_thread_in_core(cpu);
+
+ kvmppc_set_host_ipi(cpu, 1);
/* Order previous accesses vs. msgsnd, which is treated as a store */
- mb();
- ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, data);
+ ppc_msgsnd_sync();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+}
+
+/*
+ * Attempt to cause a core doorbell if destination is on the same core.
+ * Returns 1 on success, 0 on failure.
+ */
+int doorbell_try_core_ipi(int cpu)
+{
+ int this_cpu = get_cpu();
+ int ret = 0;
+
+ if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
+ doorbell_core_ipi(cpu);
+ ret = 1;
+ }
+
+ put_cpu();
+
+ return ret;
}
void doorbell_exception(struct pt_regs *regs)
@@ -40,12 +82,14 @@ void doorbell_exception(struct pt_regs *regs)
irq_enter();
+ ppc_msgsync();
+
may_hard_irq_enable();
kvmppc_set_host_ipi(smp_processor_id(), 0);
__this_cpu_inc(irq_stat.doorbell_irqs);
- smp_ipi_demux();
+ smp_ipi_demux_relaxed(); /* already performed the barrier */
irq_exit();
set_irq_regs(old_regs);
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
new file mode 100644
index 000000000000..fcc7588a96d6
--- /dev/null
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -0,0 +1,1031 @@
+/*
+ * Copyright 2017, Nicholas Piggin, IBM Corporation
+ * Licensed under GPLv2.
+ */
+
+#define pr_fmt(fmt) "dt-cpu-ftrs: " fmt
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/memblock.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/threads.h>
+
+#include <asm/cputable.h>
+#include <asm/dt_cpu_ftrs.h>
+#include <asm/mmu.h>
+#include <asm/oprofile_impl.h>
+#include <asm/prom.h>
+#include <asm/setup.h>
+
+
+/* Device-tree visible constants follow */
+#define ISA_V2_07B 2070
+#define ISA_V3_0B 3000
+
+#define USABLE_PR (1U << 0)
+#define USABLE_OS (1U << 1)
+#define USABLE_HV (1U << 2)
+
+#define HV_SUPPORT_HFSCR (1U << 0)
+#define OS_SUPPORT_FSCR (1U << 0)
+
+/* For parsing, we define all bits set as "NONE" case */
+#define HV_SUPPORT_NONE 0xffffffffU
+#define OS_SUPPORT_NONE 0xffffffffU
+
+struct dt_cpu_feature {
+ const char *name;
+ uint32_t isa;
+ uint32_t usable_privilege;
+ uint32_t hv_support;
+ uint32_t os_support;
+ uint32_t hfscr_bit_nr;
+ uint32_t fscr_bit_nr;
+ uint32_t hwcap_bit_nr;
+ /* fdt parsing */
+ unsigned long node;
+ int enabled;
+ int disabled;
+};
+
+#define CPU_FTRS_BASE \
+ (CPU_FTR_USE_TB | \
+ CPU_FTR_LWSYNC | \
+ CPU_FTR_FPU_UNAVAILABLE |\
+ CPU_FTR_NODSISRALIGN |\
+ CPU_FTR_NOEXECUTE |\
+ CPU_FTR_COHERENT_ICACHE | \
+ CPU_FTR_STCX_CHECKS_ADDRESS |\
+ CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+ CPU_FTR_DAWR | \
+ CPU_FTR_ARCH_206 |\
+ CPU_FTR_ARCH_207S)
+
+#define MMU_FTRS_HASH_BASE (MMU_FTRS_POWER8)
+
+#define COMMON_USER_BASE (PPC_FEATURE_32 | PPC_FEATURE_64 | \
+ PPC_FEATURE_ARCH_2_06 |\
+ PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER2_BASE (PPC_FEATURE2_ARCH_2_07 | \
+ PPC_FEATURE2_ISEL)
+/*
+ * Set up the base CPU
+ */
+
+extern void __flush_tlb_power8(unsigned int action);
+extern void __flush_tlb_power9(unsigned int action);
+extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
+
+static int hv_mode;
+
+static struct {
+ u64 lpcr;
+ u64 hfscr;
+ u64 fscr;
+} system_registers;
+
+static void (*init_pmu_registers)(void);
+
+static void cpufeatures_flush_tlb(void)
+{
+ unsigned long rb;
+ unsigned int i, num_sets;
+
+ /*
+ * This is a temporary measure to keep equivalent TLB flush as the
+ * cputable based setup code.
+ */
+ switch (PVR_VER(mfspr(SPRN_PVR))) {
+ case PVR_POWER8:
+ case PVR_POWER8E:
+ case PVR_POWER8NVL:
+ num_sets = POWER8_TLB_SETS;
+ break;
+ case PVR_POWER9:
+ num_sets = POWER9_TLB_SETS_HASH;
+ break;
+ default:
+ num_sets = 1;
+ pr_err("unknown CPU version for boot TLB flush\n");
+ break;
+ }
+
+ asm volatile("ptesync" : : : "memory");
+ rb = TLBIEL_INVAL_SET;
+ for (i = 0; i < num_sets; i++) {
+ asm volatile("tlbiel %0" : : "r" (rb));
+ rb += 1 << TLBIEL_INVAL_SET_SHIFT;
+ }
+ asm volatile("ptesync" : : : "memory");
+}
+
+static void __restore_cpu_cpufeatures(void)
+{
+ /*
+ * LPCR is restored by the power on engine already. It can be changed
+ * after early init e.g., by radix enable, and we have no unified API
+ * for saving and restoring such SPRs.
+ *
+ * This ->restore hook should really be removed from idle and register
+ * restore moved directly into the idle restore code, because this code
+ * doesn't know how idle is implemented or what it needs restored here.
+ *
+ * The best we can do to accommodate secondary boot and idle restore
+ * for now is "or" LPCR with existing.
+ */
+
+ mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR));
+ if (hv_mode) {
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_HFSCR, system_registers.hfscr);
+ }
+ mtspr(SPRN_FSCR, system_registers.fscr);
+
+ if (init_pmu_registers)
+ init_pmu_registers();
+
+ cpufeatures_flush_tlb();
+}
+
+static char dt_cpu_name[64];
+
+static struct cpu_spec __initdata base_cpu_spec = {
+ .cpu_name = NULL,
+ .cpu_features = CPU_FTRS_BASE,
+ .cpu_user_features = COMMON_USER_BASE,
+ .cpu_user_features2 = COMMON_USER2_BASE,
+ .mmu_features = 0,
+ .icache_bsize = 32, /* minimum block size, fixed by */
+ .dcache_bsize = 32, /* cache info init. */
+ .num_pmcs = 0,
+ .pmc_type = PPC_PMC_DEFAULT,
+ .oprofile_cpu_type = NULL,
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = NULL,
+ .cpu_restore = __restore_cpu_cpufeatures,
+ .flush_tlb = NULL,
+ .machine_check_early = NULL,
+ .platform = NULL,
+};
+
+static void __init cpufeatures_setup_cpu(void)
+{
+ set_cur_cpu_spec(&base_cpu_spec);
+
+ cur_cpu_spec->pvr_mask = -1;
+ cur_cpu_spec->pvr_value = mfspr(SPRN_PVR);
+
+ /* Initialize the base environment -- clear FSCR/HFSCR. */
+ hv_mode = !!(mfmsr() & MSR_HV);
+ if (hv_mode) {
+ /* CPU_FTR_HVMODE is used early in PACA setup */
+ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+ mtspr(SPRN_HFSCR, 0);
+ }
+ mtspr(SPRN_FSCR, 0);
+
+ /*
+ * LPCR does not get cleared, to match behaviour with secondaries
+ * in __restore_cpu_cpufeatures. Once the idle code is fixed, this
+ * could clear LPCR too.
+ */
+}
+
+static int __init feat_try_enable_unknown(struct dt_cpu_feature *f)
+{
+ if (f->hv_support == HV_SUPPORT_NONE) {
+ } else if (f->hv_support & HV_SUPPORT_HFSCR) {
+ u64 hfscr = mfspr(SPRN_HFSCR);
+ hfscr |= 1UL << f->hfscr_bit_nr;
+ mtspr(SPRN_HFSCR, hfscr);
+ } else {
+ /* Does not have a known recipe */
+ return 0;
+ }
+
+ if (f->os_support == OS_SUPPORT_NONE) {
+ } else if (f->os_support & OS_SUPPORT_FSCR) {
+ u64 fscr = mfspr(SPRN_FSCR);
+ fscr |= 1UL << f->fscr_bit_nr;
+ mtspr(SPRN_FSCR, fscr);
+ } else {
+ /* Does not have a known recipe */
+ return 0;
+ }
+
+ if ((f->usable_privilege & USABLE_PR) && (f->hwcap_bit_nr != -1)) {
+ uint32_t word = f->hwcap_bit_nr / 32;
+ uint32_t bit = f->hwcap_bit_nr % 32;
+
+ if (word == 0)
+ cur_cpu_spec->cpu_user_features |= 1U << bit;
+ else if (word == 1)
+ cur_cpu_spec->cpu_user_features2 |= 1U << bit;
+ else
+ pr_err("%s could not advertise to user (no hwcap bits)\n", f->name);
+ }
+
+ return 1;
+}
+
+static int __init feat_enable(struct dt_cpu_feature *f)
+{
+ if (f->hv_support != HV_SUPPORT_NONE) {
+ if (f->hfscr_bit_nr != -1) {
+ u64 hfscr = mfspr(SPRN_HFSCR);
+ hfscr |= 1UL << f->hfscr_bit_nr;
+ mtspr(SPRN_HFSCR, hfscr);
+ }
+ }
+
+ if (f->os_support != OS_SUPPORT_NONE) {
+ if (f->fscr_bit_nr != -1) {
+ u64 fscr = mfspr(SPRN_FSCR);
+ fscr |= 1UL << f->fscr_bit_nr;
+ mtspr(SPRN_FSCR, fscr);
+ }
+ }
+
+ if ((f->usable_privilege & USABLE_PR) && (f->hwcap_bit_nr != -1)) {
+ uint32_t word = f->hwcap_bit_nr / 32;
+ uint32_t bit = f->hwcap_bit_nr % 32;
+
+ if (word == 0)
+ cur_cpu_spec->cpu_user_features |= 1U << bit;
+ else if (word == 1)
+ cur_cpu_spec->cpu_user_features2 |= 1U << bit;
+ else
+ pr_err("CPU feature: %s could not advertise to user (no hwcap bits)\n", f->name);
+ }
+
+ return 1;
+}
+
+static int __init feat_disable(struct dt_cpu_feature *f)
+{
+ return 0;
+}
+
+static int __init feat_enable_hv(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ if (!hv_mode) {
+ pr_err("CPU feature hypervisor present in device tree but HV mode not enabled in the CPU. Ignoring.\n");
+ return 0;
+ }
+
+ mtspr(SPRN_LPID, 0);
+
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr &= ~LPCR_LPES0; /* HV external interrupts */
+ mtspr(SPRN_LPCR, lpcr);
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+
+ return 1;
+}
+
+static int __init feat_enable_le(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_TRUE_LE;
+ return 1;
+}
+
+static int __init feat_enable_smt(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_SMT;
+ return 1;
+}
+
+static int __init feat_enable_idle_nap(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ /* Set PECE wakeup modes for ISA 207 */
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr |= LPCR_PECE0;
+ lpcr |= LPCR_PECE1;
+ lpcr |= LPCR_PECE2;
+ mtspr(SPRN_LPCR, lpcr);
+
+ return 1;
+}
+
+static int __init feat_enable_align_dsisr(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->cpu_features &= ~CPU_FTR_NODSISRALIGN;
+
+ return 1;
+}
+
+static int __init feat_enable_idle_stop(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ /* Set PECE wakeup modes for ISAv3.0B */
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr |= LPCR_PECE0;
+ lpcr |= LPCR_PECE1;
+ lpcr |= LPCR_PECE2;
+ mtspr(SPRN_LPCR, lpcr);
+
+ return 1;
+}
+
+static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr &= ~LPCR_ISL;
+
+ /* VRMASD */
+ lpcr |= LPCR_VPM0;
+ lpcr &= ~LPCR_VPM1;
+ lpcr |= 0x10UL << LPCR_VRMASD_SH; /* L=1 LP=00 */
+ mtspr(SPRN_LPCR, lpcr);
+
+ cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+ return 1;
+}
+
+static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr &= ~LPCR_ISL;
+ mtspr(SPRN_LPCR, lpcr);
+
+ cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+ return 1;
+}
+
+
+static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_PPC_RADIX_MMU
+ cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
+ cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+ return 1;
+#endif
+ return 0;
+}
+
+static int __init feat_enable_dscr(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ feat_enable(f);
+
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr &= ~LPCR_DPFD;
+ lpcr |= (4UL << LPCR_DPFD_SH);
+ mtspr(SPRN_LPCR, lpcr);
+
+ return 1;
+}
+
+static void hfscr_pmu_enable(void)
+{
+ u64 hfscr = mfspr(SPRN_HFSCR);
+ hfscr |= PPC_BIT(60);
+ mtspr(SPRN_HFSCR, hfscr);
+}
+
+static void init_pmu_power8(void)
+{
+ if (hv_mode) {
+ mtspr(SPRN_MMCRC, 0);
+ mtspr(SPRN_MMCRH, 0);
+ }
+
+ mtspr(SPRN_MMCRA, 0);
+ mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR1, 0);
+ mtspr(SPRN_MMCR2, 0);
+ mtspr(SPRN_MMCRS, 0);
+}
+
+static int __init feat_enable_mce_power8(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->platform = "power8";
+ cur_cpu_spec->flush_tlb = __flush_tlb_power8;
+ cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8;
+
+ return 1;
+}
+
+static int __init feat_enable_pmu_power8(struct dt_cpu_feature *f)
+{
+ hfscr_pmu_enable();
+
+ init_pmu_power8();
+ init_pmu_registers = init_pmu_power8;
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+ if (pvr_version_is(PVR_POWER8E))
+ cur_cpu_spec->cpu_features |= CPU_FTR_PMAO_BUG;
+
+ cur_cpu_spec->num_pmcs = 6;
+ cur_cpu_spec->pmc_type = PPC_PMC_IBM;
+ cur_cpu_spec->oprofile_cpu_type = "ppc64/power8";
+
+ return 1;
+}
+
+static void init_pmu_power9(void)
+{
+ if (hv_mode)
+ mtspr(SPRN_MMCRC, 0);
+
+ mtspr(SPRN_MMCRA, 0);
+ mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR1, 0);
+ mtspr(SPRN_MMCR2, 0);
+}
+
+static int __init feat_enable_mce_power9(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->platform = "power9";
+ cur_cpu_spec->flush_tlb = __flush_tlb_power9;
+ cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9;
+
+ return 1;
+}
+
+static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f)
+{
+ hfscr_pmu_enable();
+
+ init_pmu_power9();
+ init_pmu_registers = init_pmu_power9;
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+
+ cur_cpu_spec->num_pmcs = 6;
+ cur_cpu_spec->pmc_type = PPC_PMC_IBM;
+ cur_cpu_spec->oprofile_cpu_type = "ppc64/power9";
+
+ return 1;
+}
+
+static int __init feat_enable_tm(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ feat_enable(f);
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NOSC;
+ return 1;
+#endif
+ return 0;
+}
+
+static int __init feat_enable_fp(struct dt_cpu_feature *f)
+{
+ feat_enable(f);
+ cur_cpu_spec->cpu_features &= ~CPU_FTR_FPU_UNAVAILABLE;
+
+ return 1;
+}
+
+static int __init feat_enable_vector(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_ALTIVEC
+ feat_enable(f);
+ cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
+ cur_cpu_spec->cpu_features |= CPU_FTR_VMX_COPY;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
+
+ return 1;
+#endif
+ return 0;
+}
+
+static int __init feat_enable_vsx(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_VSX
+ feat_enable(f);
+ cur_cpu_spec->cpu_features |= CPU_FTR_VSX;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_VSX;
+
+ return 1;
+#endif
+ return 0;
+}
+
+static int __init feat_enable_purr(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->cpu_features |= CPU_FTR_PURR | CPU_FTR_SPURR;
+
+ return 1;
+}
+
+static int __init feat_enable_ebb(struct dt_cpu_feature *f)
+{
+ /*
+ * PPC_FEATURE2_EBB is enabled in PMU init code because it has
+ * historically been related to the PMU facility. This may have
+ * to be decoupled if EBB becomes more generic. For now, follow
+ * existing convention.
+ */
+ f->hwcap_bit_nr = -1;
+ feat_enable(f);
+
+ return 1;
+}
+
+static int __init feat_enable_dbell(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ /* P9 has an HFSCR for privileged state */
+ feat_enable(f);
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_DBELL;
+
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr |= LPCR_PECEDH; /* hyp doorbell wakeup */
+ mtspr(SPRN_LPCR, lpcr);
+
+ return 1;
+}
+
+static int __init feat_enable_hvi(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ /*
+ * POWER9 XIVE interrupts including in OPAL XICS compatibility
+ * are always delivered as hypervisor virtualization interrupts (HVI)
+ * rather than EE.
+ *
+ * However LPES0 is not set here, in the chance that an EE does get
+ * delivered to the host somehow, the EE handler would not expect it
+ * to be delivered in LPES0 mode (e.g., using SRR[01]). This could
+ * happen if there is a bug in interrupt controller code, or IC is
+ * misconfigured in systemsim.
+ */
+
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr |= LPCR_HVICE; /* enable hvi interrupts */
+ lpcr |= LPCR_HEIC; /* disable ee interrupts when MSR_HV */
+ lpcr |= LPCR_PECE_HVEE; /* hvi can wake from stop */
+ mtspr(SPRN_LPCR, lpcr);
+
+ return 1;
+}
+
+static int __init feat_enable_large_ci(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->mmu_features |= MMU_FTR_CI_LARGE_PAGE;
+
+ return 1;
+}
+
+struct dt_cpu_feature_match {
+ const char *name;
+ int (*enable)(struct dt_cpu_feature *f);
+ u64 cpu_ftr_bit_mask;
+};
+
+static struct dt_cpu_feature_match __initdata
+ dt_cpu_feature_match_table[] = {
+ {"hypervisor", feat_enable_hv, 0},
+ {"big-endian", feat_enable, 0},
+ {"little-endian", feat_enable_le, CPU_FTR_REAL_LE},
+ {"smt", feat_enable_smt, 0},
+ {"interrupt-facilities", feat_enable, 0},
+ {"timer-facilities", feat_enable, 0},
+ {"timer-facilities-v3", feat_enable, 0},
+ {"debug-facilities", feat_enable, 0},
+ {"come-from-address-register", feat_enable, CPU_FTR_CFAR},
+ {"branch-tracing", feat_enable, 0},
+ {"floating-point", feat_enable_fp, 0},
+ {"vector", feat_enable_vector, 0},
+ {"vector-scalar", feat_enable_vsx, 0},
+ {"vector-scalar-v3", feat_enable, 0},
+ {"decimal-floating-point", feat_enable, 0},
+ {"decimal-integer", feat_enable, 0},
+ {"quadword-load-store", feat_enable, 0},
+ {"vector-crypto", feat_enable, 0},
+ {"mmu-hash", feat_enable_mmu_hash, 0},
+ {"mmu-radix", feat_enable_mmu_radix, 0},
+ {"mmu-hash-v3", feat_enable_mmu_hash_v3, 0},
+ {"virtual-page-class-key-protection", feat_enable, 0},
+ {"transactional-memory", feat_enable_tm, CPU_FTR_TM},
+ {"transactional-memory-v3", feat_enable_tm, 0},
+ {"idle-nap", feat_enable_idle_nap, 0},
+ {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0},
+ {"idle-stop", feat_enable_idle_stop, 0},
+ {"machine-check-power8", feat_enable_mce_power8, 0},
+ {"performance-monitor-power8", feat_enable_pmu_power8, 0},
+ {"data-stream-control-register", feat_enable_dscr, CPU_FTR_DSCR},
+ {"event-based-branch", feat_enable_ebb, 0},
+ {"target-address-register", feat_enable, 0},
+ {"branch-history-rolling-buffer", feat_enable, 0},
+ {"control-register", feat_enable, CPU_FTR_CTRL},
+ {"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL},
+ {"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
+ {"processor-utilization-of-resources-register", feat_enable_purr, 0},
+ {"subcore", feat_enable, CPU_FTR_SUBCORE},
+ {"no-execute", feat_enable, 0},
+ {"strong-access-ordering", feat_enable, CPU_FTR_SAO},
+ {"cache-inhibited-large-page", feat_enable_large_ci, 0},
+ {"coprocessor-icswx", feat_enable, CPU_FTR_ICSWX},
+ {"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
+ {"program-priority-register", feat_enable, CPU_FTR_HAS_PPR},
+ {"wait", feat_enable, 0},
+ {"atomic-memory-operations", feat_enable, 0},
+ {"branch-v3", feat_enable, 0},
+ {"copy-paste", feat_enable, 0},
+ {"decimal-floating-point-v3", feat_enable, 0},
+ {"decimal-integer-v3", feat_enable, 0},
+ {"fixed-point-v3", feat_enable, 0},
+ {"floating-point-v3", feat_enable, 0},
+ {"group-start-register", feat_enable, 0},
+ {"pc-relative-addressing", feat_enable, 0},
+ {"machine-check-power9", feat_enable_mce_power9, 0},
+ {"performance-monitor-power9", feat_enable_pmu_power9, 0},
+ {"event-based-branch-v3", feat_enable, 0},
+ {"random-number-generator", feat_enable, 0},
+ {"system-call-vectored", feat_disable, 0},
+ {"trace-interrupt-v3", feat_enable, 0},
+ {"vector-v3", feat_enable, 0},
+ {"vector-binary128", feat_enable, 0},
+ {"vector-binary16", feat_enable, 0},
+ {"wait-v3", feat_enable, 0},
+};
+
+/* XXX: how to configure this? Default + boot time? */
+#ifdef CONFIG_PPC_CPUFEATURES_ENABLE_UNKNOWN
+#define CPU_FEATURE_ENABLE_UNKNOWN 1
+#else
+#define CPU_FEATURE_ENABLE_UNKNOWN 0
+#endif
+
+static void __init cpufeatures_setup_start(u32 isa)
+{
+ pr_info("setup for ISA %d\n", isa);
+
+ if (isa >= 3000) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300;
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00;
+ }
+}
+
+static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
+{
+ const struct dt_cpu_feature_match *m;
+ bool known = false;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(dt_cpu_feature_match_table); i++) {
+ m = &dt_cpu_feature_match_table[i];
+ if (!strcmp(f->name, m->name)) {
+ known = true;
+ if (m->enable(f))
+ break;
+
+ pr_info("not enabling: %s (disabled or unsupported by kernel)\n",
+ f->name);
+ return false;
+ }
+ }
+
+ if (!known && CPU_FEATURE_ENABLE_UNKNOWN) {
+ if (!feat_try_enable_unknown(f)) {
+ pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
+ f->name);
+ return false;
+ }
+ }
+
+ if (m->cpu_ftr_bit_mask)
+ cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask;
+
+ if (known)
+ pr_debug("enabling: %s\n", f->name);
+ else
+ pr_debug("enabling: %s (unknown)\n", f->name);
+
+ return true;
+}
+
+static __init void cpufeatures_cpu_quirks(void)
+{
+ int version = mfspr(SPRN_PVR);
+
+ /*
+ * Not all quirks can be derived from the cpufeatures device tree.
+ */
+ if ((version & 0xffffff00) == 0x004e0100)
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+}
+
+static void __init cpufeatures_setup_finished(void)
+{
+ cpufeatures_cpu_quirks();
+
+ if (hv_mode && !(cur_cpu_spec->cpu_features & CPU_FTR_HVMODE)) {
+ pr_err("hypervisor not present in device tree but HV mode is enabled in the CPU. Enabling.\n");
+ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+ }
+
+ system_registers.lpcr = mfspr(SPRN_LPCR);
+ system_registers.hfscr = mfspr(SPRN_HFSCR);
+ system_registers.fscr = mfspr(SPRN_FSCR);
+
+ cpufeatures_flush_tlb();
+
+ pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n",
+ cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
+}
+
+static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ if (of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features")
+ && of_get_flat_dt_prop(node, "isa", NULL))
+ return 1;
+
+ return 0;
+}
+
+static bool __initdata using_dt_cpu_ftrs = false;
+
+bool __init dt_cpu_ftrs_in_use(void)
+{
+ return using_dt_cpu_ftrs;
+}
+
+bool __init dt_cpu_ftrs_init(void *fdt)
+{
+ /* Setup and verify the FDT, if it fails we just bail */
+ if (!early_init_dt_verify(fdt))
+ return false;
+
+ if (!of_scan_flat_dt(fdt_find_cpu_features, NULL))
+ return false;
+
+ cpufeatures_setup_cpu();
+
+ using_dt_cpu_ftrs = true;
+ return true;
+}
+
+static int nr_dt_cpu_features;
+static struct dt_cpu_feature *dt_cpu_features;
+
+static int __init process_cpufeatures_node(unsigned long node,
+ const char *uname, int i)
+{
+ const __be32 *prop;
+ struct dt_cpu_feature *f;
+ int len;
+
+ f = &dt_cpu_features[i];
+ memset(f, 0, sizeof(struct dt_cpu_feature));
+
+ f->node = node;
+
+ f->name = uname;
+
+ prop = of_get_flat_dt_prop(node, "isa", &len);
+ if (!prop) {
+ pr_warn("%s: missing isa property\n", uname);
+ return 0;
+ }
+ f->isa = be32_to_cpup(prop);
+
+ prop = of_get_flat_dt_prop(node, "usable-privilege", &len);
+ if (!prop) {
+ pr_warn("%s: missing usable-privilege property", uname);
+ return 0;
+ }
+ f->usable_privilege = be32_to_cpup(prop);
+
+ prop = of_get_flat_dt_prop(node, "hv-support", &len);
+ if (prop)
+ f->hv_support = be32_to_cpup(prop);
+ else
+ f->hv_support = HV_SUPPORT_NONE;
+
+ prop = of_get_flat_dt_prop(node, "os-support", &len);
+ if (prop)
+ f->os_support = be32_to_cpup(prop);
+ else
+ f->os_support = OS_SUPPORT_NONE;
+
+ prop = of_get_flat_dt_prop(node, "hfscr-bit-nr", &len);
+ if (prop)
+ f->hfscr_bit_nr = be32_to_cpup(prop);
+ else
+ f->hfscr_bit_nr = -1;
+ prop = of_get_flat_dt_prop(node, "fscr-bit-nr", &len);
+ if (prop)
+ f->fscr_bit_nr = be32_to_cpup(prop);
+ else
+ f->fscr_bit_nr = -1;
+ prop = of_get_flat_dt_prop(node, "hwcap-bit-nr", &len);
+ if (prop)
+ f->hwcap_bit_nr = be32_to_cpup(prop);
+ else
+ f->hwcap_bit_nr = -1;
+
+ if (f->usable_privilege & USABLE_HV) {
+ if (!(mfmsr() & MSR_HV)) {
+ pr_warn("%s: HV feature passed to guest\n", uname);
+ return 0;
+ }
+
+ if (f->hv_support == HV_SUPPORT_NONE && f->hfscr_bit_nr != -1) {
+ pr_warn("%s: unwanted hfscr_bit_nr\n", uname);
+ return 0;
+ }
+
+ if (f->hv_support == HV_SUPPORT_HFSCR) {
+ if (f->hfscr_bit_nr == -1) {
+ pr_warn("%s: missing hfscr_bit_nr\n", uname);
+ return 0;
+ }
+ }
+ } else {
+ if (f->hv_support != HV_SUPPORT_NONE || f->hfscr_bit_nr != -1) {
+ pr_warn("%s: unwanted hv_support/hfscr_bit_nr\n", uname);
+ return 0;
+ }
+ }
+
+ if (f->usable_privilege & USABLE_OS) {
+ if (f->os_support == OS_SUPPORT_NONE && f->fscr_bit_nr != -1) {
+ pr_warn("%s: unwanted fscr_bit_nr\n", uname);
+ return 0;
+ }
+
+ if (f->os_support == OS_SUPPORT_FSCR) {
+ if (f->fscr_bit_nr == -1) {
+ pr_warn("%s: missing fscr_bit_nr\n", uname);
+ return 0;
+ }
+ }
+ } else {
+ if (f->os_support != OS_SUPPORT_NONE || f->fscr_bit_nr != -1) {
+ pr_warn("%s: unwanted os_support/fscr_bit_nr\n", uname);
+ return 0;
+ }
+ }
+
+ if (!(f->usable_privilege & USABLE_PR)) {
+ if (f->hwcap_bit_nr != -1) {
+ pr_warn("%s: unwanted hwcap_bit_nr\n", uname);
+ return 0;
+ }
+ }
+
+ /* Do all the independent features in the first pass */
+ if (!of_get_flat_dt_prop(node, "dependencies", &len)) {
+ if (cpufeatures_process_feature(f))
+ f->enabled = 1;
+ else
+ f->disabled = 1;
+ }
+
+ return 0;
+}
+
+static void __init cpufeatures_deps_enable(struct dt_cpu_feature *f)
+{
+ const __be32 *prop;
+ int len;
+ int nr_deps;
+ int i;
+
+ if (f->enabled || f->disabled)
+ return;
+
+ prop = of_get_flat_dt_prop(f->node, "dependencies", &len);
+ if (!prop) {
+ pr_warn("%s: missing dependencies property", f->name);
+ return;
+ }
+
+ nr_deps = len / sizeof(int);
+
+ for (i = 0; i < nr_deps; i++) {
+ unsigned long phandle = be32_to_cpu(prop[i]);
+ int j;
+
+ for (j = 0; j < nr_dt_cpu_features; j++) {
+ struct dt_cpu_feature *d = &dt_cpu_features[j];
+
+ if (of_get_flat_dt_phandle(d->node) == phandle) {
+ cpufeatures_deps_enable(d);
+ if (d->disabled) {
+ f->disabled = 1;
+ return;
+ }
+ }
+ }
+ }
+
+ if (cpufeatures_process_feature(f))
+ f->enabled = 1;
+ else
+ f->disabled = 1;
+}
+
+static int __init scan_cpufeatures_subnodes(unsigned long node,
+ const char *uname,
+ void *data)
+{
+ int *count = data;
+
+ process_cpufeatures_node(node, uname, *count);
+
+ (*count)++;
+
+ return 0;
+}
+
+static int __init count_cpufeatures_subnodes(unsigned long node,
+ const char *uname,
+ void *data)
+{
+ int *count = data;
+
+ (*count)++;
+
+ return 0;
+}
+
+static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
+ *uname, int depth, void *data)
+{
+ const __be32 *prop;
+ int count, i;
+ u32 isa;
+
+ /* We are scanning "ibm,powerpc-cpu-features" nodes only */
+ if (!of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features"))
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "isa", NULL);
+ if (!prop)
+ /* We checked before, "can't happen" */
+ return 0;
+
+ isa = be32_to_cpup(prop);
+
+ /* Count and allocate space for cpu features */
+ of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes,
+ &nr_dt_cpu_features);
+ dt_cpu_features = __va(
+ memblock_alloc(sizeof(struct dt_cpu_feature)*
+ nr_dt_cpu_features, PAGE_SIZE));
+
+ cpufeatures_setup_start(isa);
+
+ /* Scan nodes into dt_cpu_features and enable those without deps */
+ count = 0;
+ of_scan_flat_dt_subnodes(node, scan_cpufeatures_subnodes, &count);
+
+ /* Recursive enable remaining features with dependencies */
+ for (i = 0; i < nr_dt_cpu_features; i++) {
+ struct dt_cpu_feature *f = &dt_cpu_features[i];
+
+ cpufeatures_deps_enable(f);
+ }
+
+ prop = of_get_flat_dt_prop(node, "display-name", NULL);
+ if (prop && strlen((char *)prop) != 0) {
+ strlcpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name));
+ cur_cpu_spec->cpu_name = dt_cpu_name;
+ }
+
+ cpufeatures_setup_finished();
+
+ memblock_free(__pa(dt_cpu_features),
+ sizeof(struct dt_cpu_feature)*nr_dt_cpu_features);
+
+ return 0;
+}
+
+void __init dt_cpu_ftrs_scan(void)
+{
+ of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL);
+}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 9de7f79e702b..63992b2d8e15 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -22,7 +22,6 @@
*/
#include <linux/delay.h>
-#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
@@ -37,7 +36,7 @@
#include <linux/of.h>
#include <linux/atomic.h>
-#include <asm/debug.h>
+#include <asm/debugfs.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index b94887165a10..c405c79e50cd 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -724,7 +724,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
*/
#define MAX_WAIT_FOR_RECOVERY 300
-static void eeh_handle_normal_event(struct eeh_pe *pe)
+/**
+ * eeh_handle_normal_event - Handle EEH events on a specific PE
+ * @pe: EEH PE
+ *
+ * Attempts to recover the given PE. If recovery fails or the PE has failed
+ * too many times, remove the PE.
+ *
+ * Returns true if @pe should no longer be used, else false.
+ */
+static bool eeh_handle_normal_event(struct eeh_pe *pe)
{
struct pci_bus *frozen_bus;
struct eeh_dev *edev, *tmp;
@@ -736,13 +745,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
if (!frozen_bus) {
pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->addr);
- return;
+ return false;
}
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
- if (pe->freeze_count > eeh_max_freezes)
- goto excess_failures;
+ if (pe->freeze_count > eeh_max_freezes) {
+ pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n"
+ "last hour and has been permanently disabled.\n",
+ pe->phb->global_number, pe->addr,
+ pe->freeze_count);
+ goto hard_fail;
+ }
pr_warn("EEH: This PCI device has failed %d times in the last hour\n",
pe->freeze_count);
@@ -870,27 +884,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Notify device driver to resume\n");
eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
- return;
+ return false;
-excess_failures:
+hard_fail:
/*
* About 90% of all real-life EEH failures in the field
* are due to poorly seated PCI cards. Only 10% or so are
* due to actual, failed cards.
*/
- pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n"
- "last hour and has been permanently disabled.\n"
- "Please try reseating or replacing it.\n",
- pe->phb->global_number, pe->addr,
- pe->freeze_count);
- goto perm_error;
-
-hard_fail:
pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
"Please try reseating or replacing it\n",
pe->phb->global_number, pe->addr);
-perm_error:
eeh_slot_error_detail(pe, EEH_LOG_PERM);
/* Notify all devices that they're about to go down. */
@@ -915,10 +920,21 @@ perm_error:
pci_lock_rescan_remove();
pci_hp_remove_devices(frozen_bus);
pci_unlock_rescan_remove();
+
+ /* The passed PE should no longer be used */
+ return true;
}
}
+ return false;
}
+/**
+ * eeh_handle_special_event - Handle EEH events without a specific failing PE
+ *
+ * Called when an EEH event is detected but can't be narrowed down to a
+ * specific PE. Iterates through possible failures and handles them as
+ * necessary.
+ */
static void eeh_handle_special_event(void)
{
struct eeh_pe *pe, *phb_pe;
@@ -982,7 +998,14 @@ static void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
- eeh_handle_normal_event(pe);
+ /*
+ * eeh_handle_normal_event() can make the PE stale if it
+ * determines that the PE cannot possibly be recovered.
+ * Don't modify the PE state if that's the case.
+ */
+ if (eeh_handle_normal_event(pe))
+ continue;
+
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
} else {
pci_lock_rescan_remove();
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index a38600949f3a..8587059ad848 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -31,7 +31,6 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
-#include <asm/ftrace.h>
#include <asm/ptrace.h>
#include <asm/export.h>
@@ -1315,109 +1314,3 @@ machine_check_in_rtas:
/* XXX load up BATs and panic */
#endif /* CONFIG_PPC_RTAS */
-
-#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
- /*
- * It is required that _mcount on PPC32 must preserve the
- * link register. But we have r0 to play with. We use r0
- * to push the return address back to the caller of mcount
- * into the ctr register, restore the link register and
- * then jump back using the ctr register.
- */
- mflr r0
- mtctr r0
- lwz r0, 4(r1)
- mtlr r0
- bctr
-
-_GLOBAL(ftrace_caller)
- MCOUNT_SAVE_FRAME
- /* r3 ends up with link register */
- subi r3, r3, MCOUNT_INSN_SIZE
-.globl ftrace_call
-ftrace_call:
- bl ftrace_stub
- nop
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
- b ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
- MCOUNT_RESTORE_FRAME
- /* old link register ends up in ctr reg */
- bctr
-#else
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-
- MCOUNT_SAVE_FRAME
-
- subi r3, r3, MCOUNT_INSN_SIZE
- LOAD_REG_ADDR(r5, ftrace_trace_function)
- lwz r5,0(r5)
-
- mtctr r5
- bctrl
- nop
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- b ftrace_graph_caller
-#endif
- MCOUNT_RESTORE_FRAME
- bctr
-#endif
-EXPORT_SYMBOL(_mcount)
-
-_GLOBAL(ftrace_stub)
- blr
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(ftrace_graph_caller)
- /* load r4 with local address */
- lwz r4, 44(r1)
- subi r4, r4, MCOUNT_INSN_SIZE
-
- /* Grab the LR out of the caller stack frame */
- lwz r3,52(r1)
-
- bl prepare_ftrace_return
- nop
-
- /*
- * prepare_ftrace_return gives us the address we divert to.
- * Change the LR in the callers stack frame to this.
- */
- stw r3,52(r1)
-
- MCOUNT_RESTORE_FRAME
- /* old link register ends up in ctr reg */
- bctr
-
-_GLOBAL(return_to_handler)
- /* need to save return values */
- stwu r1, -32(r1)
- stw r3, 20(r1)
- stw r4, 16(r1)
- stw r31, 12(r1)
- mr r31, r1
-
- bl ftrace_return_to_handler
- nop
-
- /* return value has real return address */
- mtlr r3
-
- lwz r3, 20(r1)
- lwz r4, 16(r1)
- lwz r31,12(r1)
- lwz r1, 0(r1)
-
- /* Jump back to real return address */
- blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 767ef6d68c9e..bfbad08a1207 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -20,7 +20,6 @@
#include <linux/errno.h>
#include <linux/err.h>
-#include <linux/magic.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/page.h>
@@ -33,7 +32,6 @@
#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/irqflags.h>
-#include <asm/ftrace.h>
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
#include <asm/tm.h>
@@ -1173,381 +1171,3 @@ _GLOBAL(enter_prom)
ld r0,16(r1)
mtlr r0
blr
-
-#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-EXPORT_SYMBOL(_mcount)
- mflr r12
- mtctr r12
- mtlr r0
- bctr
-
-#ifndef CC_USING_MPROFILE_KERNEL
-_GLOBAL_TOC(ftrace_caller)
- /* Taken from output of objdump from lib64/glibc */
- mflr r3
- ld r11, 0(r1)
- stdu r1, -112(r1)
- std r3, 128(r1)
- ld r4, 16(r11)
- subi r3, r3, MCOUNT_INSN_SIZE
-.globl ftrace_call
-ftrace_call:
- bl ftrace_stub
- nop
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
- b ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
-
-#else /* CC_USING_MPROFILE_KERNEL */
-/*
- *
- * ftrace_caller() is the function that replaces _mcount() when ftrace is
- * active.
- *
- * We arrive here after a function A calls function B, and we are the trace
- * function for B. When we enter r1 points to A's stack frame, B has not yet
- * had a chance to allocate one yet.
- *
- * Additionally r2 may point either to the TOC for A, or B, depending on
- * whether B did a TOC setup sequence before calling us.
- *
- * On entry the LR points back to the _mcount() call site, and r0 holds the
- * saved LR as it was on entry to B, ie. the original return address at the
- * call site in A.
- *
- * Our job is to save the register state into a struct pt_regs (on the stack)
- * and then arrange for the ftrace function to be called.
- */
-_GLOBAL(ftrace_caller)
- /* Save the original return address in A's stack frame */
- std r0,LRSAVE(r1)
-
- /* Create our stack frame + pt_regs */
- stdu r1,-SWITCH_FRAME_SIZE(r1)
-
- /* Save all gprs to pt_regs */
- SAVE_8GPRS(0,r1)
- SAVE_8GPRS(8,r1)
- SAVE_8GPRS(16,r1)
- SAVE_8GPRS(24,r1)
-
- /* Load special regs for save below */
- mfmsr r8
- mfctr r9
- mfxer r10
- mfcr r11
-
- /* Get the _mcount() call site out of LR */
- mflr r7
- /* Save it as pt_regs->nip & pt_regs->link */
- std r7, _NIP(r1)
- std r7, _LINK(r1)
-
- /* Save callee's TOC in the ABI compliant location */
- std r2, 24(r1)
- ld r2,PACATOC(r13) /* get kernel TOC in r2 */
-
- addis r3,r2,function_trace_op@toc@ha
- addi r3,r3,function_trace_op@toc@l
- ld r5,0(r3)
-
-#ifdef CONFIG_LIVEPATCH
- mr r14,r7 /* remember old NIP */
-#endif
- /* Calculate ip from nip-4 into r3 for call below */
- subi r3, r7, MCOUNT_INSN_SIZE
-
- /* Put the original return address in r4 as parent_ip */
- mr r4, r0
-
- /* Save special regs */
- std r8, _MSR(r1)
- std r9, _CTR(r1)
- std r10, _XER(r1)
- std r11, _CCR(r1)
-
- /* Load &pt_regs in r6 for call below */
- addi r6, r1 ,STACK_FRAME_OVERHEAD
-
- /* ftrace_call(r3, r4, r5, r6) */
-.globl ftrace_call
-ftrace_call:
- bl ftrace_stub
- nop
-
- /* Load ctr with the possibly modified NIP */
- ld r3, _NIP(r1)
- mtctr r3
-#ifdef CONFIG_LIVEPATCH
- cmpd r14,r3 /* has NIP been altered? */
-#endif
-
- /* Restore gprs */
- REST_8GPRS(0,r1)
- REST_8GPRS(8,r1)
- REST_8GPRS(16,r1)
- REST_8GPRS(24,r1)
-
- /* Restore callee's TOC */
- ld r2, 24(r1)
-
- /* Pop our stack frame */
- addi r1, r1, SWITCH_FRAME_SIZE
-
- /* Restore original LR for return to B */
- ld r0, LRSAVE(r1)
- mtlr r0
-
-#ifdef CONFIG_LIVEPATCH
- /* Based on the cmpd above, if the NIP was altered handle livepatch */
- bne- livepatch_handler
-#endif
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- stdu r1, -112(r1)
-.globl ftrace_graph_call
-ftrace_graph_call:
- b ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
- addi r1, r1, 112
-#endif
-
- ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */
- mtlr r0
- bctr /* jump after _mcount site */
-#endif /* CC_USING_MPROFILE_KERNEL */
-
-_GLOBAL(ftrace_stub)
- blr
-
-#ifdef CONFIG_LIVEPATCH
- /*
- * This function runs in the mcount context, between two functions. As
- * such it can only clobber registers which are volatile and used in
- * function linkage.
- *
- * We get here when a function A, calls another function B, but B has
- * been live patched with a new function C.
- *
- * On entry:
- * - we have no stack frame and can not allocate one
- * - LR points back to the original caller (in A)
- * - CTR holds the new NIP in C
- * - r0 & r12 are free
- *
- * r0 can't be used as the base register for a DS-form load or store, so
- * we temporarily shuffle r1 (stack pointer) into r0 and then put it back.
- */
-livepatch_handler:
- CURRENT_THREAD_INFO(r12, r1)
-
- /* Save stack pointer into r0 */
- mr r0, r1
-
- /* Allocate 3 x 8 bytes */
- ld r1, TI_livepatch_sp(r12)
- addi r1, r1, 24
- std r1, TI_livepatch_sp(r12)
-
- /* Save toc & real LR on livepatch stack */
- std r2, -24(r1)
- mflr r12
- std r12, -16(r1)
-
- /* Store stack end marker */
- lis r12, STACK_END_MAGIC@h
- ori r12, r12, STACK_END_MAGIC@l
- std r12, -8(r1)
-
- /* Restore real stack pointer */
- mr r1, r0
-
- /* Put ctr in r12 for global entry and branch there */
- mfctr r12
- bctrl
-
- /*
- * Now we are returning from the patched function to the original
- * caller A. We are free to use r0 and r12, and we can use r2 until we
- * restore it.
- */
-
- CURRENT_THREAD_INFO(r12, r1)
-
- /* Save stack pointer into r0 */
- mr r0, r1
-
- ld r1, TI_livepatch_sp(r12)
-
- /* Check stack marker hasn't been trashed */
- lis r2, STACK_END_MAGIC@h
- ori r2, r2, STACK_END_MAGIC@l
- ld r12, -8(r1)
-1: tdne r12, r2
- EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0
-
- /* Restore LR & toc from livepatch stack */
- ld r12, -16(r1)
- mtlr r12
- ld r2, -24(r1)
-
- /* Pop livepatch stack frame */
- CURRENT_THREAD_INFO(r12, r0)
- subi r1, r1, 24
- std r1, TI_livepatch_sp(r12)
-
- /* Restore real stack pointer */
- mr r1, r0
-
- /* Return to original caller of live patched function */
- blr
-#endif
-
-
-#else
-_GLOBAL_TOC(_mcount)
-EXPORT_SYMBOL(_mcount)
- /* Taken from output of objdump from lib64/glibc */
- mflr r3
- ld r11, 0(r1)
- stdu r1, -112(r1)
- std r3, 128(r1)
- ld r4, 16(r11)
-
- subi r3, r3, MCOUNT_INSN_SIZE
- LOAD_REG_ADDR(r5,ftrace_trace_function)
- ld r5,0(r5)
- ld r5,0(r5)
- mtctr r5
- bctrl
- nop
-
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- b ftrace_graph_caller
-#endif
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
-_GLOBAL(ftrace_stub)
- blr
-
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-#ifndef CC_USING_MPROFILE_KERNEL
-_GLOBAL(ftrace_graph_caller)
- /* load r4 with local address */
- ld r4, 128(r1)
- subi r4, r4, MCOUNT_INSN_SIZE
-
- /* Grab the LR out of the caller stack frame */
- ld r11, 112(r1)
- ld r3, 16(r11)
-
- bl prepare_ftrace_return
- nop
-
- /*
- * prepare_ftrace_return gives us the address we divert to.
- * Change the LR in the callers stack frame to this.
- */
- ld r11, 112(r1)
- std r3, 16(r11)
-
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
- blr
-
-#else /* CC_USING_MPROFILE_KERNEL */
-_GLOBAL(ftrace_graph_caller)
- /* with -mprofile-kernel, parameter regs are still alive at _mcount */
- std r10, 104(r1)
- std r9, 96(r1)
- std r8, 88(r1)
- std r7, 80(r1)
- std r6, 72(r1)
- std r5, 64(r1)
- std r4, 56(r1)
- std r3, 48(r1)
-
- /* Save callee's TOC in the ABI compliant location */
- std r2, 24(r1)
- ld r2, PACATOC(r13) /* get kernel TOC in r2 */
-
- mfctr r4 /* ftrace_caller has moved local addr here */
- std r4, 40(r1)
- mflr r3 /* ftrace_caller has restored LR from stack */
- subi r4, r4, MCOUNT_INSN_SIZE
-
- bl prepare_ftrace_return
- nop
-
- /*
- * prepare_ftrace_return gives us the address we divert to.
- * Change the LR to this.
- */
- mtlr r3
-
- ld r0, 40(r1)
- mtctr r0
- ld r10, 104(r1)
- ld r9, 96(r1)
- ld r8, 88(r1)
- ld r7, 80(r1)
- ld r6, 72(r1)
- ld r5, 64(r1)
- ld r4, 56(r1)
- ld r3, 48(r1)
-
- /* Restore callee's TOC */
- ld r2, 24(r1)
-
- addi r1, r1, 112
- mflr r0
- std r0, LRSAVE(r1)
- bctr
-#endif /* CC_USING_MPROFILE_KERNEL */
-
-_GLOBAL(return_to_handler)
- /* need to save return values */
- std r4, -32(r1)
- std r3, -24(r1)
- /* save TOC */
- std r2, -16(r1)
- std r31, -8(r1)
- mr r31, r1
- stdu r1, -112(r1)
-
- /*
- * We might be called from a module.
- * Switch to our TOC to run inside the core kernel.
- */
- ld r2, PACATOC(r13)
-
- bl ftrace_return_to_handler
- nop
-
- /* return value has real return address */
- mtlr r3
-
- ld r1, 0(r1)
- ld r4, -32(r1)
- ld r3, -24(r1)
- ld r2, -16(r1)
- ld r31, -8(r1)
-
- /* Jump back to real return address */
- blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 45b453e4d0c8..acd8ca76233e 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -735,8 +735,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
andis. r15,r14,(DBSR_IC|DBSR_BT)@h
beq+ 1f
+#ifdef CONFIG_RELOCATABLE
+ ld r15,PACATOC(r13)
+ ld r14,interrupt_base_book3e@got(r15)
+ ld r15,__end_interrupts@got(r15)
+#else
LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
blt+ cr0,1f
@@ -799,8 +805,14 @@ kernel_dbg_exc:
andis. r15,r14,(DBSR_IC|DBSR_BT)@h
beq+ 1f
+#ifdef CONFIG_RELOCATABLE
+ ld r15,PACATOC(r13)
+ ld r14,interrupt_base_book3e@got(r15)
+ ld r15,__end_interrupts@got(r15)
+#else
LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
blt+ cr0,1f
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6353019966e6..ae418b85c17c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -116,9 +116,11 @@ EXC_VIRT_NONE(0x4000, 0x100)
EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
SET_SCRATCH0(r13)
- GET_PACA(r13)
- clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
- EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD,
+ /*
+ * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
+ * being used, so a nested NMI exception would corrupt it.
+ */
+ EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common, EXC_STD,
IDLETEST, 0x100)
EXC_REAL_END(system_reset, 0x100, 0x100)
@@ -126,34 +128,37 @@ EXC_VIRT_NONE(0x4100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
-BEGIN_FTR_SECTION
- GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
- bl pnv_restore_hyp_resource
+ b pnv_powersave_wakeup
+#endif
- li r0,PNV_THREAD_RUNNING
- stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
+EXC_COMMON_BEGIN(system_reset_common)
+ /*
+ * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
+ * to recover, but nested NMI will notice in_nmi and not recover
+ * because of the use of the NMI stack. in_nmi reentrancy is tested in
+ * system_reset_exception.
+ */
+ lhz r10,PACA_IN_NMI(r13)
+ addi r10,r10,1
+ sth r10,PACA_IN_NMI(r13)
+ li r10,MSR_RI
+ mtmsrd r10,1
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
- lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beq 1f
- BRANCH_TO_KVM(r10, kvm_start_guest)
-1:
-#endif
+ mr r10,r1
+ ld r1,PACA_NMI_EMERG_SP(r13)
+ subi r1,r1,INT_FRAME_SIZE
+ EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100,
+ system_reset, system_reset_exception,
+ ADD_NVGPRS;ADD_RECONCILE)
- /* Return SRR1 from power7_nap() */
- mfspr r3,SPRN_SRR1
- blt cr3,2f
- b pnv_wakeup_loss
-2: b pnv_wakeup_noloss
-#endif
+ /*
+ * The stack is no longer in use, decrement in_nmi.
+ */
+ lhz r10,PACA_IN_NMI(r13)
+ subi r10,r10,1
+ sth r10,PACA_IN_NMI(r13)
-EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
+ b ret_from_except
#ifdef CONFIG_PPC_PSERIES
/*
@@ -161,8 +166,9 @@ EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
*/
TRAMP_REAL_BEGIN(system_reset_fwnmi)
SET_SCRATCH0(r13) /* save r13 */
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
- NOTEST, 0x100)
+ /* See comment at system_reset exception */
+ EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common,
+ EXC_STD, NOTEST, 0x100)
#endif /* CONFIG_PPC_PSERIES */
@@ -172,14 +178,6 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
* vector
*/
SET_SCRATCH0(r13) /* save r13 */
- /*
- * Running native on arch 2.06 or later, we may wakeup from winkle
- * inside machine check. If yes, then last bit of HSPRG0 would be set
- * to 1. Hence clear it unconditionally.
- */
- GET_PACA(r13)
- clrrdi r13,r13,1
- SET_PACA(r13)
EXCEPTION_PROLOG_0(PACA_EXMC)
BEGIN_FTR_SECTION
b machine_check_powernv_early
@@ -212,6 +210,12 @@ BEGIN_FTR_SECTION
* NOTE: We are here with MSR_ME=0 (off), which means we risk a
* checkstop if we get another machine check exception before we do
* rfid with MSR_ME=1.
+ *
+ * This interrupt can wake directly from idle. If that is the case,
+ * the machine check is handled then the idle wakeup code is called
+ * to restore state. In that case, the POWER9 DD1 idle PACA workaround
+ * is not applied in the early machine check code, which will cause
+ * bugs.
*/
mr r11,r1 /* Save r1 */
lhz r10,PACA_IN_MCE(r13)
@@ -268,20 +272,11 @@ machine_check_fwnmi:
machine_check_pSeries_0:
EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
/*
- * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the
- * difference that MSR_RI is not enabled, because PACA_EXMC is being
- * used, so nested machine check corrupts it. machine_check_common
- * enables MSR_RI.
+ * MSR_RI is not enabled, because PACA_EXMC is being used, so a
+ * nested machine check corrupts it. machine_check_common enables
+ * MSR_RI.
*/
- ld r10,PACAKMSR(r13)
- xori r10,r10,MSR_RI
- mfspr r11,SPRN_SRR0
- LOAD_HANDLER(r12, machine_check_common)
- mtspr SPRN_SRR0,r12
- mfspr r12,SPRN_SRR1
- mtspr SPRN_SRR1,r10
- rfid
- b . /* prevent speculative execution */
+ EXCEPTION_PROLOG_PSERIES_1_NORI(machine_check_common, EXC_STD)
TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
@@ -340,6 +335,37 @@ EXC_COMMON_BEGIN(machine_check_common)
/* restore original r1. */ \
ld r1,GPR1(r1)
+#ifdef CONFIG_PPC_P7_NAP
+/*
+ * This is an idle wakeup. Low level machine check has already been
+ * done. Queue the event then call the idle code to do the wake up.
+ */
+EXC_COMMON_BEGIN(machine_check_idle_common)
+ bl machine_check_queue_event
+
+ /*
+ * We have not used any non-volatile GPRs here, and as a rule
+ * most exception code including machine check does not.
+ * Therefore PACA_NAPSTATELOST does not need to be set. Idle
+ * wakeup will restore volatile registers.
+ *
+ * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce.
+ *
+ * Then decrement MCE nesting after finishing with the stack.
+ */
+ ld r3,_MSR(r1)
+
+ lhz r11,PACA_IN_MCE(r13)
+ subi r11,r11,1
+ sth r11,PACA_IN_MCE(r13)
+
+ /* Turn off the RI bit because SRR1 is used by idle wakeup code. */
+ /* Recoverability could be improved by reducing the use of SRR1. */
+ li r11,0
+ mtmsrd r11,1
+
+ b pnv_powersave_wakeup_mce
+#endif
/*
* Handle machine check early in real mode. We come here with
* ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
@@ -352,6 +378,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
bl machine_check_early
std r3,RESULT(r1) /* Save result */
ld r12,_MSR(r1)
+
#ifdef CONFIG_PPC_P7_NAP
/*
* Check if thread was in power saving mode. We come here when any
@@ -362,48 +389,12 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
*
* Go back to nap/sleep/winkle mode again if (b) is true.
*/
- rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
- beq 4f /* No, it wasn;t */
- /* Thread was in power saving mode. Go back to nap again. */
- cmpwi r11,2
- blt 3f
- /* Supervisor/Hypervisor state loss */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-3: bl machine_check_queue_event
- MACHINE_CHECK_HANDLER_WINDUP
- GET_PACA(r13)
- ld r1,PACAR1(r13)
- /*
- * Check what idle state this CPU was in and go back to same mode
- * again.
- */
- lbz r3,PACA_THREAD_IDLE_STATE(r13)
- cmpwi r3,PNV_THREAD_NAP
- bgt 10f
- IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
- /* No return */
-10:
- cmpwi r3,PNV_THREAD_SLEEP
- bgt 2f
- IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
- /* No return */
-
-2:
- /*
- * Go back to winkle. Please note that this thread was woken up in
- * machine check from winkle and have not restored the per-subcore
- * state. Hence before going back to winkle, set last bit of HSPRG0
- * to 1. This will make sure that if this thread gets woken up
- * again at reset vector 0x100 then it will get chance to restore
- * the subcore state.
- */
- ori r13,r13,1
- SET_PACA(r13)
- IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
- /* No return */
-4:
+ BEGIN_FTR_SECTION
+ rlwinm. r11,r12,47-31,30,31
+ bne machine_check_idle_common
+ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif
+
/*
* Check if we are coming from hypervisor userspace. If yes then we
* continue in host kernel in V mode to deliver the MC event.
@@ -968,17 +959,12 @@ EXC_VIRT_NONE(0x4e60, 0x20)
TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
TRAMP_REAL_BEGIN(hmi_exception_early)
EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60)
- mr r10,r1 /* Save r1 */
- ld r1,PACAEMERGSP(r13) /* Use emergency stack */
+ mr r10,r1 /* Save r1 */
+ ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- std r9,_CCR(r1) /* save CR in stackframe */
mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- std r11,_NIP(r1) /* save HSRR0 in stackframe */
- mfspr r12,SPRN_HSRR1 /* Save SRR1 */
- std r12,_MSR(r1) /* save SRR1 in stackframe */
- std r10,0(r1) /* make stack chain pointer */
- std r0,GPR0(r1) /* save r0 in stackframe */
- std r10,GPR1(r1) /* save r1 in stackframe */
+ mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
+ EXCEPTION_PROLOG_COMMON_1()
EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
EXCEPTION_PROLOG_COMMON_3(0xe60)
addi r3,r1,STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8ff0dd4e77a7..466569e26278 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -30,17 +30,16 @@
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/delay.h>
-#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
+#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/fadump.h>
-#include <asm/debug.h>
#include <asm/setup.h>
static struct fw_dump fw_dump;
@@ -210,14 +209,20 @@ static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
*/
static inline unsigned long fadump_calculate_reserve_size(void)
{
- unsigned long size;
+ int ret;
+ unsigned long long base, size;
/*
- * Check if the size is specified through fadump_reserve_mem= cmdline
- * option. If yes, then use that.
+ * Check if the size is specified through crashkernel= cmdline
+ * option. If yes, then use that but ignore base as fadump
+ * reserves memory at end of RAM.
*/
- if (fw_dump.reserve_bootvar)
+ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+ &size, &base);
+ if (ret == 0 && size > 0) {
+ fw_dump.reserve_bootvar = (unsigned long)size;
return fw_dump.reserve_bootvar;
+ }
/* divide by 20 to get 5% of value */
size = memblock_end_of_DRAM() / 20;
@@ -319,15 +324,34 @@ int __init fadump_reserve_mem(void)
pr_debug("fadumphdr_addr = %p\n",
(void *) fw_dump.fadumphdr_addr);
} else {
- /* Reserve the memory at the top of memory. */
size = get_fadump_area_size();
- base = memory_boundary - size;
- memblock_reserve(base, size);
- printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
- "for firmware-assisted dump\n",
- (unsigned long)(size >> 20),
- (unsigned long)(base >> 20));
+
+ /*
+ * Reserve memory at an offset closer to bottom of the RAM to
+ * minimize the impact of memory hot-remove operation. We can't
+ * use memblock_find_in_range() here since it doesn't allocate
+ * from bottom to top.
+ */
+ for (base = fw_dump.boot_memory_size;
+ base <= (memory_boundary - size);
+ base += size) {
+ if (memblock_is_region_memory(base, size) &&
+ !memblock_is_region_reserved(base, size))
+ break;
+ }
+ if ((base > (memory_boundary - size)) ||
+ memblock_reserve(base, size)) {
+ pr_err("Failed to reserve memory\n");
+ return 0;
+ }
+
+ pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
+ "assisted dump (System RAM: %ldMB)\n",
+ (unsigned long)(size >> 20),
+ (unsigned long)(base >> 20),
+ (unsigned long)(memblock_phys_mem_size() >> 20));
}
+
fw_dump.reserve_dump_area_start = base;
fw_dump.reserve_dump_area_size = size;
return 1;
@@ -353,15 +377,6 @@ static int __init early_fadump_param(char *p)
}
early_param("fadump", early_fadump_param);
-/* Look for fadump_reserve_mem= cmdline option */
-static int __init early_fadump_reserve_mem(char *p)
-{
- if (p)
- fw_dump.reserve_bootvar = memparse(p, &p);
- return 0;
-}
-early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-
static void register_fw_dump(struct fadump_mem_struct *fdm)
{
int rc;
@@ -509,34 +524,6 @@ fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
return reg_entry;
}
-static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
- void *data, size_t data_len)
-{
- struct elf_note note;
-
- note.n_namesz = strlen(name) + 1;
- note.n_descsz = data_len;
- note.n_type = type;
- memcpy(buf, &note, sizeof(note));
- buf += (sizeof(note) + 3)/4;
- memcpy(buf, name, note.n_namesz);
- buf += (note.n_namesz + 3)/4;
- memcpy(buf, data, note.n_descsz);
- buf += (note.n_descsz + 3)/4;
-
- return buf;
-}
-
-static void fadump_final_note(u32 *buf)
-{
- struct elf_note note;
-
- note.n_namesz = 0;
- note.n_descsz = 0;
- note.n_type = 0;
- memcpy(buf, &note, sizeof(note));
-}
-
static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
@@ -547,8 +534,8 @@ static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
* prstatus.pr_pid = ????
*/
elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
- buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
- &prstatus, sizeof(prstatus));
+ buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
+ &prstatus, sizeof(prstatus));
return buf;
}
@@ -689,7 +676,7 @@ static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
}
}
- fadump_final_note(note_buf);
+ final_note(note_buf);
if (fdh) {
pr_debug("Updating elfcore header (%llx) with cpu notes\n",
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 1607be7c0ef2..e22734278458 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -735,11 +735,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE)
EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE)
EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE)
-
- .globl mol_trampoline
- .set mol_trampoline, i0x2f00
- EXPORT_SYMBOL(mol_trampoline)
+ EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE)
. = 0x3000
@@ -1278,16 +1274,6 @@ EXPORT_SYMBOL(empty_zero_page)
swapper_pg_dir:
.space PGD_TABLE_SIZE
- .globl intercept_table
-intercept_table:
- .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700
- .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0
- .long 0, 0, 0, i0x1300, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
-EXPORT_SYMBOL(intercept_table)
-
/* Room for two PTE pointers, usually the kernel and current user pointers
* to their respective root page table.
*/
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 1dc5eae2ced3..0ddc602b33a4 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -949,7 +949,8 @@ start_here_multiplatform:
LOAD_REG_ADDR(r3,init_thread_union)
/* set up a stack pointer */
- addi r1,r3,THREAD_SIZE
+ LOAD_REG_IMMEDIATE(r1,THREAD_SIZE)
+ add r1,r3,r1
li r0,0
stdu r0,-STACK_FRAME_OVERHEAD(r1)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 6fd08219248d..07d4e0ad60db 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -20,6 +20,7 @@
#include <asm/kvm_book3s_asm.h>
#include <asm/opal.h>
#include <asm/cpuidle.h>
+#include <asm/exception-64s.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/mmu.h>
@@ -94,12 +95,12 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
core_idle_lock_held:
HMT_LOW
3: lwz r15,0(r14)
- andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT
+ andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
bne 3b
HMT_MEDIUM
lwarx r15,0,r14
- andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
- bne core_idle_lock_held
+ andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+ bne- core_idle_lock_held
blr
/*
@@ -113,7 +114,7 @@ core_idle_lock_held:
*
* Address to 'rfid' to in r5
*/
-_GLOBAL(pnv_powersave_common)
+pnv_powersave_common:
/* Use r3 to pass state nap/sleep/winkle */
/* NAP is a state loss, we create a regs frame on the
* stack, fill it up with the state we care about and
@@ -188,8 +189,8 @@ pnv_enter_arch207_idle_mode:
/* The following store to HSTATE_HWTHREAD_STATE(r13) */
/* MUST occur in real mode, i.e. with the MMU off, */
/* and the MMU must stay off until we clear this flag */
- /* and test HSTATE_HWTHREAD_REQ(r13) in the system */
- /* reset interrupt vector in exceptions-64s.S. */
+ /* and test HSTATE_HWTHREAD_REQ(r13) in */
+ /* pnv_powersave_wakeup in this file. */
/* The reason is that another thread can switch the */
/* MMU to a guest context whenever this flag is set */
/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
@@ -209,15 +210,20 @@ pnv_enter_arch207_idle_mode:
/* Sleep or winkle */
lbz r7,PACA_THREAD_MASK(r13)
ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
+ li r5,0
+ beq cr3,3f
+ lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h
+3:
lwarx_loop1:
lwarx r15,0,r14
- andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
- bnel core_idle_lock_held
+ andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+ bnel- core_idle_lock_held
+ add r15,r15,r5 /* Add if winkle */
andc r15,r15,r7 /* Clear thread bit */
- andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
+ andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
/*
* If cr0 = 0, then current thread is the last thread of the core entering
@@ -240,7 +246,7 @@ common_enter: /* common code for all the threads entering sleep or winkle */
IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
fastsleep_workaround_at_entry:
- ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
+ oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
stwcx. r15,0,r14
bne- lwarx_loop1
isync
@@ -250,10 +256,10 @@ fastsleep_workaround_at_entry:
li r4,1
bl opal_config_cpu_idle_state
- /* Clear Lock bit */
- li r0,0
+ /* Unlock */
+ xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
lwsync
- stw r0,0(r14)
+ stw r15,0(r14)
b common_enter
enter_winkle:
@@ -301,8 +307,8 @@ power_enter_stop:
lwarx_loop_stop:
lwarx r15,0,r14
- andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
- bnel core_idle_lock_held
+ andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+ bnel- core_idle_lock_held
andc r15,r15,r7 /* Clear thread bit */
stwcx. r15,0,r14
@@ -375,17 +381,113 @@ _GLOBAL(power9_idle_stop)
li r4,1
b pnv_powersave_common
/* No return */
+
/*
- * Called from reset vector. Check whether we have woken up with
- * hypervisor state loss. If yes, restore hypervisor state and return
- * back to reset vector.
+ * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
+ * HSPRG0 will be set to the HSPRG0 value of one of the
+ * threads in this core. Thus the value we have in r13
+ * may not be this thread's paca pointer.
+ *
+ * Fortunately, the TIR remains invariant. Since this thread's
+ * paca pointer is recorded in all its sibling's paca, we can
+ * correctly recover this thread's paca pointer if we
+ * know the index of this thread in the core.
+ *
+ * This index can be obtained from the TIR.
*
- * r13 - Contents of HSPRG0
+ * i.e, thread's position in the core = TIR.
+ * If this value is i, then this thread's paca is
+ * paca->thread_sibling_pacas[i].
+ */
+power9_dd1_recover_paca:
+ mfspr r4, SPRN_TIR
+ /*
+ * Since each entry in thread_sibling_pacas is 8 bytes
+ * we need to left-shift by 3 bits. Thus r4 = i * 8
+ */
+ sldi r4, r4, 3
+ /* Get &paca->thread_sibling_pacas[0] in r5 */
+ ld r5, PACA_SIBLING_PACA_PTRS(r13)
+ /* Load paca->thread_sibling_pacas[i] into r13 */
+ ldx r13, r4, r5
+ SET_PACA(r13)
+ /*
+ * Indicate that we have lost NVGPR state
+ * which needs to be restored from the stack.
+ */
+ li r3, 1
+ stb r0,PACA_NAPSTATELOST(r13)
+ blr
+
+/*
+ * Called from machine check handler for powersave wakeups.
+ * Low level machine check processing has already been done. Now just
+ * go through the wake up path to get everything in order.
+ *
+ * r3 - The original SRR1 value.
+ * Original SRR[01] have been clobbered.
+ * MSR_RI is clear.
+ */
+.global pnv_powersave_wakeup_mce
+pnv_powersave_wakeup_mce:
+ /* Set cr3 for pnv_powersave_wakeup */
+ rlwinm r11,r3,47-31,30,31
+ cmpwi cr3,r11,2
+
+ /*
+ * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
+ * reason into SRR1, which allows reuse of the system reset wakeup
+ * code without being mistaken for another type of wakeup.
+ */
+ oris r3,r3,SRR1_WAKEMCE_RESVD@h
+ mtspr SPRN_SRR1,r3
+
+ b pnv_powersave_wakeup
+
+/*
+ * Called from reset vector for powersave wakeups.
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
*/
-_GLOBAL(pnv_restore_hyp_resource)
+.global pnv_powersave_wakeup
+pnv_powersave_wakeup:
+ ld r2, PACATOC(r13)
+
BEGIN_FTR_SECTION
- ld r2,PACATOC(r13);
+BEGIN_FTR_SECTION_NESTED(70)
+ bl power9_dd1_recover_paca
+END_FTR_SECTION_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70)
+ bl pnv_restore_hyp_resource_arch300
+FTR_SECTION_ELSE
+ bl pnv_restore_hyp_resource_arch207
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
+
+ li r0,PNV_THREAD_RUNNING
+ stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beq 1f
+ b kvm_start_guest
+1:
+#endif
+
+ /* Return SRR1 from power7_nap() */
+ mfspr r3,SPRN_SRR1
+ blt cr3,pnv_wakeup_noloss
+ b pnv_wakeup_loss
+
+/*
+ * Check whether we have woken up with hypervisor state loss.
+ * If yes, restore hypervisor state and return back to link.
+ *
+ * cr3 - set to gt if waking up with partial/complete hypervisor state loss
+ */
+pnv_restore_hyp_resource_arch300:
/*
* POWER ISA 3. Use PSSCR to determine if we
* are waking up from deep idle state
@@ -400,31 +502,19 @@ BEGIN_FTR_SECTION
*/
rldicl r5,r5,4,60
cmpd cr4,r5,r4
- bge cr4,pnv_wakeup_tb_loss
- /*
- * Waking up without hypervisor state loss. Return to
- * reset vector
- */
- blr
+ bge cr4,pnv_wakeup_tb_loss /* returns to caller */
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ blr /* Waking up without hypervisor state loss. */
+/* Same calling convention as arch300 */
+pnv_restore_hyp_resource_arch207:
/*
* POWER ISA 2.07 or less.
- * Check if last bit of HSPGR0 is set. This indicates whether we are
- * waking up from winkle.
+ * Check if we slept with sleep or winkle.
*/
- clrldi r5,r13,63
- clrrdi r13,r13,1
-
- /* Now that we are sure r13 is corrected, load TOC */
- ld r2,PACATOC(r13);
- cmpwi cr4,r5,1
- mtspr SPRN_HSPRG0,r13
-
- lbz r0,PACA_THREAD_IDLE_STATE(r13)
- cmpwi cr2,r0,PNV_THREAD_NAP
- bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */
+ lbz r4,PACA_THREAD_IDLE_STATE(r13)
+ cmpwi cr2,r4,PNV_THREAD_NAP
+ bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */
/*
* We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
@@ -433,8 +523,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
*/
bgt cr3,.
- blr /* Return back to System Reset vector from where
- pnv_restore_hyp_resource was invoked */
+ blr /* Waking up without hypervisor state loss */
/*
* Called if waking up from idle state which can cause either partial or
@@ -444,9 +533,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
*
* r13 - PACA
* cr3 - gt if waking up with partial/complete hypervisor state loss
+ *
+ * If ISA300:
* cr4 - gt or eq if waking up from complete hypervisor state loss.
+ *
+ * If ISA207:
+ * r4 - PACA_THREAD_IDLE_STATE
*/
-_GLOBAL(pnv_wakeup_tb_loss)
+pnv_wakeup_tb_loss:
ld r1,PACAR1(r13)
/*
* Before entering any idle state, the NVGPRs are saved in the stack.
@@ -473,18 +567,19 @@ _GLOBAL(pnv_wakeup_tb_loss)
* is required to return back to reset vector after hypervisor state
* restore is complete.
*/
+ mr r18,r4
mflr r17
mfspr r16,SPRN_SRR1
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- lbz r7,PACA_THREAD_MASK(r13)
ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
-lwarx_loop2:
- lwarx r15,0,r14
- andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
+ lbz r7,PACA_THREAD_MASK(r13)
+
/*
+ * Take the core lock to synchronize against other threads.
+ *
* Lock bit is set in one of the 2 cases-
* a. In the sleep/winkle enter path, the last thread is executing
* fastsleep workaround code.
@@ -492,23 +587,93 @@ lwarx_loop2:
* workaround undo code or resyncing timebase or restoring context
* In either case loop until the lock bit is cleared.
*/
- bnel core_idle_lock_held
+1:
+ lwarx r15,0,r14
+ andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+ bnel- core_idle_lock_held
+ oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
+ stwcx. r15,0,r14
+ bne- 1b
+ isync
- cmpwi cr2,r15,0
+ andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
+ cmpwi cr2,r9,0
/*
* At this stage
* cr2 - eq if first thread to wakeup in core
* cr3- gt if waking up with partial/complete hypervisor state loss
+ * ISA300:
* cr4 - gt or eq if waking up from complete hypervisor state loss.
*/
- ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
- stwcx. r15,0,r14
- bne- lwarx_loop2
- isync
-
BEGIN_FTR_SECTION
+ /*
+ * Were we in winkle?
+ * If yes, check if all threads were in winkle, decrement our
+ * winkle count, set all thread winkle bits if all were in winkle.
+ * Check if our thread has a winkle bit set, and set cr4 accordingly
+ * (to match ISA300, above). Pseudo-code for core idle state
+ * transitions for ISA207 is as follows (everything happens atomically
+ * due to store conditional and/or lock bit):
+ *
+ * nap_idle() { }
+ * nap_wake() { }
+ *
+ * sleep_idle()
+ * {
+ * core_idle_state &= ~thread_in_core
+ * }
+ *
+ * sleep_wake()
+ * {
+ * bool first_in_core, first_in_subcore;
+ *
+ * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
+ * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
+ *
+ * core_idle_state |= thread_in_core;
+ * }
+ *
+ * winkle_idle()
+ * {
+ * core_idle_state &= ~thread_in_core;
+ * core_idle_state += 1 << WINKLE_COUNT_SHIFT;
+ * }
+ *
+ * winkle_wake()
+ * {
+ * bool first_in_core, first_in_subcore, winkle_state_lost;
+ *
+ * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
+ * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
+ *
+ * core_idle_state |= thread_in_core;
+ *
+ * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT))
+ * core_idle_state |= THREAD_WINKLE_BITS;
+ * core_idle_state -= 1 << WINKLE_COUNT_SHIFT;
+ *
+ * winkle_state_lost = core_idle_state &
+ * (thread_in_core << WINKLE_THREAD_SHIFT);
+ * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
+ * }
+ *
+ */
+ cmpwi r18,PNV_THREAD_WINKLE
+ bne 2f
+ andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
+ subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
+ beq 2f
+ ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
+2:
+ /* Shift thread bit to winkle mask, then test if this thread is set,
+ * and remove it from the winkle bits */
+ slwi r8,r7,8
+ and r8,r8,r15
+ andc r15,r15,r8
+ cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
+
lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
and r4,r4,r15
cmpwi r4,0 /* Check if first in subcore */
@@ -593,7 +758,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_WORC,r4
clear_lock:
- andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
+ xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
lwsync
stw r15,0(r14)
@@ -651,8 +816,7 @@ hypervisor_state_restored:
mtspr SPRN_SRR1,r16
mtlr r17
- blr /* Return back to System Reset vector from where
- pnv_restore_hyp_resource was invoked */
+ blr /* return to pnv_powersave_wakeup */
fastsleep_workaround_at_exit:
li r3,1
@@ -664,7 +828,8 @@ fastsleep_workaround_at_exit:
* R3 here contains the value that will be returned to the caller
* of power7_nap.
*/
-_GLOBAL(pnv_wakeup_loss)
+.global pnv_wakeup_loss
+pnv_wakeup_loss:
ld r1,PACAR1(r13)
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
@@ -684,7 +849,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
* R3 here contains the value that will be returned to the caller
* of power7_nap.
*/
-_GLOBAL(pnv_wakeup_noloss)
+pnv_wakeup_noloss:
lbz r0,PACA_NAPSTATELOST(r13)
cmpwi r0,0
bne pnv_wakeup_loss
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 5f202a566ec5..f2b724cd9e64 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -711,13 +711,16 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
return tbl;
}
-void iommu_free_table(struct iommu_table *tbl, const char *node_name)
+static void iommu_table_free(struct kref *kref)
{
unsigned long bitmap_sz;
unsigned int order;
+ struct iommu_table *tbl;
- if (!tbl)
- return;
+ tbl = container_of(kref, struct iommu_table, it_kref);
+
+ if (tbl->it_ops->free)
+ tbl->it_ops->free(tbl);
if (!tbl->it_map) {
kfree(tbl);
@@ -733,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
- pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
+ pr_warn("%s: Unexpected TCEs\n", __func__);
/* calculate bitmap size in bytes */
bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
@@ -746,6 +749,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
kfree(tbl);
}
+struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
+{
+ if (kref_get_unless_zero(&tbl->it_kref))
+ return tbl;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_get);
+
+int iommu_tce_table_put(struct iommu_table *tbl)
+{
+ if (WARN_ON(!tbl))
+ return 0;
+
+ return kref_put(&tbl->it_kref, iommu_table_free);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_put);
+
/* Creates TCEs for a user provided buffer. The user buffer must be
* contiguous real kernel storage (not vmalloc). The address passed here
* comprises a page address and offset into that page. The dma_addr_t
@@ -942,47 +963,36 @@ void iommu_flush_tce(struct iommu_table *tbl)
}
EXPORT_SYMBOL_GPL(iommu_flush_tce);
-int iommu_tce_clear_param_check(struct iommu_table *tbl,
- unsigned long ioba, unsigned long tce_value,
- unsigned long npages)
+int iommu_tce_check_ioba(unsigned long page_shift,
+ unsigned long offset, unsigned long size,
+ unsigned long ioba, unsigned long npages)
{
- /* tbl->it_ops->clear() does not support any value but 0 */
- if (tce_value)
- return -EINVAL;
+ unsigned long mask = (1UL << page_shift) - 1;
- if (ioba & ~IOMMU_PAGE_MASK(tbl))
+ if (ioba & mask)
return -EINVAL;
- ioba >>= tbl->it_page_shift;
- if (ioba < tbl->it_offset)
+ ioba >>= page_shift;
+ if (ioba < offset)
return -EINVAL;
- if ((ioba + npages) > (tbl->it_offset + tbl->it_size))
+ if ((ioba + 1) > (offset + size))
return -EINVAL;
return 0;
}
-EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
+EXPORT_SYMBOL_GPL(iommu_tce_check_ioba);
-int iommu_tce_put_param_check(struct iommu_table *tbl,
- unsigned long ioba, unsigned long tce)
+int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
{
- if (tce & ~IOMMU_PAGE_MASK(tbl))
- return -EINVAL;
-
- if (ioba & ~IOMMU_PAGE_MASK(tbl))
- return -EINVAL;
+ unsigned long mask = (1UL << page_shift) - 1;
- ioba >>= tbl->it_page_shift;
- if (ioba < tbl->it_offset)
- return -EINVAL;
-
- if ((ioba + 1) > (tbl->it_offset + tbl->it_size))
+ if (gpa & mask)
return -EINVAL;
return 0;
}
-EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
+EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
unsigned long *hpa, enum dma_data_direction *direction)
@@ -1004,6 +1014,31 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg);
+#ifdef CONFIG_PPC_BOOK3S_64
+long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+ unsigned long *hpa, enum dma_data_direction *direction)
+{
+ long ret;
+
+ ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+
+ if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+ (*direction == DMA_BIDIRECTIONAL))) {
+ struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
+
+ if (likely(pg)) {
+ SetPageDirty(pg);
+ } else {
+ tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+ ret = -EFAULT;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
+#endif
+
int iommu_take_ownership(struct iommu_table *tbl)
{
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index a018f5cae899..5c291df30fe3 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -65,7 +65,6 @@
#include <asm/machdep.h>
#include <asm/udbg.h>
#include <asm/smp.h>
-#include <asm/debug.h>
#include <asm/livepatch.h>
#include <asm/asm-prototypes.h>
@@ -442,46 +441,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
return sum;
}
-#ifdef CONFIG_HOTPLUG_CPU
-void migrate_irqs(void)
-{
- struct irq_desc *desc;
- unsigned int irq;
- static int warned;
- cpumask_var_t mask;
- const struct cpumask *map = cpu_online_mask;
-
- alloc_cpumask_var(&mask, GFP_KERNEL);
-
- for_each_irq_desc(irq, desc) {
- struct irq_data *data;
- struct irq_chip *chip;
-
- data = irq_desc_get_irq_data(desc);
- if (irqd_is_per_cpu(data))
- continue;
-
- chip = irq_data_get_irq_chip(data);
-
- cpumask_and(mask, irq_data_get_affinity_mask(data), map);
- if (cpumask_any(mask) >= nr_cpu_ids) {
- pr_warn("Breaking affinity for irq %i\n", irq);
- cpumask_copy(mask, map);
- }
- if (chip->irq_set_affinity)
- chip->irq_set_affinity(data, mask, true);
- else if (desc->action && !(warned++))
- pr_err("Cannot set affinity for irq %i\n", irq);
- }
-
- free_cpumask_var(mask);
-
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
-}
-#endif
-
static inline void check_stack_overflow(void)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
new file mode 100644
index 000000000000..6c089d9757c9
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -0,0 +1,104 @@
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi Ltd., 2012
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ * IBM Corporation
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/ftrace.h>
+
+static nokprobe_inline
+int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb, unsigned long orig_nip)
+{
+ /*
+ * Emulate singlestep (and also recover regs->nip)
+ * as if there is a nop
+ */
+ regs->nip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ __this_cpu_write(current_kprobe, NULL);
+ if (orig_nip)
+ regs->nip = orig_nip;
+ return 1;
+}
+
+int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ if (kprobe_ftrace(p))
+ return __skip_singlestep(p, regs, kcb, 0);
+ else
+ return 0;
+}
+NOKPROBE_SYMBOL(skip_singlestep);
+
+/* Ftrace callback handler for kprobes */
+void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
+ struct ftrace_ops *ops, struct pt_regs *regs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+ unsigned long flags;
+
+ /* Disable irq for emulating a breakpoint and avoiding preempt */
+ local_irq_save(flags);
+ hard_irq_disable();
+
+ p = get_kprobe((kprobe_opcode_t *)nip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto end;
+
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ unsigned long orig_nip = regs->nip;
+
+ /*
+ * On powerpc, NIP is *before* this instruction for the
+ * pre handler
+ */
+ regs->nip -= MCOUNT_INSN_SIZE;
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ __skip_singlestep(p, regs, kcb, orig_nip);
+ /*
+ * If pre_handler returns !0, it sets regs->nip and
+ * resets current kprobe.
+ */
+ }
+end:
+ local_irq_restore(flags);
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ p->ainsn.boostable = -1;
+ return 0;
+}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fce05a38851c..160ae0fa7d0d 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -35,6 +35,7 @@
#include <asm/code-patching.h>
#include <asm/cacheflush.h>
#include <asm/sstep.h>
+#include <asm/sections.h>
#include <linux/uaccess.h>
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -42,7 +43,86 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+ return (addr >= (unsigned long)__kprobes_text_start &&
+ addr < (unsigned long)__kprobes_text_end) ||
+ (addr >= (unsigned long)_stext &&
+ addr < (unsigned long)__head_end);
+}
+
+kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
+{
+ kprobe_opcode_t *addr;
+
+#ifdef PPC64_ELF_ABI_v2
+ /* PPC64 ABIv2 needs local entry point */
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+ if (addr && !offset) {
+#ifdef CONFIG_KPROBES_ON_FTRACE
+ unsigned long faddr;
+ /*
+ * Per livepatch.h, ftrace location is always within the first
+ * 16 bytes of a function on powerpc with -mprofile-kernel.
+ */
+ faddr = ftrace_location_range((unsigned long)addr,
+ (unsigned long)addr + 16);
+ if (faddr)
+ addr = (kprobe_opcode_t *)faddr;
+ else
+#endif
+ addr = (kprobe_opcode_t *)ppc_function_entry(addr);
+ }
+#elif defined(PPC64_ELF_ABI_v1)
+ /*
+ * 64bit powerpc ABIv1 uses function descriptors:
+ * - Check for the dot variant of the symbol first.
+ * - If that fails, try looking up the symbol provided.
+ *
+ * This ensures we always get to the actual symbol and not
+ * the descriptor.
+ *
+ * Also handle <module:symbol> format.
+ */
+ char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN];
+ const char *modsym;
+ bool dot_appended = false;
+ if ((modsym = strchr(name, ':')) != NULL) {
+ modsym++;
+ if (*modsym != '\0' && *modsym != '.') {
+ /* Convert to <module:.symbol> */
+ strncpy(dot_name, name, modsym - name);
+ dot_name[modsym - name] = '.';
+ dot_name[modsym - name + 1] = '\0';
+ strncat(dot_name, modsym,
+ sizeof(dot_name) - (modsym - name) - 2);
+ dot_appended = true;
+ } else {
+ dot_name[0] = '\0';
+ strncat(dot_name, name, sizeof(dot_name) - 1);
+ }
+ } else if (name[0] != '.') {
+ dot_name[0] = '.';
+ dot_name[1] = '\0';
+ strncat(dot_name, name, KSYM_NAME_LEN - 2);
+ dot_appended = true;
+ } else {
+ dot_name[0] = '\0';
+ strncat(dot_name, name, KSYM_NAME_LEN - 1);
+ }
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name);
+ if (!addr && dot_appended) {
+ /* Let's try the original non-dot symbol lookup */
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+ }
+#else
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+#endif
+
+ return addr;
+}
+
+int arch_prepare_kprobe(struct kprobe *p)
{
int ret = 0;
kprobe_opcode_t insn = *p->addr;
@@ -74,30 +154,34 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
p->ainsn.boostable = 0;
return ret;
}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
-void __kprobes arch_arm_kprobe(struct kprobe *p)
+void arch_arm_kprobe(struct kprobe *p)
{
*p->addr = BREAKPOINT_INSTRUCTION;
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
+void arch_disarm_kprobe(struct kprobe *p)
{
*p->addr = p->opcode;
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
-void __kprobes arch_remove_kprobe(struct kprobe *p)
+void arch_remove_kprobe(struct kprobe *p)
{
if (p->ainsn.insn) {
free_insn_slot(p->ainsn.insn, 0);
p->ainsn.insn = NULL;
}
}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
enable_single_step(regs);
@@ -110,37 +194,80 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
regs->nip = (unsigned long)p->ainsn.insn;
}
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
kcb->prev_kprobe.kp = kprobe_running();
kcb->prev_kprobe.status = kcb->kprobe_status;
kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
}
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
kcb->kprobe_status = kcb->prev_kprobe.status;
kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
}
-static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
__this_cpu_write(current_kprobe, p);
kcb->kprobe_saved_msr = regs->msr;
}
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
- struct pt_regs *regs)
+bool arch_function_offset_within_entry(unsigned long offset)
+{
+#ifdef PPC64_ELF_ABI_v2
+#ifdef CONFIG_KPROBES_ON_FTRACE
+ return offset <= 16;
+#else
+ return offset <= 8;
+#endif
+#else
+ return !offset;
+#endif
+}
+
+void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
{
ri->ret_addr = (kprobe_opcode_t *)regs->link;
/* Replace the return addr with trampoline addr */
regs->link = (unsigned long)kretprobe_trampoline;
}
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-int __kprobes kprobe_handler(struct pt_regs *regs)
+int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
+{
+ int ret;
+ unsigned int insn = *p->ainsn.insn;
+
+ /* regs->nip is also adjusted if emulate_step returns 1 */
+ ret = emulate_step(regs, insn);
+ if (ret > 0) {
+ /*
+ * Once this instruction has been boosted
+ * successfully, set the boostable flag
+ */
+ if (unlikely(p->ainsn.boostable == 0))
+ p->ainsn.boostable = 1;
+ } else if (ret < 0) {
+ /*
+ * We don't allow kprobes on mtmsr(d)/rfi(d), etc.
+ * So, we should never get here... but, its still
+ * good to catch them, just in case...
+ */
+ printk("Can't step on instruction %x\n", insn);
+ BUG();
+ } else if (ret == 0)
+ /* This instruction can't be boosted */
+ p->ainsn.boostable = -1;
+
+ return ret;
+}
+NOKPROBE_SYMBOL(try_to_emulate);
+
+int kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p;
int ret = 0;
@@ -177,10 +304,17 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
*/
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
- kcb->kprobe_saved_msr = regs->msr;
kprobes_inc_nmissed_count(p);
prepare_singlestep(p, regs);
kcb->kprobe_status = KPROBE_REENTER;
+ if (p->ainsn.boostable >= 0) {
+ ret = try_to_emulate(p, regs);
+
+ if (ret > 0) {
+ restore_previous_kprobe(kcb);
+ return 1;
+ }
+ }
return 1;
} else {
if (*addr != BREAKPOINT_INSTRUCTION) {
@@ -197,7 +331,9 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
}
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
- goto ss_probe;
+ if (!skip_singlestep(p, regs, kcb))
+ goto ss_probe;
+ ret = 1;
}
}
goto no_kprobe;
@@ -235,18 +371,9 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
ss_probe:
if (p->ainsn.boostable >= 0) {
- unsigned int insn = *p->ainsn.insn;
+ ret = try_to_emulate(p, regs);
- /* regs->nip is also adjusted if emulate_step returns 1 */
- ret = emulate_step(regs, insn);
if (ret > 0) {
- /*
- * Once this instruction has been boosted
- * successfully, set the boostable flag
- */
- if (unlikely(p->ainsn.boostable == 0))
- p->ainsn.boostable = 1;
-
if (p->post_handler)
p->post_handler(p, regs, 0);
@@ -254,17 +381,7 @@ ss_probe:
reset_current_kprobe();
preempt_enable_no_resched();
return 1;
- } else if (ret < 0) {
- /*
- * We don't allow kprobes on mtmsr(d)/rfi(d), etc.
- * So, we should never get here... but, its still
- * good to catch them, just in case...
- */
- printk("Can't step on instruction %x\n", insn);
- BUG();
- } else if (ret == 0)
- /* This instruction can't be boosted */
- p->ainsn.boostable = -1;
+ }
}
prepare_singlestep(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
@@ -274,6 +391,7 @@ no_kprobe:
preempt_enable_no_resched();
return ret;
}
+NOKPROBE_SYMBOL(kprobe_handler);
/*
* Function return probe trampoline:
@@ -291,8 +409,7 @@ asm(".global kretprobe_trampoline\n"
/*
* Called when the probe at kretprobe trampoline is hit
*/
-static int __kprobes trampoline_probe_handler(struct kprobe *p,
- struct pt_regs *regs)
+static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
@@ -361,6 +478,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
*/
return 1;
}
+NOKPROBE_SYMBOL(trampoline_probe_handler);
/*
* Called after single-stepping. p->addr is the address of the
@@ -370,7 +488,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
* single-stepped a copy of the instruction. The address of this
* copy is p->ainsn.insn.
*/
-int __kprobes kprobe_post_handler(struct pt_regs *regs)
+int kprobe_post_handler(struct pt_regs *regs)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -410,8 +528,9 @@ out:
return 1;
}
+NOKPROBE_SYMBOL(kprobe_post_handler);
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -474,13 +593,15 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
}
return 0;
}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
unsigned long arch_deref_entry_point(void *entry)
{
return ppc_global_function_entry(entry);
}
+NOKPROBE_SYMBOL(arch_deref_entry_point);
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -497,17 +618,20 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
return 1;
}
+NOKPROBE_SYMBOL(setjmp_pre_handler);
-void __used __kprobes jprobe_return(void)
+void __used jprobe_return(void)
{
asm volatile("trap" ::: "memory");
}
+NOKPROBE_SYMBOL(jprobe_return);
-static void __used __kprobes jprobe_return_end(void)
+static void __used jprobe_return_end(void)
{
-};
+}
+NOKPROBE_SYMBOL(jprobe_return_end);
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -520,6 +644,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
preempt_enable_no_resched();
return 1;
}
+NOKPROBE_SYMBOL(longjmp_break_handler);
static struct kprobe trampoline_p = {
.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
@@ -531,10 +656,11 @@ int __init arch_init_kprobes(void)
return register_kprobe(&trampoline_p);
}
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+int arch_trampoline_kprobe(struct kprobe *p)
{
if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
return 1;
return 0;
}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index a1475e6aef3a..5f9eada3519b 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -221,6 +221,8 @@ static void machine_check_process_queued_event(struct irq_work *work)
{
int index;
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
/*
* For now just print it to console.
* TODO: log this error event to FSP or nvram.
@@ -228,12 +230,13 @@ static void machine_check_process_queued_event(struct irq_work *work)
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
machine_check_print_event_info(
- this_cpu_ptr(&mce_event_queue[index]));
+ this_cpu_ptr(&mce_event_queue[index]), false);
__this_cpu_dec(mce_queue_count);
}
}
-void machine_check_print_event_info(struct machine_check_event *evt)
+void machine_check_print_event_info(struct machine_check_event *evt,
+ bool user_mode)
{
const char *level, *sevstr, *subtype;
static const char *mc_ue_types[] = {
@@ -310,7 +313,16 @@ void machine_check_print_event_info(struct machine_check_event *evt)
printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
evt->disposition == MCE_DISPOSITION_RECOVERED ?
- "Recovered" : "[Not recovered");
+ "Recovered" : "Not recovered");
+
+ if (user_mode) {
+ printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
+ evt->srr0, current->pid, current->comm);
+ } else {
+ printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
+ (void *)evt->srr0);
+ }
+
printk("%s Initiator: %s\n", level,
evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
switch (evt->error_type) {
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 763d6f58caa8..f913139bb0c2 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -72,10 +72,14 @@ void __flush_tlb_power8(unsigned int action)
void __flush_tlb_power9(unsigned int action)
{
+ unsigned int num_sets;
+
if (radix_enabled())
- flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
+ num_sets = POWER9_TLB_SETS_RADIX;
+ else
+ num_sets = POWER9_TLB_SETS_HASH;
- flush_tlb_206(POWER9_TLB_SETS_HASH, action);
+ flush_tlb_206(num_sets, action);
}
@@ -147,159 +151,365 @@ static int mce_flush(int what)
return 0;
}
-static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat)
-{
- if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB))
- dsisr &= ~slb;
- if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT))
- dsisr &= ~erat;
- if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB))
- dsisr &= ~tlb;
- /* Any other errors we don't understand? */
- if (dsisr)
- return 0;
- return 1;
-}
-
-static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
+#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
+
+struct mce_ierror_table {
+ unsigned long srr1_mask;
+ unsigned long srr1_value;
+ bool nip_valid; /* nip is a valid indicator of faulting address */
+ unsigned int error_type;
+ unsigned int error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+};
+
+static const struct mce_ierror_table mce_p7_ierror_table[] = {
+{ 0x00000000001c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, 0, 0, 0, 0, 0 } };
+
+static const struct mce_ierror_table mce_p8_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008000000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008040000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, 0, 0, 0, 0, 0 } };
+
+static const struct mce_ierror_table mce_p9_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008000000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008040000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000080c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008100000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008140000, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x0000000008180000, false,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, 0, 0, 0, 0, 0 } };
+
+struct mce_derror_table {
+ unsigned long dsisr_value;
+ bool dar_valid; /* dar is a valid indicator of faulting address */
+ unsigned int error_type;
+ unsigned int error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+};
+
+static const struct mce_derror_table mce_p7_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000040, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, false, 0, 0, 0, 0 } };
+
+static const struct mce_derror_table mce_p8_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00002000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00001000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000200, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, false, 0, 0, 0, 0 } };
+
+static const struct mce_derror_table mce_p9_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00002000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00001000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000200, false,
+ MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000040, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000020, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000010, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000008, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, false, 0, 0, 0, 0 } };
+
+static int mce_handle_ierror(struct pt_regs *regs,
+ const struct mce_ierror_table table[],
+ struct mce_error_info *mce_err, uint64_t *addr)
{
- long handled = 1;
+ uint64_t srr1 = regs->msr;
+ int handled = 0;
+ int i;
+
+ *addr = 0;
+
+ for (i = 0; table[i].srr1_mask; i++) {
+ if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
+ continue;
+
+ /* attempt to correct the error */
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_SLB:
+ handled = mce_flush(MCE_FLUSH_SLB);
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ handled = mce_flush(MCE_FLUSH_ERAT);
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ handled = mce_flush(MCE_FLUSH_TLB);
+ break;
+ }
- /*
- * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
- * reset the error bits whenever we handle them so that at the end
- * we can check whether we handled all of them or not.
- * */
-#ifdef CONFIG_PPC_STD_MMU_64
- if (dsisr & slb_error_bits) {
- flush_and_reload_slb();
- /* reset error bits */
- dsisr &= ~(slb_error_bits);
- }
- if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
- /* reset error bits */
- dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
+ /* now fill in mce_error_info */
+ mce_err->error_type = table[i].error_type;
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce_err->u.ue_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce_err->u.slb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce_err->u.erat_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce_err->u.tlb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_USER:
+ mce_err->u.user_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ mce_err->u.ra_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ mce_err->u.link_error_type = table[i].error_subtype;
+ break;
+ }
+ mce_err->severity = table[i].severity;
+ mce_err->initiator = table[i].initiator;
+ if (table[i].nip_valid)
+ *addr = regs->nip;
+ return handled;
}
-#endif
- /* Any other errors we don't understand? */
- if (dsisr & 0xffffffffUL)
- handled = 0;
- return handled;
-}
+ mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
+ mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->initiator = MCE_INITIATOR_CPU;
-static long mce_handle_derror_p7(uint64_t dsisr)
-{
- return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
+ return 0;
}
-static long mce_handle_common_ierror(uint64_t srr1)
+static int mce_handle_derror(struct pt_regs *regs,
+ const struct mce_derror_table table[],
+ struct mce_error_info *mce_err, uint64_t *addr)
{
- long handled = 0;
-
- switch (P7_SRR1_MC_IFETCH(srr1)) {
- case 0:
- break;
-#ifdef CONFIG_PPC_STD_MMU_64
- case P7_SRR1_MC_IFETCH_SLB_PARITY:
- case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
- /* flush and reload SLBs for SLB errors. */
- flush_and_reload_slb();
- handled = 1;
- break;
- case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
- handled = 1;
+ uint64_t dsisr = regs->dsisr;
+ int handled = 0;
+ int found = 0;
+ int i;
+
+ *addr = 0;
+
+ for (i = 0; table[i].dsisr_value; i++) {
+ if (!(dsisr & table[i].dsisr_value))
+ continue;
+
+ /* attempt to correct the error */
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_SLB:
+ if (mce_flush(MCE_FLUSH_SLB))
+ handled = 1;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ if (mce_flush(MCE_FLUSH_ERAT))
+ handled = 1;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ if (mce_flush(MCE_FLUSH_TLB))
+ handled = 1;
+ break;
}
- break;
-#endif
- default:
- break;
- }
- return handled;
-}
-
-static long mce_handle_ierror_p7(uint64_t srr1)
-{
- long handled = 0;
-
- handled = mce_handle_common_ierror(srr1);
+ /*
+ * Attempt to handle multiple conditions, but only return
+ * one. Ensure uncorrectable errors are first in the table
+ * to match.
+ */
+ if (found)
+ continue;
+
+ /* now fill in mce_error_info */
+ mce_err->error_type = table[i].error_type;
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce_err->u.ue_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce_err->u.slb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce_err->u.erat_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce_err->u.tlb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_USER:
+ mce_err->u.user_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ mce_err->u.ra_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ mce_err->u.link_error_type = table[i].error_subtype;
+ break;
+ }
+ mce_err->severity = table[i].severity;
+ mce_err->initiator = table[i].initiator;
+ if (table[i].dar_valid)
+ *addr = regs->dar;
-#ifdef CONFIG_PPC_STD_MMU_64
- if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
- flush_and_reload_slb();
- handled = 1;
+ found = 1;
}
-#endif
- return handled;
-}
-static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
-{
- switch (P7_SRR1_MC_IFETCH(srr1)) {
- case P7_SRR1_MC_IFETCH_SLB_PARITY:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- break;
- case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- break;
- case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- break;
- case P7_SRR1_MC_IFETCH_UE:
- case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
- break;
- case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type =
- MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
- break;
- }
-}
+ if (found)
+ return handled;
-static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
-{
- mce_get_common_ierror(mce_err, srr1);
- if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
- }
-}
+ mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
+ mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->initiator = MCE_INITIATOR_CPU;
-static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
-{
- if (dsisr & P7_DSISR_MC_UE) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
- } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type =
- MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
- } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
- }
+ return 0;
}
static long mce_handle_ue_error(struct pt_regs *regs)
@@ -320,292 +530,42 @@ static long mce_handle_ue_error(struct pt_regs *regs)
return handled;
}
-long __machine_check_early_realmode_p7(struct pt_regs *regs)
+static long mce_handle_error(struct pt_regs *regs,
+ const struct mce_derror_table dtable[],
+ const struct mce_ierror_table itable[])
{
- uint64_t srr1, nip, addr;
- long handled = 1;
- struct mce_error_info mce_error_info = { 0 };
-
- mce_error_info.severity = MCE_SEV_ERROR_SYNC;
- mce_error_info.initiator = MCE_INITIATOR_CPU;
-
- srr1 = regs->msr;
- nip = regs->nip;
+ struct mce_error_info mce_err = { 0 };
+ uint64_t addr;
+ uint64_t srr1 = regs->msr;
+ long handled;
- /*
- * Handle memory errors depending whether this was a load/store or
- * ifetch exception. Also, populate the mce error_type and
- * type-specific error_type from either SRR1 or DSISR, depending
- * whether this was a load/store or ifetch exception
- */
- if (P7_SRR1_MC_LOADSTORE(srr1)) {
- handled = mce_handle_derror_p7(regs->dsisr);
- mce_get_derror_p7(&mce_error_info, regs->dsisr);
- addr = regs->dar;
- } else {
- handled = mce_handle_ierror_p7(srr1);
- mce_get_ierror_p7(&mce_error_info, srr1);
- addr = regs->nip;
- }
+ if (SRR1_MC_LOADSTORE(srr1))
+ handled = mce_handle_derror(regs, dtable, &mce_err, &addr);
+ else
+ handled = mce_handle_ierror(regs, itable, &mce_err, &addr);
- /* Handle UE error. */
- if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+ if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
handled = mce_handle_ue_error(regs);
- save_mce_event(regs, handled, &mce_error_info, nip, addr);
- return handled;
-}
-
-static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
-{
- mce_get_common_ierror(mce_err, srr1);
- if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- }
-}
-
-static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
-{
- mce_get_derror_p7(mce_err, dsisr);
- if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- }
-}
-
-static long mce_handle_ierror_p8(uint64_t srr1)
-{
- long handled = 0;
+ save_mce_event(regs, handled, &mce_err, regs->nip, addr);
- handled = mce_handle_common_ierror(srr1);
-
-#ifdef CONFIG_PPC_STD_MMU_64
- if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
- flush_and_reload_slb();
- handled = 1;
- }
-#endif
return handled;
}
-static long mce_handle_derror_p8(uint64_t dsisr)
-{
- return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
-}
-
-long __machine_check_early_realmode_p8(struct pt_regs *regs)
-{
- uint64_t srr1, nip, addr;
- long handled = 1;
- struct mce_error_info mce_error_info = { 0 };
-
- mce_error_info.severity = MCE_SEV_ERROR_SYNC;
- mce_error_info.initiator = MCE_INITIATOR_CPU;
-
- srr1 = regs->msr;
- nip = regs->nip;
-
- if (P7_SRR1_MC_LOADSTORE(srr1)) {
- handled = mce_handle_derror_p8(regs->dsisr);
- mce_get_derror_p8(&mce_error_info, regs->dsisr);
- addr = regs->dar;
- } else {
- handled = mce_handle_ierror_p8(srr1);
- mce_get_ierror_p8(&mce_error_info, srr1);
- addr = regs->nip;
- }
-
- /* Handle UE error. */
- if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
-
- save_mce_event(regs, handled, &mce_error_info, nip, addr);
- return handled;
-}
-
-static int mce_handle_derror_p9(struct pt_regs *regs)
-{
- uint64_t dsisr = regs->dsisr;
-
- return mce_handle_flush_derrors(dsisr,
- P9_DSISR_MC_SLB_PARITY_MFSLB |
- P9_DSISR_MC_SLB_MULTIHIT_MFSLB,
-
- P9_DSISR_MC_TLB_MULTIHIT_MFTLB,
-
- P9_DSISR_MC_ERAT_MULTIHIT);
-}
-
-static int mce_handle_ierror_p9(struct pt_regs *regs)
+long __machine_check_early_realmode_p7(struct pt_regs *regs)
{
- uint64_t srr1 = regs->msr;
+ /* P7 DD1 leaves top bits of DSISR undefined */
+ regs->dsisr &= 0x0000ffff;
- switch (P9_SRR1_MC_IFETCH(srr1)) {
- case P9_SRR1_MC_IFETCH_SLB_PARITY:
- case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
- return mce_flush(MCE_FLUSH_SLB);
- case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
- return mce_flush(MCE_FLUSH_TLB);
- case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
- return mce_flush(MCE_FLUSH_ERAT);
- default:
- return 0;
- }
+ return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table);
}
-static void mce_get_derror_p9(struct pt_regs *regs,
- struct mce_error_info *mce_err, uint64_t *addr)
-{
- uint64_t dsisr = regs->dsisr;
-
- mce_err->severity = MCE_SEV_ERROR_SYNC;
- mce_err->initiator = MCE_INITIATOR_CPU;
-
- if (dsisr & P9_DSISR_MC_USER_TLBIE)
- *addr = regs->nip;
- else
- *addr = regs->dar;
-
- if (dsisr & P9_DSISR_MC_UE) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
- } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
- } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) {
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT;
- } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) {
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT;
- } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) {
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- } else if (dsisr & P9_DSISR_MC_USER_TLBIE) {
- mce_err->error_type = MCE_ERROR_TYPE_USER;
- mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE;
- } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- } else if (dsisr & P9_DSISR_MC_RA_LOAD) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD;
- } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
- } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
- } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN;
- }
-}
-
-static void mce_get_ierror_p9(struct pt_regs *regs,
- struct mce_error_info *mce_err, uint64_t *addr)
+long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
- uint64_t srr1 = regs->msr;
-
- switch (P9_SRR1_MC_IFETCH(srr1)) {
- case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
- case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
- mce_err->severity = MCE_SEV_FATAL;
- break;
- default:
- mce_err->severity = MCE_SEV_ERROR_SYNC;
- break;
- }
-
- mce_err->initiator = MCE_INITIATOR_CPU;
-
- *addr = regs->nip;
-
- switch (P9_SRR1_MC_IFETCH(srr1)) {
- case P9_SRR1_MC_IFETCH_UE:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_SLB_PARITY:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- break;
- case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- break;
- case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- break;
- case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- break;
- case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_LINK_TIMEOUT:
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT;
- break;
- case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT:
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT;
- break;
- case P9_SRR1_MC_IFETCH_RA:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_RA_TABLEWALK:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_STORE;
- break;
- case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT;
- break;
- case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN;
- break;
- default:
- break;
- }
+ return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table);
}
long __machine_check_early_realmode_p9(struct pt_regs *regs)
{
- uint64_t nip, addr;
- long handled;
- struct mce_error_info mce_error_info = { 0 };
-
- nip = regs->nip;
-
- if (P9_SRR1_MC_LOADSTORE(regs->msr)) {
- handled = mce_handle_derror_p9(regs);
- mce_get_derror_p9(regs, &mce_error_info, &addr);
- } else {
- handled = mce_handle_ierror_p9(regs);
- mce_get_ierror_p9(regs, &mce_error_info, &addr);
- }
-
- /* Handle UE error. */
- if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
-
- save_mce_event(regs, handled, &mce_error_info, nip, addr);
- return handled;
+ return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
}
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index d5e2b8309939..eae61b044e9e 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -389,51 +389,40 @@ static int nvram_pstore_open(struct pstore_info *psi)
/**
* nvram_pstore_write - pstore write callback for nvram
- * @type: Type of message logged
- * @reason: reason behind dump (oops/panic)
- * @id: identifier to indicate the write performed
- * @part: pstore writes data to registered buffer in parts,
- * part number will indicate the same.
- * @count: Indicates oops count
- * @compressed: Flag to indicate the log is compressed
- * @size: number of bytes written to the registered buffer
- * @psi: registered pstore_info structure
+ * @record: pstore record to write, with @id to be set
*
* Called by pstore_dump() when an oops or panic report is logged in the
* printk buffer.
* Returns 0 on successful write.
*/
-static int nvram_pstore_write(enum pstore_type_id type,
- enum kmsg_dump_reason reason,
- u64 *id, unsigned int part, int count,
- bool compressed, size_t size,
- struct pstore_info *psi)
+static int nvram_pstore_write(struct pstore_record *record)
{
int rc;
unsigned int err_type = ERR_TYPE_KERNEL_PANIC;
struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf;
/* part 1 has the recent messages from printk buffer */
- if (part > 1 || (type != PSTORE_TYPE_DMESG))
+ if (record->part > 1 || (record->type != PSTORE_TYPE_DMESG))
return -1;
if (clobbering_unread_rtas_event())
return -1;
oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
- oops_hdr->report_length = cpu_to_be16(size);
+ oops_hdr->report_length = cpu_to_be16(record->size);
oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
- if (compressed)
+ if (record->compressed)
err_type = ERR_TYPE_KERNEL_PANIC_GZ;
rc = nvram_write_os_partition(&oops_log_partition, oops_buf,
- (int) (sizeof(*oops_hdr) + size), err_type, count);
+ (int) (sizeof(*oops_hdr) + record->size), err_type,
+ record->count);
if (rc != 0)
return rc;
- *id = part;
+ record->id = record->part;
return 0;
}
@@ -442,10 +431,7 @@ static int nvram_pstore_write(enum pstore_type_id type,
* Returns the length of the data we read from each partition.
* Returns 0 if we've been called before.
*/
-static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
- int *count, struct timespec *time, char **buf,
- bool *compressed, ssize_t *ecc_notice_size,
- struct pstore_info *psi)
+static ssize_t nvram_pstore_read(struct pstore_record *record)
{
struct oops_log_info *oops_hdr;
unsigned int err_type, id_no, size = 0;
@@ -459,40 +445,40 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
switch (nvram_type_ids[read_type]) {
case PSTORE_TYPE_DMESG:
part = &oops_log_partition;
- *type = PSTORE_TYPE_DMESG;
+ record->type = PSTORE_TYPE_DMESG;
break;
case PSTORE_TYPE_PPC_COMMON:
sig = NVRAM_SIG_SYS;
part = &common_partition;
- *type = PSTORE_TYPE_PPC_COMMON;
- *id = PSTORE_TYPE_PPC_COMMON;
- time->tv_sec = 0;
- time->tv_nsec = 0;
+ record->type = PSTORE_TYPE_PPC_COMMON;
+ record->id = PSTORE_TYPE_PPC_COMMON;
+ record->time.tv_sec = 0;
+ record->time.tv_nsec = 0;
break;
#ifdef CONFIG_PPC_PSERIES
case PSTORE_TYPE_PPC_RTAS:
part = &rtas_log_partition;
- *type = PSTORE_TYPE_PPC_RTAS;
- time->tv_sec = last_rtas_event;
- time->tv_nsec = 0;
+ record->type = PSTORE_TYPE_PPC_RTAS;
+ record->time.tv_sec = last_rtas_event;
+ record->time.tv_nsec = 0;
break;
case PSTORE_TYPE_PPC_OF:
sig = NVRAM_SIG_OF;
part = &of_config_partition;
- *type = PSTORE_TYPE_PPC_OF;
- *id = PSTORE_TYPE_PPC_OF;
- time->tv_sec = 0;
- time->tv_nsec = 0;
+ record->type = PSTORE_TYPE_PPC_OF;
+ record->id = PSTORE_TYPE_PPC_OF;
+ record->time.tv_sec = 0;
+ record->time.tv_nsec = 0;
break;
#endif
#ifdef CONFIG_PPC_POWERNV
case PSTORE_TYPE_PPC_OPAL:
sig = NVRAM_SIG_FW;
part = &skiboot_partition;
- *type = PSTORE_TYPE_PPC_OPAL;
- *id = PSTORE_TYPE_PPC_OPAL;
- time->tv_sec = 0;
- time->tv_nsec = 0;
+ record->type = PSTORE_TYPE_PPC_OPAL;
+ record->id = PSTORE_TYPE_PPC_OPAL;
+ record->time.tv_sec = 0;
+ record->time.tv_nsec = 0;
break;
#endif
default:
@@ -520,10 +506,10 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
return 0;
}
- *count = 0;
+ record->count = 0;
if (part->os_partition)
- *id = id_no;
+ record->id = id_no;
if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
size_t length, hdr_size;
@@ -533,34 +519,35 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
/* Old format oops header had 2-byte record size */
hdr_size = sizeof(u16);
length = be16_to_cpu(oops_hdr->version);
- time->tv_sec = 0;
- time->tv_nsec = 0;
+ record->time.tv_sec = 0;
+ record->time.tv_nsec = 0;
} else {
hdr_size = sizeof(*oops_hdr);
length = be16_to_cpu(oops_hdr->report_length);
- time->tv_sec = be64_to_cpu(oops_hdr->timestamp);
- time->tv_nsec = 0;
+ record->time.tv_sec = be64_to_cpu(oops_hdr->timestamp);
+ record->time.tv_nsec = 0;
}
- *buf = kmemdup(buff + hdr_size, length, GFP_KERNEL);
+ record->buf = kmemdup(buff + hdr_size, length, GFP_KERNEL);
kfree(buff);
- if (*buf == NULL)
+ if (record->buf == NULL)
return -ENOMEM;
- *ecc_notice_size = 0;
+ record->ecc_notice_size = 0;
if (err_type == ERR_TYPE_KERNEL_PANIC_GZ)
- *compressed = true;
+ record->compressed = true;
else
- *compressed = false;
+ record->compressed = false;
return length;
}
- *buf = buff;
+ record->buf = buff;
return part->size;
}
static struct pstore_info nvram_pstore_info = {
.owner = THIS_MODULE,
.name = "nvram",
+ .flags = PSTORE_FLAGS_DMESG,
.open = nvram_pstore_open,
.read = nvram_pstore_read,
.write = nvram_pstore_write,
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 2282bf4e63cd..ec60ed0d4aad 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -243,10 +243,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
/*
* 2. branch to optimized_callback() and emulate_step()
*/
- kprobe_lookup_name("optimized_callback", op_callback_addr);
- kprobe_lookup_name("emulate_step", emulate_step_addr);
+ op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback");
+ emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step");
if (!op_callback_addr || !emulate_step_addr) {
- WARN(1, "kprobe_lookup_name() failed\n");
+ WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n");
goto error;
}
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index dfc479df9634..8d63627e067f 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -245,3 +245,24 @@ void __init free_unused_pacas(void)
free_lppacas();
}
+
+void copy_mm_to_paca(struct mm_struct *mm)
+{
+#ifdef CONFIG_PPC_BOOK3S
+ mm_context_t *context = &mm->context;
+
+ get_paca()->mm_ctx_id = context->id;
+#ifdef CONFIG_PPC_MM_SLICES
+ VM_BUG_ON(!mm->context.addr_limit);
+ get_paca()->addr_limit = mm->context.addr_limit;
+ get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+ memcpy(&get_paca()->mm_ctx_high_slices_psize,
+ &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
+#else /* CONFIG_PPC_MM_SLICES */
+ get_paca()->mm_ctx_user_psize = context->user_psize;
+ get_paca()->mm_ctx_sllp = context->sllp;
+#endif
+#else /* CONFIG_PPC_BOOK3S */
+ return;
+#endif
+}
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index ffda24a38dda..341a7469cab8 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -233,6 +233,14 @@ void pcibios_reset_secondary_bus(struct pci_dev *dev)
pci_reset_secondary_bus(dev);
}
+resource_size_t pcibios_default_alignment(void)
+{
+ if (ppc_md.pcibios_default_alignment)
+ return ppc_md.pcibios_default_alignment();
+
+ return 0;
+}
+
#ifdef CONFIG_PCI_IOV
resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno)
{
@@ -513,7 +521,8 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
*
* Returns a negative error code on failure, zero on success.
*/
-int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+int pci_mmap_page_range(struct pci_dev *dev, int bar,
+ struct vm_area_struct *vma,
enum pci_mmap_state mmap_state, int write_combine)
{
resource_size_t offset =
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f5d399e46193..40c4887c27b6 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -55,9 +55,9 @@
#include <asm/kexec.h>
#include <asm/opal.h>
#include <asm/fadump.h>
-#include <asm/debug.h>
#include <asm/epapr_hcalls.h>
#include <asm/firmware.h>
+#include <asm/dt_cpu_ftrs.h>
#include <mm/mmu_decl.h>
@@ -376,23 +376,31 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* A POWER6 partition in "POWER6 architected" mode
* uses the 0x0f000002 PVR value; in POWER5+ mode
* it uses 0x0f000001.
+ *
+ * If we're using device tree CPU feature discovery then we don't
+ * support the cpu-version property, and it's the responsibility of the
+ * firmware/hypervisor to provide the correct feature set for the
+ * architecture level via the ibm,powerpc-cpu-features binding.
*/
- prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
- if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
- identify_cpu(0, be32_to_cpup(prop));
+ if (!dt_cpu_ftrs_in_use()) {
+ prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
+ if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
+ identify_cpu(0, be32_to_cpup(prop));
- identical_pvr_fixup(node);
+ check_cpu_feature_properties(node);
+ check_cpu_pa_features(node);
+ }
- check_cpu_feature_properties(node);
- check_cpu_pa_features(node);
+ identical_pvr_fixup(node);
init_mmu_slb_size(node);
#ifdef CONFIG_PPC64
- if (nthreads > 1)
- cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
- else
+ if (nthreads == 1)
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
+ else if (!dt_cpu_ftrs_in_use())
+ cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
#endif
+
return 0;
}
@@ -722,6 +730,8 @@ void __init early_init_devtree(void *params)
DBG("Scanning CPUs ...\n");
+ dt_cpu_ftrs_scan();
+
/* Retrieve CPU related informations from the flat tree
* (altivec support, boot CPU ID, ...)
*/
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1c1b44ec7642..dd8a04f3053a 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -815,7 +815,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.virt_base = cpu_to_be32(0xffffffff),
.virt_size = cpu_to_be32(0xffffffff),
.load_base = cpu_to_be32(0xffffffff),
- .min_rma = cpu_to_be32(256), /* 256MB min RMA */
+ .min_rma = cpu_to_be32(512), /* 512MB min RMA */
.min_load = cpu_to_be32(0xffffffff), /* full client load */
.min_rma_percent = 0, /* min RMA percentage of total RAM */
.max_pft_size = 48, /* max log_2(hash table size) */
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 4697da895133..71dcda91755d 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -31,11 +31,11 @@
#include <linux/unistd.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
-#include <linux/debugfs.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
#include <linux/of_platform.h>
#include <linux/hugetlb.h>
+#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/paca.h>
#include <asm/prom.h>
@@ -125,6 +125,11 @@ int ppc_do_canonicalize_irqs;
EXPORT_SYMBOL(ppc_do_canonicalize_irqs);
#endif
+#ifdef CONFIG_CRASH_CORE
+/* This keeps a track of which one is the crashing cpu. */
+int crashing_cpu = -1;
+#endif
+
/* also used by kexec */
void machine_shutdown(void)
{
@@ -256,7 +261,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "processor\t: %lu\n", cpu_id);
seq_printf(m, "cpu\t\t: ");
- if (cur_cpu_spec->pvr_mask)
+ if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name)
seq_printf(m, "%s", cur_cpu_spec->cpu_name);
else
seq_printf(m, "unknown (%08x)", pvr);
@@ -920,6 +925,15 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = klimit;
+
+#ifdef CONFIG_PPC_MM_SLICES
+#ifdef CONFIG_PPC64
+ init_mm.context.addr_limit = TASK_SIZE_128TB;
+#else
+#error "context.addr_limit not initialized."
+#endif
+#endif
+
#ifdef CONFIG_PPC_64K_PAGES
init_mm.context.pte_frag = NULL;
#endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index f997154dfc41..f35ff9dea4fb 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -49,6 +49,7 @@
#include <asm/paca.h>
#include <asm/time.h>
#include <asm/cputable.h>
+#include <asm/dt_cpu_ftrs.h>
#include <asm/sections.h>
#include <asm/btext.h>
#include <asm/nvram.h>
@@ -230,8 +231,8 @@ static void cpu_ready_for_interrupts(void)
* If we are not in hypervisor mode the job is done once for
* the whole partition in configure_exceptions().
*/
- if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
- early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S)) {
unsigned long lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
}
@@ -274,8 +275,10 @@ void __init early_setup(unsigned long dt_ptr)
/* -------- printk is _NOT_ safe to use here ! ------- */
- /* Identify CPU type */
- identify_cpu(0, mfspr(SPRN_PVR));
+ /* Try new device tree based feature discovery ... */
+ if (!dt_cpu_ftrs_init(__va(dt_ptr)))
+ /* Otherwise use the old style CPU table */
+ identify_cpu(0, mfspr(SPRN_PVR));
/* Assume we're on cpu 0 for now. Don't write to the paca yet! */
initialise_paca(&boot_paca, 0);
@@ -541,6 +544,9 @@ void __init initialize_cache_info(void)
dcache_bsize = ppc64_caches.l1d.block_size;
icache_bsize = ppc64_caches.l1i.block_size;
+ cur_cpu_spec->dcache_bsize = dcache_bsize;
+ cur_cpu_spec->icache_bsize = icache_bsize;
+
DBG(" <- initialize_cache_info()\n");
}
@@ -637,6 +643,11 @@ void __init emergency_stack_init(void)
paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
+ /* emergency stack for NMI exception handling. */
+ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+ klp_init_thread_info(ti);
+ paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+
/* emergency stack for machine check exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
klp_init_thread_info(ti);
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 3a3671172436..e9436c5e1e09 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -14,6 +14,7 @@
#include <linux/uprobes.h>
#include <linux/key.h>
#include <linux/context_tracking.h>
+#include <linux/livepatch.h>
#include <asm/hw_breakpoint.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -162,6 +163,9 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
tracehook_notify_resume(regs);
}
+ if (thread_info_flags & _TIF_PATCH_PENDING)
+ klp_update_patch_state(current);
+
user_enter();
}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 46f89e66a273..df2a41647d8e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -39,6 +39,7 @@
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
@@ -86,8 +87,6 @@ volatile unsigned int cpu_callin_map[NR_CPUS];
int smt_enabled_at_boot = 1;
-static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
-
/*
* Returns 1 if the specified cpu should be brought up during boot.
* Used to inhibit booting threads if they've been disabled or
@@ -158,32 +157,33 @@ static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
return IRQ_HANDLED;
}
-static irqreturn_t debug_ipi_action(int irq, void *data)
+#ifdef CONFIG_NMI_IPI
+static irqreturn_t nmi_ipi_action(int irq, void *data)
{
- if (crash_ipi_function_ptr) {
- crash_ipi_function_ptr(get_irq_regs());
- return IRQ_HANDLED;
- }
-
-#ifdef CONFIG_DEBUGGER
- debugger_ipi(get_irq_regs());
-#endif /* CONFIG_DEBUGGER */
-
+ smp_handle_nmi_ipi(get_irq_regs());
return IRQ_HANDLED;
}
+#endif
static irq_handler_t smp_ipi_action[] = {
[PPC_MSG_CALL_FUNCTION] = call_function_action,
[PPC_MSG_RESCHEDULE] = reschedule_action,
[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
- [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
+#ifdef CONFIG_NMI_IPI
+ [PPC_MSG_NMI_IPI] = nmi_ipi_action,
+#endif
};
+/*
+ * The NMI IPI is a fallback and not truly non-maskable. It is simpler
+ * than going through the call function infrastructure, and strongly
+ * serialized, so it is more appropriate for debugging.
+ */
const char *smp_ipi_name[] = {
[PPC_MSG_CALL_FUNCTION] = "ipi call function",
[PPC_MSG_RESCHEDULE] = "ipi reschedule",
[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
- [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
+ [PPC_MSG_NMI_IPI] = "nmi ipi",
};
/* optional function to request ipi, for controllers with >= 4 ipis */
@@ -191,14 +191,13 @@ int smp_request_message_ipi(int virq, int msg)
{
int err;
- if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
+ if (msg < 0 || msg > PPC_MSG_NMI_IPI)
return -EINVAL;
- }
-#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE)
- if (msg == PPC_MSG_DEBUGGER_BREAK) {
+#ifndef CONFIG_NMI_IPI
+ if (msg == PPC_MSG_NMI_IPI)
return 1;
- }
#endif
+
err = request_irq(virq, smp_ipi_action[msg],
IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
smp_ipi_name[msg], NULL);
@@ -211,17 +210,9 @@ int smp_request_message_ipi(int virq, int msg)
#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
long messages; /* current messages */
- unsigned long data; /* data for cause ipi */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
-void smp_muxed_ipi_set_data(int cpu, unsigned long data)
-{
- struct cpu_messages *info = &per_cpu(ipi_message, cpu);
-
- info->data = data;
-}
-
void smp_muxed_ipi_set_message(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
@@ -236,14 +227,13 @@ void smp_muxed_ipi_set_message(int cpu, int msg)
void smp_muxed_ipi_message_pass(int cpu, int msg)
{
- struct cpu_messages *info = &per_cpu(ipi_message, cpu);
-
smp_muxed_ipi_set_message(cpu, msg);
+
/*
* cause_ipi functions are required to include a full barrier
* before doing whatever causes the IPI.
*/
- smp_ops->cause_ipi(cpu, info->data);
+ smp_ops->cause_ipi(cpu);
}
#ifdef __BIG_ENDIAN__
@@ -254,11 +244,18 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
irqreturn_t smp_ipi_demux(void)
{
- struct cpu_messages *info = this_cpu_ptr(&ipi_message);
- unsigned long all;
-
mb(); /* order any irq clear */
+ return smp_ipi_demux_relaxed();
+}
+
+/* sync-free variant. Callers should ensure synchronization */
+irqreturn_t smp_ipi_demux_relaxed(void)
+{
+ struct cpu_messages *info;
+ unsigned long all;
+
+ info = this_cpu_ptr(&ipi_message);
do {
all = xchg(&info->messages, 0);
#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
@@ -278,8 +275,10 @@ irqreturn_t smp_ipi_demux(void)
scheduler_ipi();
if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
tick_broadcast_ipi_handler();
- if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))
- debug_ipi_action(0, NULL);
+#ifdef CONFIG_NMI_IPI
+ if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
+ nmi_ipi_action(0, NULL);
+#endif
} while (info->messages);
return IRQ_HANDLED;
@@ -316,6 +315,187 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
+#ifdef CONFIG_NMI_IPI
+
+/*
+ * "NMI IPI" system.
+ *
+ * NMI IPIs may not be recoverable, so should not be used as ongoing part of
+ * a running system. They can be used for crash, debug, halt/reboot, etc.
+ *
+ * NMI IPIs are globally single threaded. No more than one in progress at
+ * any time.
+ *
+ * The IPI call waits with interrupts disabled until all targets enter the
+ * NMI handler, then the call returns.
+ *
+ * No new NMI can be initiated until targets exit the handler.
+ *
+ * The IPI call may time out without all targets entering the NMI handler.
+ * In that case, there is some logic to recover (and ignore subsequent
+ * NMI interrupts that may eventually be raised), but the platform interrupt
+ * handler may not be able to distinguish this from other exception causes,
+ * which may cause a crash.
+ */
+
+static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
+static struct cpumask nmi_ipi_pending_mask;
+static int nmi_ipi_busy_count = 0;
+static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
+
+static void nmi_ipi_lock_start(unsigned long *flags)
+{
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
+ raw_local_irq_restore(*flags);
+ cpu_relax();
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ }
+}
+
+static void nmi_ipi_lock(void)
+{
+ while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
+ cpu_relax();
+}
+
+static void nmi_ipi_unlock(void)
+{
+ smp_mb();
+ WARN_ON(atomic_read(&__nmi_ipi_lock) != 1);
+ atomic_set(&__nmi_ipi_lock, 0);
+}
+
+static void nmi_ipi_unlock_end(unsigned long *flags)
+{
+ nmi_ipi_unlock();
+ raw_local_irq_restore(*flags);
+}
+
+/*
+ * Platform NMI handler calls this to ack
+ */
+int smp_handle_nmi_ipi(struct pt_regs *regs)
+{
+ void (*fn)(struct pt_regs *);
+ unsigned long flags;
+ int me = raw_smp_processor_id();
+ int ret = 0;
+
+ /*
+ * Unexpected NMIs are possible here because the interrupt may not
+ * be able to distinguish NMI IPIs from other types of NMIs, or
+ * because the caller may have timed out.
+ */
+ nmi_ipi_lock_start(&flags);
+ if (!nmi_ipi_busy_count)
+ goto out;
+ if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask))
+ goto out;
+
+ fn = nmi_ipi_function;
+ if (!fn)
+ goto out;
+
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ nmi_ipi_busy_count++;
+ nmi_ipi_unlock();
+
+ ret = 1;
+
+ fn(regs);
+
+ nmi_ipi_lock();
+ nmi_ipi_busy_count--;
+out:
+ nmi_ipi_unlock_end(&flags);
+
+ return ret;
+}
+
+static void do_smp_send_nmi_ipi(int cpu)
+{
+ if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
+ return;
+
+ if (cpu >= 0) {
+ do_message_pass(cpu, PPC_MSG_NMI_IPI);
+ } else {
+ int c;
+
+ for_each_online_cpu(c) {
+ if (c == raw_smp_processor_id())
+ continue;
+ do_message_pass(c, PPC_MSG_NMI_IPI);
+ }
+ }
+}
+
+/*
+ * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
+ * - fn is the target callback function.
+ * - delay_us > 0 is the delay before giving up waiting for targets to
+ * enter the handler, == 0 specifies indefinite delay.
+ */
+static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+{
+ unsigned long flags;
+ int me = raw_smp_processor_id();
+ int ret = 1;
+
+ BUG_ON(cpu == me);
+ BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);
+
+ if (unlikely(!smp_ops))
+ return 0;
+
+ /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */
+ nmi_ipi_lock_start(&flags);
+ while (nmi_ipi_busy_count) {
+ nmi_ipi_unlock_end(&flags);
+ cpu_relax();
+ nmi_ipi_lock_start(&flags);
+ }
+
+ nmi_ipi_function = fn;
+
+ if (cpu < 0) {
+ /* ALL_OTHERS */
+ cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ } else {
+ /* cpumask starts clear */
+ cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
+ }
+ nmi_ipi_busy_count++;
+ nmi_ipi_unlock();
+
+ do_smp_send_nmi_ipi(cpu);
+
+ while (!cpumask_empty(&nmi_ipi_pending_mask)) {
+ udelay(1);
+ if (delay_us) {
+ delay_us--;
+ if (!delay_us)
+ break;
+ }
+ }
+
+ nmi_ipi_lock();
+ if (!cpumask_empty(&nmi_ipi_pending_mask)) {
+ /* Could not gather all CPUs */
+ ret = 0;
+ cpumask_clear(&nmi_ipi_pending_mask);
+ }
+ nmi_ipi_busy_count--;
+ nmi_ipi_unlock_end(&flags);
+
+ return ret;
+}
+#endif /* CONFIG_NMI_IPI */
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
@@ -326,29 +506,22 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
-void smp_send_debugger_break(void)
+#ifdef CONFIG_DEBUGGER
+void debugger_ipi_callback(struct pt_regs *regs)
{
- int cpu;
- int me = raw_smp_processor_id();
-
- if (unlikely(!smp_ops))
- return;
+ debugger_ipi(regs);
+}
- for_each_online_cpu(cpu)
- if (cpu != me)
- do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+void smp_send_debugger_break(void)
+{
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
}
#endif
#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
- crash_ipi_function_ptr = crash_ipi_callback;
- if (crash_ipi_callback) {
- mb();
- smp_send_debugger_break();
- }
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
}
#endif
@@ -439,7 +612,21 @@ int generic_cpu_disable(void)
#ifdef CONFIG_PPC64
vdso_data->processorCount--;
#endif
- migrate_irqs();
+ /* Update affinity of all IRQs previously aimed at this CPU */
+ irq_migrate_all_off_this_cpu();
+
+ /*
+ * Depending on the details of the interrupt controller, it's possible
+ * that one of the interrupts we just migrated away from this CPU is
+ * actually already pending on this CPU. If we leave it in that state
+ * the interrupt will never be EOI'ed, and will never fire again. So
+ * temporarily enable interrupts here, to allow any pending interrupt to
+ * be received (and EOI'ed), before we take this CPU offline.
+ */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+
return 0;
}
@@ -521,6 +708,16 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
cpu_idle_thread_init(cpu, tidle);
+ /*
+ * The platform might need to allocate resources prior to bringing
+ * up the CPU
+ */
+ if (smp_ops->prepare_cpu) {
+ rc = smp_ops->prepare_cpu(cpu);
+ if (rc)
+ return rc;
+ }
+
/* Make sure callin-map entry is 0 (can be leftover a CPU
* hotplug
*/
@@ -787,24 +984,21 @@ static struct sched_domain_topology_level powerpc_topology[] = {
{ NULL, },
};
-void __init smp_cpus_done(unsigned int max_cpus)
+static __init long smp_setup_cpu_workfn(void *data __always_unused)
{
- cpumask_var_t old_mask;
+ smp_ops->setup_cpu(boot_cpuid);
+ return 0;
+}
- /* We want the setup_cpu() here to be called from CPU 0, but our
- * init thread may have been "borrowed" by another CPU in the meantime
- * se we pin us down to CPU 0 for a short while
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+ /*
+ * We want the setup_cpu() here to be called on the boot CPU, but
+ * init might run on any CPU, so make sure it's invoked on the boot
+ * CPU.
*/
- alloc_cpumask_var(&old_mask, GFP_NOWAIT);
- cpumask_copy(old_mask, &current->cpus_allowed);
- set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
-
if (smp_ops && smp_ops->setup_cpu)
- smp_ops->setup_cpu(boot_cpuid);
-
- set_cpus_allowed_ptr(current, old_mask);
-
- free_cpumask_var(old_mask);
+ work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL);
if (smp_ops && smp_ops->bringup_done)
smp_ops->bringup_done();
@@ -812,7 +1006,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
dump_numa_cpu_topology();
set_sched_topology(powerpc_topology);
-
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 66711958493c..d534ed901538 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -59,7 +59,14 @@ EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
- save_context_stack(trace, tsk->thread.ksp, tsk, 0);
+ unsigned long sp;
+
+ if (tsk == current)
+ sp = current_stack_pointer();
+ else
+ sp = tsk->thread.ksp;
+
+ save_context_stack(trace, sp, tsk, 0);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
index 6ae9bd5086a4..0050b2d2ff7a 100644
--- a/arch/powerpc/kernel/swsusp.c
+++ b/arch/powerpc/kernel/swsusp.c
@@ -10,6 +10,7 @@
*/
#include <linux/sched.h>
+#include <linux/suspend.h>
#include <asm/current.h>
#include <asm/mmu_context.h>
#include <asm/switch_to.h>
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index de04c9fbb5cd..a877bf8269fe 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -42,11 +42,11 @@
#include <asm/unistd.h>
#include <asm/asm-prototypes.h>
-static inline unsigned long do_mmap2(unsigned long addr, size_t len,
+static inline long do_mmap2(unsigned long addr, size_t len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long off, int shift)
{
- unsigned long ret = -EINVAL;
+ long ret = -EINVAL;
if (!arch_validate_prot(prot))
goto out;
@@ -62,16 +62,16 @@ out:
return ret;
}
-unsigned long sys_mmap2(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, unsigned long, pgoff)
{
return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12);
}
-unsigned long sys_mmap(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, off_t offset)
+SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, off_t, offset)
{
return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT);
}
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index c1fb255a60d6..4437c70c7c2b 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -710,6 +710,10 @@ static int register_cpu_online(unsigned int cpu)
struct device_attribute *attrs, *pmc_attrs;
int i, nattrs;
+ /* For cpus present at boot a reference was already grabbed in register_cpu() */
+ if (!s->of_node)
+ s->of_node = of_get_cpu_node(cpu, NULL);
+
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_SMT))
device_create_file(s, &dev_attr_smt_snooze_delay);
@@ -785,9 +789,9 @@ static int register_cpu_online(unsigned int cpu)
return 0;
}
+#ifdef CONFIG_HOTPLUG_CPU
static int unregister_cpu_online(unsigned int cpu)
{
-#ifdef CONFIG_HOTPLUG_CPU
struct cpu *c = &per_cpu(cpu_devices, cpu);
struct device *s = &c->dev;
struct device_attribute *attrs, *pmc_attrs;
@@ -864,9 +868,13 @@ static int unregister_cpu_online(unsigned int cpu)
}
#endif
cacheinfo_cpu_offline(cpu);
-#endif /* CONFIG_HOTPLUG_CPU */
+ of_node_put(s->of_node);
+ s->of_node = NULL;
return 0;
}
+#else /* !CONFIG_HOTPLUG_CPU */
+#define unregister_cpu_online NULL
+#endif
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
ssize_t arch_cpu_probe(const char *buf, size_t count)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 07b90725855e..2b33cfaac7b8 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -995,8 +995,10 @@ static void __init init_decrementer_clockevent(void)
decrementer_clockevent.max_delta_ns =
clockevent_delta2ns(decrementer_max, &decrementer_clockevent);
+ decrementer_clockevent.max_delta_ticks = decrementer_max;
decrementer_clockevent.min_delta_ns =
clockevent_delta2ns(2, &decrementer_clockevent);
+ decrementer_clockevent.min_delta_ticks = 2;
register_decrementer_clockevent(cpu);
}
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
new file mode 100644
index 000000000000..729dffc5f7bc
--- /dev/null
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -0,0 +1,29 @@
+#
+# Makefile for the powerpc trace subsystem
+#
+
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+ifdef CONFIG_FUNCTION_TRACER
+# do not trace tracer code
+CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+endif
+
+obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o
+obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64.o
+ifdef CONFIG_MPROFILE_KERNEL
+obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_mprofile.o
+else
+obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o
+endif
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
+obj-$(CONFIG_TRACING) += trace_clock.o
+
+obj-$(CONFIG_PPC64) += $(obj64-y)
+obj-$(CONFIG_PPC32) += $(obj32-y)
+
+# Disable GCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_ftrace.o := n
+UBSAN_SANITIZE_ftrace.o := n
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 5c9f50c1aa99..32509de6ce4c 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/list.h>
+#include <asm/asm-prototypes.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include <asm/ftrace.h>
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
new file mode 100644
index 000000000000..afef2c076282
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -0,0 +1,118 @@
+/*
+ * Split from entry_32.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/export.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ /*
+ * It is required that _mcount on PPC32 must preserve the
+ * link register. But we have r0 to play with. We use r0
+ * to push the return address back to the caller of mcount
+ * into the ctr register, restore the link register and
+ * then jump back using the ctr register.
+ */
+ mflr r0
+ mtctr r0
+ lwz r0, 4(r1)
+ mtlr r0
+ bctr
+
+_GLOBAL(ftrace_caller)
+ MCOUNT_SAVE_FRAME
+ /* r3 ends up with link register */
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+ MCOUNT_RESTORE_FRAME
+ /* old link register ends up in ctr reg */
+ bctr
+#else
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+
+ MCOUNT_SAVE_FRAME
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5, ftrace_trace_function)
+ lwz r5,0(r5)
+
+ mtctr r5
+ bctrl
+ nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ b ftrace_graph_caller
+#endif
+ MCOUNT_RESTORE_FRAME
+ bctr
+#endif
+EXPORT_SYMBOL(_mcount)
+
+_GLOBAL(ftrace_stub)
+ blr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+ /* load r4 with local address */
+ lwz r4, 44(r1)
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ /* Grab the LR out of the caller stack frame */
+ lwz r3,52(r1)
+
+ bl prepare_ftrace_return
+ nop
+
+ /*
+ * prepare_ftrace_return gives us the address we divert to.
+ * Change the LR in the callers stack frame to this.
+ */
+ stw r3,52(r1)
+
+ MCOUNT_RESTORE_FRAME
+ /* old link register ends up in ctr reg */
+ bctr
+
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+ stwu r1, -32(r1)
+ stw r3, 20(r1)
+ stw r4, 16(r1)
+ stw r31, 12(r1)
+ mr r31, r1
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+ lwz r3, 20(r1)
+ lwz r4, 16(r1)
+ lwz r31,12(r1)
+ lwz r1, 0(r1)
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S
new file mode 100644
index 000000000000..e5ccea19821e
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64.S
@@ -0,0 +1,85 @@
+/*
+ * Split from entry_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/export.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+ mflr r12
+ mtctr r12
+ mtlr r0
+ bctr
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+_GLOBAL_TOC(_mcount)
+EXPORT_SYMBOL(_mcount)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5,ftrace_trace_function)
+ ld r5,0(r5)
+ ld r5,0(r5)
+ mtctr r5
+ bctrl
+ nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ b ftrace_graph_caller
+#endif
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+_GLOBAL(ftrace_stub)
+ blr
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+ std r4, -32(r1)
+ std r3, -24(r1)
+ /* save TOC */
+ std r2, -16(r1)
+ std r31, -8(r1)
+ mr r31, r1
+ stdu r1, -112(r1)
+
+ /*
+ * We might be called from a module.
+ * Switch to our TOC to run inside the core kernel.
+ */
+ ld r2, PACATOC(r13)
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+ ld r1, 0(r1)
+ ld r4, -32(r1)
+ ld r3, -24(r1)
+ ld r2, -16(r1)
+ ld r31, -8(r1)
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
new file mode 100644
index 000000000000..7c933a99f5d5
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -0,0 +1,272 @@
+/*
+ * Split from ftrace_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/export.h>
+#include <asm/thread_info.h>
+#include <asm/bug.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ *
+ * ftrace_caller() is the function that replaces _mcount() when ftrace is
+ * active.
+ *
+ * We arrive here after a function A calls function B, and we are the trace
+ * function for B. When we enter r1 points to A's stack frame, B has not yet
+ * had a chance to allocate one yet.
+ *
+ * Additionally r2 may point either to the TOC for A, or B, depending on
+ * whether B did a TOC setup sequence before calling us.
+ *
+ * On entry the LR points back to the _mcount() call site, and r0 holds the
+ * saved LR as it was on entry to B, ie. the original return address at the
+ * call site in A.
+ *
+ * Our job is to save the register state into a struct pt_regs (on the stack)
+ * and then arrange for the ftrace function to be called.
+ */
+_GLOBAL(ftrace_caller)
+ /* Save the original return address in A's stack frame */
+ std r0,LRSAVE(r1)
+
+ /* Create our stack frame + pt_regs */
+ stdu r1,-SWITCH_FRAME_SIZE(r1)
+
+ /* Save all gprs to pt_regs */
+ SAVE_8GPRS(0,r1)
+ SAVE_8GPRS(8,r1)
+ SAVE_8GPRS(16,r1)
+ SAVE_8GPRS(24,r1)
+
+ /* Load special regs for save below */
+ mfmsr r8
+ mfctr r9
+ mfxer r10
+ mfcr r11
+
+ /* Get the _mcount() call site out of LR */
+ mflr r7
+ /* Save it as pt_regs->nip */
+ std r7, _NIP(r1)
+ /* Save the read LR in pt_regs->link */
+ std r0, _LINK(r1)
+
+ /* Save callee's TOC in the ABI compliant location */
+ std r2, 24(r1)
+ ld r2,PACATOC(r13) /* get kernel TOC in r2 */
+
+ addis r3,r2,function_trace_op@toc@ha
+ addi r3,r3,function_trace_op@toc@l
+ ld r5,0(r3)
+
+#ifdef CONFIG_LIVEPATCH
+ mr r14,r7 /* remember old NIP */
+#endif
+ /* Calculate ip from nip-4 into r3 for call below */
+ subi r3, r7, MCOUNT_INSN_SIZE
+
+ /* Put the original return address in r4 as parent_ip */
+ mr r4, r0
+
+ /* Save special regs */
+ std r8, _MSR(r1)
+ std r9, _CTR(r1)
+ std r10, _XER(r1)
+ std r11, _CCR(r1)
+
+ /* Load &pt_regs in r6 for call below */
+ addi r6, r1 ,STACK_FRAME_OVERHEAD
+
+ /* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+
+ /* Load ctr with the possibly modified NIP */
+ ld r3, _NIP(r1)
+ mtctr r3
+#ifdef CONFIG_LIVEPATCH
+ cmpd r14,r3 /* has NIP been altered? */
+#endif
+
+ /* Restore gprs */
+ REST_8GPRS(0,r1)
+ REST_8GPRS(8,r1)
+ REST_8GPRS(16,r1)
+ REST_8GPRS(24,r1)
+
+ /* Restore possibly modified LR */
+ ld r0, _LINK(r1)
+ mtlr r0
+
+ /* Restore callee's TOC */
+ ld r2, 24(r1)
+
+ /* Pop our stack frame */
+ addi r1, r1, SWITCH_FRAME_SIZE
+
+#ifdef CONFIG_LIVEPATCH
+ /* Based on the cmpd above, if the NIP was altered handle livepatch */
+ bne- livepatch_handler
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+
+ bctr /* jump after _mcount site */
+
+_GLOBAL(ftrace_stub)
+ blr
+
+#ifdef CONFIG_LIVEPATCH
+ /*
+ * This function runs in the mcount context, between two functions. As
+ * such it can only clobber registers which are volatile and used in
+ * function linkage.
+ *
+ * We get here when a function A, calls another function B, but B has
+ * been live patched with a new function C.
+ *
+ * On entry:
+ * - we have no stack frame and can not allocate one
+ * - LR points back to the original caller (in A)
+ * - CTR holds the new NIP in C
+ * - r0 & r12 are free
+ *
+ * r0 can't be used as the base register for a DS-form load or store, so
+ * we temporarily shuffle r1 (stack pointer) into r0 and then put it back.
+ */
+livepatch_handler:
+ CURRENT_THREAD_INFO(r12, r1)
+
+ /* Save stack pointer into r0 */
+ mr r0, r1
+
+ /* Allocate 3 x 8 bytes */
+ ld r1, TI_livepatch_sp(r12)
+ addi r1, r1, 24
+ std r1, TI_livepatch_sp(r12)
+
+ /* Save toc & real LR on livepatch stack */
+ std r2, -24(r1)
+ mflr r12
+ std r12, -16(r1)
+
+ /* Store stack end marker */
+ lis r12, STACK_END_MAGIC@h
+ ori r12, r12, STACK_END_MAGIC@l
+ std r12, -8(r1)
+
+ /* Restore real stack pointer */
+ mr r1, r0
+
+ /* Put ctr in r12 for global entry and branch there */
+ mfctr r12
+ bctrl
+
+ /*
+ * Now we are returning from the patched function to the original
+ * caller A. We are free to use r0 and r12, and we can use r2 until we
+ * restore it.
+ */
+
+ CURRENT_THREAD_INFO(r12, r1)
+
+ /* Save stack pointer into r0 */
+ mr r0, r1
+
+ ld r1, TI_livepatch_sp(r12)
+
+ /* Check stack marker hasn't been trashed */
+ lis r2, STACK_END_MAGIC@h
+ ori r2, r2, STACK_END_MAGIC@l
+ ld r12, -8(r1)
+1: tdne r12, r2
+ EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0
+
+ /* Restore LR & toc from livepatch stack */
+ ld r12, -16(r1)
+ mtlr r12
+ ld r2, -24(r1)
+
+ /* Pop livepatch stack frame */
+ CURRENT_THREAD_INFO(r12, r0)
+ subi r1, r1, 24
+ std r1, TI_livepatch_sp(r12)
+
+ /* Restore real stack pointer */
+ mr r1, r0
+
+ /* Return to original caller of live patched function */
+ blr
+#endif /* CONFIG_LIVEPATCH */
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+ stdu r1, -112(r1)
+ /* with -mprofile-kernel, parameter regs are still alive at _mcount */
+ std r10, 104(r1)
+ std r9, 96(r1)
+ std r8, 88(r1)
+ std r7, 80(r1)
+ std r6, 72(r1)
+ std r5, 64(r1)
+ std r4, 56(r1)
+ std r3, 48(r1)
+
+ /* Save callee's TOC in the ABI compliant location */
+ std r2, 24(r1)
+ ld r2, PACATOC(r13) /* get kernel TOC in r2 */
+
+ mfctr r4 /* ftrace_caller has moved local addr here */
+ std r4, 40(r1)
+ mflr r3 /* ftrace_caller has restored LR from stack */
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ bl prepare_ftrace_return
+ nop
+
+ /*
+ * prepare_ftrace_return gives us the address we divert to.
+ * Change the LR to this.
+ */
+ mtlr r3
+
+ ld r0, 40(r1)
+ mtctr r0
+ ld r10, 104(r1)
+ ld r9, 96(r1)
+ ld r8, 88(r1)
+ ld r7, 80(r1)
+ ld r6, 72(r1)
+ ld r5, 64(r1)
+ ld r4, 56(r1)
+ ld r3, 48(r1)
+
+ /* Restore callee's TOC */
+ ld r2, 24(r1)
+
+ addi r1, r1, 112
+ mflr r0
+ std r0, LRSAVE(r1)
+ bctr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
new file mode 100644
index 000000000000..f095358da96e
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S
@@ -0,0 +1,68 @@
+/*
+ * Split from ftrace_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/export.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL_TOC(ftrace_caller)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+
+_GLOBAL(ftrace_stub)
+ blr
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+ /* load r4 with local address */
+ ld r4, 128(r1)
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ /* Grab the LR out of the caller stack frame */
+ ld r11, 112(r1)
+ ld r3, 16(r11)
+
+ bl prepare_ftrace_return
+ nop
+
+ /*
+ * prepare_ftrace_return gives us the address we divert to.
+ * Change the LR in the callers stack frame to this.
+ */
+ ld r11, 112(r1)
+ std r3, 16(r11)
+
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace_clock.c b/arch/powerpc/kernel/trace/trace_clock.c
index 49170690946d..49170690946d 100644
--- a/arch/powerpc/kernel/trace_clock.c
+++ b/arch/powerpc/kernel/trace/trace_clock.c
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index ff365f9de27a..d4e545d27ef9 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -35,13 +35,13 @@
#include <linux/backlight.h>
#include <linux/bug.h>
#include <linux/kdebug.h>
-#include <linux/debugfs.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
+#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
@@ -279,18 +279,35 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
void system_reset_exception(struct pt_regs *regs)
{
+ /*
+ * Avoid crashes in case of nested NMI exceptions. Recoverability
+ * is determined by RI and in_nmi
+ */
+ bool nested = in_nmi();
+ if (!nested)
+ nmi_enter();
+
/* See if any machine dependent calls */
if (ppc_md.system_reset_exception) {
if (ppc_md.system_reset_exception(regs))
- return;
+ goto out;
}
die("System Reset", regs, SIGABRT);
+out:
+#ifdef CONFIG_PPC_BOOK3S_64
+ BUG_ON(get_paca()->in_nmi == 0);
+ if (get_paca()->in_nmi > 1)
+ panic("Unrecoverable nested System Reset");
+#endif
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
panic("Unrecoverable System Reset");
+ if (!nested)
+ nmi_exit();
+
/* What should we do here? We could issue a shutdown or hard reset. */
}
@@ -306,8 +323,6 @@ long machine_check_early(struct pt_regs *regs)
__this_cpu_inc(irq_stat.mce_exceptions);
- add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
-
if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
handled = cur_cpu_spec->machine_check_early(regs);
return handled;
@@ -741,6 +756,8 @@ void machine_check_exception(struct pt_regs *regs)
__this_cpu_inc(irq_stat.mce_exceptions);
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
/* See if any machine dependent calls. In theory, we would want
* to call the CPU first, and call the ppc_md. one if the CPU
* one returns a positive number. However there is existing code
@@ -1440,6 +1457,8 @@ void facility_unavailable_exception(struct pt_regs *regs)
[FSCR_TM_LG] = "TM",
[FSCR_EBB_LG] = "EBB",
[FSCR_TAR_LG] = "TAR",
+ [FSCR_MSGP_LG] = "MSGP",
+ [FSCR_SCV_LG] = "SCV",
};
char *facility = "unknown";
u64 value;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 7394b770ae1f..2f793be3d2b1 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -77,6 +77,8 @@ SECTIONS
#endif
} :kernel
+ __head_end = .;
+
/*
* If the build dies here, it's likely code in head_64.S is referencing
* labels it can't reach, and the linker inserting stubs without the
@@ -312,6 +314,8 @@ SECTIONS
NOSAVE_DATA
}
+ BUG_TABLE
+
. = ALIGN(PAGE_SIZE);
_edata = .;
PROVIDE32 (edata = .);