aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile4
-rw-r--r--arch/powerpc/kernel/align.c86
-rw-r--r--arch/powerpc/kernel/asm-offsets.c75
-rw-r--r--arch/powerpc/kernel/cacheinfo.c11
-rw-r--r--arch/powerpc/kernel/clock.c82
-rw-r--r--arch/powerpc/kernel/cpu_setup_a2.S120
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S74
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S42
-rw-r--r--arch/powerpc/kernel/cputable.c59
-rw-r--r--arch/powerpc/kernel/crash.c3
-rw-r--r--arch/powerpc/kernel/crash_dump.c8
-rw-r--r--arch/powerpc/kernel/dma-iommu.c4
-rw-r--r--arch/powerpc/kernel/dma.c10
-rw-r--r--arch/powerpc/kernel/eeh.c297
-rw-r--r--arch/powerpc/kernel/eeh_driver.c334
-rw-r--r--arch/powerpc/kernel/eeh_event.c21
-rw-r--r--arch/powerpc/kernel/eeh_pe.c111
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c3
-rw-r--r--arch/powerpc/kernel/entry_64.S140
-rw-r--r--arch/powerpc/kernel/epapr_paravirt.c21
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S588
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S501
-rw-r--r--arch/powerpc/kernel/fadump.c17
-rw-r--r--arch/powerpc/kernel/fpu.S16
-rw-r--r--arch/powerpc/kernel/fsl_booke_entry_mapping.S2
-rw-r--r--arch/powerpc/kernel/ftrace.c179
-rw-r--r--arch/powerpc/kernel/head_40x.S19
-rw-r--r--arch/powerpc/kernel/head_64.S118
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S266
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c9
-rw-r--r--arch/powerpc/kernel/idle_book3e.S2
-rw-r--r--arch/powerpc/kernel/idle_power4.S2
-rw-r--r--arch/powerpc/kernel/idle_power7.S104
-rw-r--r--arch/powerpc/kernel/iomap.c21
-rw-r--r--arch/powerpc/kernel/iommu.c157
-rw-r--r--arch/powerpc/kernel/irq.c27
-rw-r--r--arch/powerpc/kernel/kgdb.c1
-rw-r--r--arch/powerpc/kernel/kprobes.c9
-rw-r--r--arch/powerpc/kernel/kvm.c45
-rw-r--r--arch/powerpc/kernel/legacy_serial.c36
-rw-r--r--arch/powerpc/kernel/machine_kexec.c14
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c8
-rw-r--r--arch/powerpc/kernel/mce.c352
-rw-r--r--arch/powerpc/kernel/mce_power.c313
-rw-r--r--arch/powerpc/kernel/misc_32.S9
-rw-r--r--arch/powerpc/kernel/misc_64.S52
-rw-r--r--arch/powerpc/kernel/module_64.c290
-rw-r--r--arch/powerpc/kernel/paca.c43
-rw-r--r--arch/powerpc/kernel/pci-common.c132
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c3
-rw-r--r--arch/powerpc/kernel/pci_64.c12
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c16
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c3
-rw-r--r--arch/powerpc/kernel/process.c262
-rw-r--r--arch/powerpc/kernel/prom.c192
-rw-r--r--arch/powerpc/kernel/prom_init.c211
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh4
-rw-r--r--arch/powerpc/kernel/reloc_64.S5
-rw-r--r--arch/powerpc/kernel/rtas.c39
-rw-r--r--arch/powerpc/kernel/rtas_flash.c2
-rw-r--r--arch/powerpc/kernel/rtas_pci.c66
-rw-r--r--arch/powerpc/kernel/rtasd.c24
-rw-r--r--arch/powerpc/kernel/setup-common.c60
-rw-r--r--arch/powerpc/kernel/setup_32.c10
-rw-r--r--arch/powerpc/kernel/setup_64.c111
-rw-r--r--arch/powerpc/kernel/signal.c5
-rw-r--r--arch/powerpc/kernel/signal_32.c51
-rw-r--r--arch/powerpc/kernel/signal_64.c29
-rw-r--r--arch/powerpc/kernel/smp-tbsync.c1
-rw-r--r--arch/powerpc/kernel/smp.c108
-rw-r--r--arch/powerpc/kernel/swsusp_booke.S32
-rw-r--r--arch/powerpc/kernel/syscalls.c1
-rw-r--r--arch/powerpc/kernel/sysfs.c437
-rw-r--r--arch/powerpc/kernel/systbl.S18
-rw-r--r--arch/powerpc/kernel/time.c93
-rw-r--r--arch/powerpc/kernel/tm.S71
-rw-r--r--arch/powerpc/kernel/traps.c80
-rw-r--r--arch/powerpc/kernel/udbg.c2
-rw-r--r--arch/powerpc/kernel/udbg_16550.c11
-rw-r--r--arch/powerpc/kernel/uprobes.c2
-rw-r--r--arch/powerpc/kernel/vdso.c8
-rw-r--r--arch/powerpc/kernel/vdso32/getcpu.S2
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32_wrapper.S3
-rw-r--r--arch/powerpc/kernel/vdso64/getcpu.S2
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64_wrapper.S3
-rw-r--r--arch/powerpc/kernel/vector.S10
-rw-r--r--arch/powerpc/kernel/vio.c34
87 files changed, 4593 insertions, 2267 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 445cb6e39d5b..670c312d914e 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -2,6 +2,7 @@
# Makefile for the linux kernel.
#
+CFLAGS_prom.o = -I$(src)/../../../scripts/dtc/libfdt
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
@@ -39,15 +40,14 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
-obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o
obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o
obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o
-obj-$(CONFIG_PPC_CLOCK) += clock.o
procfs-y := proc_powerpc.o
obj-$(CONFIG_PROC_FS) += $(procfs-y)
rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index de91f3ae631e..34f55524d456 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -25,14 +25,13 @@
#include <asm/cputable.h>
#include <asm/emulated_ops.h>
#include <asm/switch_to.h>
+#include <asm/disassemble.h>
struct aligninfo {
unsigned char len;
unsigned char flags;
};
-#define IS_XFORM(inst) (((inst) >> 26) == 31)
-#define IS_DSFORM(inst) (((inst) >> 26) >= 56)
#define INVALID { 0, 0 }
@@ -73,7 +72,7 @@ static struct aligninfo aligninfo[128] = {
{ 8, LD+F }, /* 00 0 1001: lfd */
{ 4, ST+F+S }, /* 00 0 1010: stfs */
{ 8, ST+F }, /* 00 0 1011: stfd */
- INVALID, /* 00 0 1100 */
+ { 16, LD }, /* 00 0 1100: lq */
{ 8, LD }, /* 00 0 1101: ld/ldu/lwa */
INVALID, /* 00 0 1110 */
{ 8, ST }, /* 00 0 1111: std/stdu */
@@ -140,7 +139,7 @@ static struct aligninfo aligninfo[128] = {
{ 2, LD+SW }, /* 10 0 1100: lhbrx */
{ 4, LD+SE }, /* 10 0 1101 lwa */
{ 2, ST+SW }, /* 10 0 1110: sthbrx */
- INVALID, /* 10 0 1111 */
+ { 16, ST }, /* 10 0 1111: stq */
INVALID, /* 10 1 0000 */
INVALID, /* 10 1 0001 */
INVALID, /* 10 1 0010 */
@@ -192,37 +191,6 @@ static struct aligninfo aligninfo[128] = {
};
/*
- * Create a DSISR value from the instruction
- */
-static inline unsigned make_dsisr(unsigned instr)
-{
- unsigned dsisr;
-
-
- /* bits 6:15 --> 22:31 */
- dsisr = (instr & 0x03ff0000) >> 16;
-
- if (IS_XFORM(instr)) {
- /* bits 29:30 --> 15:16 */
- dsisr |= (instr & 0x00000006) << 14;
- /* bit 25 --> 17 */
- dsisr |= (instr & 0x00000040) << 8;
- /* bits 21:24 --> 18:21 */
- dsisr |= (instr & 0x00000780) << 3;
- } else {
- /* bit 5 --> 17 */
- dsisr |= (instr & 0x04000000) >> 12;
- /* bits 1: 4 --> 18:21 */
- dsisr |= (instr & 0x78000000) >> 17;
- /* bits 30:31 --> 12:13 */
- if (IS_DSFORM(instr))
- dsisr |= (instr & 0x00000003) << 18;
- }
-
- return dsisr;
-}
-
-/*
* The dcbz (data cache block zero) instruction
* gives an alignment fault if used on non-cacheable
* memory. We handle the fault mainly for the
@@ -385,8 +353,6 @@ static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
int i, ret, sw = 0;
- if (!(flags & F))
- return 0;
if (reg & 1)
return 0; /* invalid form: FRS/FRT must be even */
if (flags & SW)
@@ -406,6 +372,34 @@ static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
return 1; /* exception handled and fixed up */
}
+#ifdef CONFIG_PPC64
+static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr,
+ unsigned int reg, unsigned int flags)
+{
+ char *ptr0 = (char *)&regs->gpr[reg];
+ char *ptr1 = (char *)&regs->gpr[reg+1];
+ int i, ret, sw = 0;
+
+ if (reg & 1)
+ return 0; /* invalid form: GPR must be even */
+ if (flags & SW)
+ sw = 7;
+ ret = 0;
+ for (i = 0; i < 8; ++i) {
+ if (!(flags & ST)) {
+ ret |= __get_user(ptr0[i^sw], addr + i);
+ ret |= __get_user(ptr1[i^sw], addr + i + 8);
+ } else {
+ ret |= __put_user(ptr0[i^sw], addr + i);
+ ret |= __put_user(ptr1[i^sw], addr + i + 8);
+ }
+ }
+ if (ret)
+ return -EFAULT;
+ return 1; /* exception handled and fixed up */
+}
+#endif /* CONFIG_PPC64 */
+
#ifdef CONFIG_SPE
static struct aligninfo spe_aligninfo[32] = {
@@ -914,10 +908,20 @@ int fix_alignment(struct pt_regs *regs)
flush_fp_to_thread(current);
}
- /* Special case for 16-byte FP loads and stores */
- if (nb == 16) {
- PPC_WARN_ALIGNMENT(fp_pair, regs);
- return emulate_fp_pair(addr, reg, flags);
+ if ((nb == 16)) {
+ if (flags & F) {
+ /* Special case for 16-byte FP loads and stores */
+ PPC_WARN_ALIGNMENT(fp_pair, regs);
+ return emulate_fp_pair(addr, reg, flags);
+ } else {
+#ifdef CONFIG_PPC64
+ /* Special case for 16-byte loads and stores */
+ PPC_WARN_ALIGNMENT(lq_stq, regs);
+ return emulate_lq_stq(regs, addr, reg, flags);
+#else
+ return 0;
+#endif
+ }
}
PPC_WARN_ALIGNMENT(unaligned, regs);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d3de01066f7d..f5995a912213 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -54,6 +54,7 @@
#endif
#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)
#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
#endif
#ifdef CONFIG_PPC32
@@ -203,6 +204,15 @@ int main(void)
DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));
DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));
DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack));
+ DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr));
+
+ DEFINE(TCD_ESEL_NEXT,
+ offsetof(struct tlb_core_data, esel_next));
+ DEFINE(TCD_ESEL_MAX,
+ offsetof(struct tlb_core_data, esel_max));
+ DEFINE(TCD_ESEL_FIRST,
+ offsetof(struct tlb_core_data, esel_first));
+ DEFINE(TCD_LOCK, offsetof(struct tlb_core_data, lock));
#endif /* CONFIG_PPC_BOOK3E */
#ifdef CONFIG_PPC_STD_MMU_64
@@ -232,15 +242,20 @@ int main(void)
DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
#endif /* CONFIG_PPC_STD_MMU_64 */
DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
+#ifdef CONFIG_PPC_BOOK3S_64
+ DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp));
+ DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
+#endif
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
+ DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default));
DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost));
- DEFINE(PACA_SPRG3, offsetof(struct paca_struct, sprg3));
+ DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso));
#endif /* CONFIG_PPC64 */
/* RTAS */
@@ -425,18 +440,16 @@ int main(void)
DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
- DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr));
- DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr));
+ DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr));
#ifdef CONFIG_ALTIVEC
- DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr));
- DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr));
-#endif
-#ifdef CONFIG_VSX
- DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr));
+ DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr));
#endif
DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
+#ifdef CONFIG_PPC_BOOK3S
+ DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar));
+#endif
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -458,6 +471,9 @@ int main(void)
DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+ DEFINE(VCPU_SHAREDBE, offsetof(struct kvm_vcpu, arch.shared_big_endian));
+#endif
DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
@@ -489,11 +505,18 @@ int main(void)
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
+ DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic));
+ DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb));
DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr));
DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
+ DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr));
DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));
DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr));
+ DEFINE(VCPU_DABRX, offsetof(struct kvm_vcpu, arch.dabrx));
+ DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr));
+ DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx));
+ DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr));
DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
@@ -502,29 +525,60 @@ int main(void)
DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
+ DEFINE(VCPU_SPMC, offsetof(struct kvm_vcpu, arch.spmc));
DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar));
DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar));
+ DEFINE(VCPU_SIER, offsetof(struct kvm_vcpu, arch.sier));
DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
+ DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
- DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
+ DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr));
+ DEFINE(VCPU_SHADOW_FSCR, offsetof(struct kvm_vcpu, arch.shadow_fscr));
+ DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb));
+ DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr));
+ DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr));
+ DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr));
+ DEFINE(VCPU_CSIGR, offsetof(struct kvm_vcpu, arch.csigr));
+ DEFINE(VCPU_TACR, offsetof(struct kvm_vcpu, arch.tacr));
+ DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr));
+ DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
+ DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
+ DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
+ DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes));
DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar));
+ DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar));
+ DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr));
+ DEFINE(VCPU_GPR_TM, offsetof(struct kvm_vcpu, arch.gpr_tm));
+ DEFINE(VCPU_FPRS_TM, offsetof(struct kvm_vcpu, arch.fp_tm.fpr));
+ DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr));
+ DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm));
+ DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm));
+ DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm));
+ DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm));
+ DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm));
+ DEFINE(VCPU_PPR_TM, offsetof(struct kvm_vcpu, arch.ppr_tm));
+ DEFINE(VCPU_DSCR_TM, offsetof(struct kvm_vcpu, arch.dscr_tm));
+ DEFINE(VCPU_TAR_TM, offsetof(struct kvm_vcpu, arch.tar_tm));
+#endif
#ifdef CONFIG_PPC_BOOK3S_64
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -568,6 +622,7 @@ int main(void)
#ifdef CONFIG_PPC64
SVCPU_FIELD(SVCPU_SLB, slb);
SVCPU_FIELD(SVCPU_SLB_MAX, slb_max);
+ SVCPU_FIELD(SVCPU_SHADOW_FSCR, shadow_fscr);
#endif
HSTATE_FIELD(HSTATE_HOST_R1, host_r1);
@@ -589,6 +644,7 @@ int main(void)
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
+ HSTATE_FIELD(HSTATE_PTID, ptid);
HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
HSTATE_FIELD(HSTATE_PMC, host_pmc);
HSTATE_FIELD(HSTATE_PURR, host_purr);
@@ -602,6 +658,7 @@ int main(void)
#ifdef CONFIG_PPC_BOOK3S_64
HSTATE_FIELD(HSTATE_CFAR, cfar);
HSTATE_FIELD(HSTATE_PPR, ppr);
+ HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr);
#endif /* CONFIG_PPC_BOOK3S_64 */
#else /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 654932727873..40198d50b4c2 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -12,7 +12,6 @@
#include <linux/cpu.h>
#include <linux/cpumask.h>
-#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/list.h>
@@ -757,7 +756,10 @@ void cacheinfo_cpu_online(unsigned int cpu_id)
cacheinfo_sysfs_populate(cpu_id, cache);
}
-#ifdef CONFIG_HOTPLUG_CPU /* functions needed for cpu offline */
+/* functions needed to remove cache entry for cpu offline or suspend/resume */
+
+#if (defined(CONFIG_PPC_PSERIES) && defined(CONFIG_SUSPEND)) || \
+ defined(CONFIG_HOTPLUG_CPU)
static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
{
@@ -794,6 +796,9 @@ static void remove_cache_dir(struct cache_dir *cache_dir)
{
remove_index_dirs(cache_dir);
+ /* Remove cache dir from sysfs */
+ kobject_del(cache_dir->kobj);
+
kobject_put(cache_dir->kobj);
kfree(cache_dir);
@@ -841,4 +846,4 @@ void cacheinfo_cpu_offline(unsigned int cpu_id)
if (cache)
cache_cpu_clear(cache, cpu_id);
}
-#endif /* CONFIG_HOTPLUG_CPU */
+#endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/kernel/clock.c b/arch/powerpc/kernel/clock.c
deleted file mode 100644
index a764b47791e8..000000000000
--- a/arch/powerpc/kernel/clock.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Dummy clk implementations for powerpc.
- * These need to be overridden in platform code.
- */
-
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/export.h>
-#include <asm/clk_interface.h>
-
-struct clk_interface clk_functions;
-
-struct clk *clk_get(struct device *dev, const char *id)
-{
- if (clk_functions.clk_get)
- return clk_functions.clk_get(dev, id);
- return ERR_PTR(-ENOSYS);
-}
-EXPORT_SYMBOL(clk_get);
-
-void clk_put(struct clk *clk)
-{
- if (clk_functions.clk_put)
- clk_functions.clk_put(clk);
-}
-EXPORT_SYMBOL(clk_put);
-
-int clk_enable(struct clk *clk)
-{
- if (clk_functions.clk_enable)
- return clk_functions.clk_enable(clk);
- return -ENOSYS;
-}
-EXPORT_SYMBOL(clk_enable);
-
-void clk_disable(struct clk *clk)
-{
- if (clk_functions.clk_disable)
- clk_functions.clk_disable(clk);
-}
-EXPORT_SYMBOL(clk_disable);
-
-unsigned long clk_get_rate(struct clk *clk)
-{
- if (clk_functions.clk_get_rate)
- return clk_functions.clk_get_rate(clk);
- return 0;
-}
-EXPORT_SYMBOL(clk_get_rate);
-
-long clk_round_rate(struct clk *clk, unsigned long rate)
-{
- if (clk_functions.clk_round_rate)
- return clk_functions.clk_round_rate(clk, rate);
- return -ENOSYS;
-}
-EXPORT_SYMBOL(clk_round_rate);
-
-int clk_set_rate(struct clk *clk, unsigned long rate)
-{
- if (clk_functions.clk_set_rate)
- return clk_functions.clk_set_rate(clk, rate);
- return -ENOSYS;
-}
-EXPORT_SYMBOL(clk_set_rate);
-
-struct clk *clk_get_parent(struct clk *clk)
-{
- if (clk_functions.clk_get_parent)
- return clk_functions.clk_get_parent(clk);
- return ERR_PTR(-ENOSYS);
-}
-EXPORT_SYMBOL(clk_get_parent);
-
-int clk_set_parent(struct clk *clk, struct clk *parent)
-{
- if (clk_functions.clk_set_parent)
- return clk_functions.clk_set_parent(clk, parent);
- return -ENOSYS;
-}
-EXPORT_SYMBOL(clk_set_parent);
diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S
deleted file mode 100644
index 61f079e05b61..000000000000
--- a/arch/powerpc/kernel/cpu_setup_a2.S
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * A2 specific assembly support code
- *
- * Copyright 2009 Ben Herrenschmidt, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/asm-offsets.h>
-#include <asm/ppc_asm.h>
-#include <asm/ppc-opcode.h>
-#include <asm/processor.h>
-#include <asm/reg_a2.h>
-#include <asm/reg.h>
-#include <asm/thread_info.h>
-
-/*
- * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity.
- * This also prevents external LPID accesses but that isn't a problem when not a
- * guest. Under PV, this setting will be ignored and MMUCR will return the right
- * number of PID bits we can use.
- */
-#define MMUCR1_EXTEND_PID \
- (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \
- MMUCR1_DTTID | MMUCR1_DCCD)
-
-/*
- * Use extended PIDs if enabled.
- * Don't clear the ERATs on context sync events and enable I & D LRU.
- * Enable ERAT back invalidate when tlbwe overwrites an entry.
- */
-#define INITIAL_MMUCR1 \
- (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \
- MMUCR1_DRRE | MMUCR1_TLBWE_BINV)
-
-_GLOBAL(__setup_cpu_a2)
- /* Some of these are actually thread local and some are
- * core local but doing it always won't hurt
- */
-
-#ifdef CONFIG_PPC_ICSWX
- /* Make sure ACOP starts out as zero */
- li r3,0
- mtspr SPRN_ACOP,r3
-
- /* Skip the following if we are in Guest mode */
- mfmsr r3
- andis. r0,r3,MSR_GS@h
- bne _icswx_skip_guest
-
- /* Enable icswx instruction */
- mfspr r3,SPRN_A2_CCR2
- ori r3,r3,A2_CCR2_ENABLE_ICSWX
- mtspr SPRN_A2_CCR2,r3
-
- /* Unmask all CTs in HACOP */
- li r3,-1
- mtspr SPRN_HACOP,r3
-_icswx_skip_guest:
-#endif /* CONFIG_PPC_ICSWX */
-
- /* Enable doorbell */
- mfspr r3,SPRN_A2_CCR2
- oris r3,r3,A2_CCR2_ENABLE_PC@h
- mtspr SPRN_A2_CCR2,r3
- isync
-
- /* Setup CCR0 to disable power saving for now as it's busted
- * in the current implementations. Setup CCR1 to wake on
- * interrupts normally (we write the default value but who
- * knows what FW may have clobbered...)
- */
- li r3,0
- mtspr SPRN_A2_CCR0, r3
- LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f)
- mtspr SPRN_A2_CCR1, r3
-
- /* Initialise MMUCR1 */
- lis r3,INITIAL_MMUCR1@h
- ori r3,r3,INITIAL_MMUCR1@l
- mtspr SPRN_MMUCR1,r3
-
- /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
- LOAD_REG_IMMEDIATE(r3, 0x000a7531)
- mtspr SPRN_MMUCR2,r3
-
- /* Set MMUCR3 to write all thids bit to the TLB */
- LOAD_REG_IMMEDIATE(r3, 0x0000000f)
- mtspr SPRN_MMUCR3,r3
-
- /* Don't do ERAT stuff if running guest mode */
- mfmsr r3
- andis. r0,r3,MSR_GS@h
- bne 1f
-
- /* Now set the I-ERAT watermark to 15 */
- lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h
- mtspr SPRN_MMUCR0, r4
- li r4,A2_IERAT_SIZE-1
- PPC_ERATWE(R4,R4,3)
-
- /* Now set the D-ERAT watermark to 31 */
- lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h
- mtspr SPRN_MMUCR0, r4
- li r4,A2_DERAT_SIZE-1
- PPC_ERATWE(R4,R4,3)
-
- /* And invalidate the beast just in case. That won't get rid of
- * a bolted entry though it will be in LRU and so will go away eventually
- * but let's not bother for now
- */
- PPC_ERATILX(0,0,R0)
-1:
- blr
-
-_GLOBAL(__restore_cpu_a2)
- b __setup_cpu_a2
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index bfb18c7290b7..4f1393d20079 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -53,11 +53,57 @@ _GLOBAL(__e500_dcache_setup)
isync
blr
+/*
+ * FIXME - we haven't yet done testing to determine a reasonable default
+ * value for PW20_WAIT_IDLE_BIT.
+ */
+#define PW20_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */
+_GLOBAL(setup_pw20_idle)
+ mfspr r3, SPRN_PWRMGTCR0
+
+ /* Set PW20_WAIT bit, enable pw20 state*/
+ ori r3, r3, PWRMGTCR0_PW20_WAIT
+ li r11, PW20_WAIT_IDLE_BIT
+
+ /* Set Automatic PW20 Core Idle Count */
+ rlwimi r3, r11, PWRMGTCR0_PW20_ENT_SHIFT, PWRMGTCR0_PW20_ENT
+
+ mtspr SPRN_PWRMGTCR0, r3
+
+ blr
+
+/*
+ * FIXME - we haven't yet done testing to determine a reasonable default
+ * value for AV_WAIT_IDLE_BIT.
+ */
+#define AV_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */
+_GLOBAL(setup_altivec_idle)
+ mfspr r3, SPRN_PWRMGTCR0
+
+ /* Enable Altivec Idle */
+ oris r3, r3, PWRMGTCR0_AV_IDLE_PD_EN@h
+ li r11, AV_WAIT_IDLE_BIT
+
+ /* Set Automatic AltiVec Idle Count */
+ rlwimi r3, r11, PWRMGTCR0_AV_IDLE_CNT_SHIFT, PWRMGTCR0_AV_IDLE_CNT
+
+ mtspr SPRN_PWRMGTCR0, r3
+
+ blr
+
_GLOBAL(__setup_cpu_e6500)
mflr r6
#ifdef CONFIG_PPC64
- bl .setup_altivec_ivors
+ bl setup_altivec_ivors
+ /* Touch IVOR42 only if the CPU supports E.HV category */
+ mfspr r10,SPRN_MMUCFG
+ rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
+ beq 1f
+ bl setup_lrat_ivor
+1:
#endif
+ bl setup_pw20_idle
+ bl setup_altivec_idle
bl __setup_cpu_e5500
mtlr r6
blr
@@ -118,7 +164,15 @@ _GLOBAL(__setup_cpu_e5500)
#ifdef CONFIG_PPC_BOOK3E_64
_GLOBAL(__restore_cpu_e6500)
mflr r5
- bl .setup_altivec_ivors
+ bl setup_altivec_ivors
+ /* Touch IVOR42 only if the CPU supports E.HV category */
+ mfspr r10,SPRN_MMUCFG
+ rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
+ beq 1f
+ bl setup_lrat_ivor
+1:
+ bl setup_pw20_idle
+ bl setup_altivec_idle
bl __restore_cpu_e5500
mtlr r5
blr
@@ -127,9 +181,9 @@ _GLOBAL(__restore_cpu_e5500)
mflr r4
bl __e500_icache_setup
bl __e500_dcache_setup
- bl .__setup_base_ivors
- bl .setup_perfmon_ivor
- bl .setup_doorbell_ivors
+ bl __setup_base_ivors
+ bl setup_perfmon_ivor
+ bl setup_doorbell_ivors
/*
* We only want to touch IVOR38-41 if we're running on hardware
* that supports category E.HV. The architectural way to determine
@@ -138,7 +192,7 @@ _GLOBAL(__restore_cpu_e5500)
mfspr r10,SPRN_MMUCFG
rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
beq 1f
- bl .setup_ehv_ivors
+ bl setup_ehv_ivors
1:
mtlr r4
blr
@@ -147,9 +201,9 @@ _GLOBAL(__setup_cpu_e5500)
mflr r5
bl __e500_icache_setup
bl __e500_dcache_setup
- bl .__setup_base_ivors
- bl .setup_perfmon_ivor
- bl .setup_doorbell_ivors
+ bl __setup_base_ivors
+ bl setup_perfmon_ivor
+ bl setup_doorbell_ivors
/*
* We only want to touch IVOR38-41 if we're running on hardware
* that supports category E.HV. The architectural way to determine
@@ -158,7 +212,7 @@ _GLOBAL(__setup_cpu_e5500)
mfspr r10,SPRN_MMUCFG
rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
beq 1f
- bl .setup_ehv_ivors
+ bl setup_ehv_ivors
b 2f
1:
ld r10,CPU_SPEC_FEATURES(r4)
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 18b5b9cf8e37..46733535cc0b 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -29,7 +29,7 @@ _GLOBAL(__setup_cpu_power7)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
bl __init_LPCR
- bl __init_TLB
+ bl __init_tlb_power7
mtlr r11
blr
@@ -42,7 +42,7 @@ _GLOBAL(__restore_cpu_power7)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
bl __init_LPCR
- bl __init_TLB
+ bl __init_tlb_power7
mtlr r11
blr
@@ -56,10 +56,10 @@ _GLOBAL(__setup_cpu_power8)
li r0,0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- oris r3, r3, LPCR_AIL_3@h
+ ori r3, r3, LPCR_PECEDH
bl __init_LPCR
bl __init_HFSCR
- bl __init_TLB
+ bl __init_tlb_power8
bl __init_PMU_HV
mtlr r11
blr
@@ -75,10 +75,10 @@ _GLOBAL(__restore_cpu_power8)
li r0,0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- oris r3, r3, LPCR_AIL_3@h
+ ori r3, r3, LPCR_PECEDH
bl __init_LPCR
bl __init_HFSCR
- bl __init_TLB
+ bl __init_tlb_power8
bl __init_PMU_HV
mtlr r11
blr
@@ -134,15 +134,31 @@ __init_HFSCR:
mtspr SPRN_HFSCR,r3
blr
-__init_TLB:
- /*
- * Clear the TLB using the "IS 3" form of tlbiel instruction
- * (invalidate by congruence class). P7 has 128 CCs, P8 has 512
- * so we just always do 512
- */
+/*
+ * Clear the TLB using the specified IS form of tlbiel instruction
+ * (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
+ *
+ * r3 = IS field
+ */
+__init_tlb_power7:
+ li r3,0xc00 /* IS field = 0b11 */
+_GLOBAL(__flush_tlb_power7)
+ li r6,128
+ mtctr r6
+ mr r7,r3 /* IS field */
+ ptesync
+2: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 2b
+ ptesync
+1: blr
+
+__init_tlb_power8:
+ li r3,0xc00 /* IS field = 0b11 */
+_GLOBAL(__flush_tlb_power8)
li r6,512
mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
+ mr r7,r3 /* IS field */
ptesync
2: tlbiel r7
addi r7,r7,0x1000
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 597d954e5860..965291b4c2fa 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -71,6 +71,10 @@ extern void __restore_cpu_power7(void);
extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power8(void);
extern void __restore_cpu_a2(void);
+extern void __flush_tlb_power7(unsigned long inval_selector);
+extern void __flush_tlb_power8(unsigned long inval_selector);
+extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -105,7 +109,8 @@ extern void __restore_cpu_e6500(void);
PPC_FEATURE_PSERIES_PERFMON_COMPAT)
#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \
PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \
- PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR)
+ PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+ PPC_FEATURE2_VEC_CRYPTO)
#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_HAS_ALTIVEC_COMP)
@@ -440,6 +445,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
{ /* 2.07-compliant processor, i.e. Power8 "architected" mode */
@@ -456,6 +463,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
{ /* Power7 */
@@ -474,6 +483,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
{ /* Power7+ */
@@ -492,13 +503,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7+",
},
{ /* Power8E */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004b0000,
.cpu_name = "POWER8E (raw)",
- .cpu_features = CPU_FTRS_POWER8,
+ .cpu_features = CPU_FTRS_POWER8E,
.cpu_user_features = COMMON_USER_POWER8,
.cpu_user_features2 = COMMON_USER2_POWER8,
.mmu_features = MMU_FTRS_POWER8,
@@ -510,6 +523,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
{ /* Power8 */
@@ -528,6 +543,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
{ /* Cell Broadband Engine */
@@ -2132,44 +2149,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
}
#endif /* CONFIG_PPC32 */
#endif /* CONFIG_E500 */
-
-#ifdef CONFIG_PPC_A2
- { /* Standard A2 (>= DD2) + FPU core */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00480000,
- .cpu_name = "A2 (>= DD2)",
- .cpu_features = CPU_FTRS_A2,
- .cpu_user_features = COMMON_USER_PPC64,
- .mmu_features = MMU_FTRS_A2,
- .icache_bsize = 64,
- .dcache_bsize = 64,
- .num_pmcs = 0,
- .cpu_setup = __setup_cpu_a2,
- .cpu_restore = __restore_cpu_a2,
- .machine_check = machine_check_generic,
- .platform = "ppca2",
- },
- { /* This is a default entry to get going, to be replaced by
- * a real one at some stage
- */
-#define CPU_FTRS_BASE_BOOK3E (CPU_FTR_USE_TB | \
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \
- CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "Book3E",
- .cpu_features = CPU_FTRS_BASE_BOOK3E,
- .cpu_user_features = COMMON_USER_PPC64,
- .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX |
- MMU_FTR_USE_TLBIVAX_BCAST |
- MMU_FTR_LOCK_BCAST_INVAL,
- .icache_bsize = 64,
- .dcache_bsize = 64,
- .num_pmcs = 0,
- .machine_check = machine_check_generic,
- .platform = "power6",
- },
-#endif /* CONFIG_PPC_A2 */
};
static struct cpu_spec the_cpu_spec;
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index fdcd8f551aff..51dbace3269b 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -17,7 +17,6 @@
#include <linux/export.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
-#include <linux/init.h>
#include <linux/irq.h>
#include <linux/types.h>
@@ -82,7 +81,7 @@ void crash_ipi_callback(struct pt_regs *regs)
}
atomic_inc(&cpus_in_crash);
- smp_mb__after_atomic_inc();
+ smp_mb__after_atomic();
/*
* Starting the kdump boot.
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 11c1d069d920..7a13f378ca2c 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -98,17 +98,19 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
size_t csize, unsigned long offset, int userbuf)
{
void *vaddr;
+ phys_addr_t paddr;
if (!csize)
return 0;
csize = min_t(size_t, csize, PAGE_SIZE);
+ paddr = pfn << PAGE_SHIFT;
- if ((min_low_pfn < pfn) && (pfn < max_pfn)) {
- vaddr = __va(pfn << PAGE_SHIFT);
+ if (memblock_is_region_memory(paddr, csize)) {
+ vaddr = __va(paddr);
csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
} else {
- vaddr = __ioremap(pfn << PAGE_SHIFT, PAGE_SIZE, 0);
+ vaddr = __ioremap(paddr, PAGE_SIZE, 0);
csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
iounmap(vaddr);
}
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index e4897523de41..54d0116256f7 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -83,10 +83,10 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask)
return 0;
}
- if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT)) {
+ if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
- mask, tbl->it_offset << IOMMU_PAGE_SHIFT);
+ mask, tbl->it_offset << tbl->it_page_shift);
return 0;
} else
return 1;
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 8032b97ccdcb..ee78f6e49d64 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -191,12 +191,10 @@ EXPORT_SYMBOL(dma_direct_ops);
#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-int dma_set_mask(struct device *dev, u64 dma_mask)
+int __dma_set_mask(struct device *dev, u64 dma_mask)
{
struct dma_map_ops *dma_ops = get_dma_ops(dev);
- if (ppc_md.dma_set_mask)
- return ppc_md.dma_set_mask(dev, dma_mask);
if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))
return dma_ops->set_dma_mask(dev, dma_mask);
if (!dev->dma_mask || !dma_supported(dev, dma_mask))
@@ -204,6 +202,12 @@ int dma_set_mask(struct device *dev, u64 dma_mask)
*dev->dma_mask = dma_mask;
return 0;
}
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+ if (ppc_md.dma_set_mask)
+ return ppc_md.dma_set_mask(dev, dma_mask);
+ return __dma_set_mask(dev, dma_mask);
+}
EXPORT_SYMBOL(dma_set_mask);
u64 dma_get_required_mask(struct device *dev)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 4bd687d5e7aa..86e25702aaca 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -22,18 +22,21 @@
*/
#include <linux/delay.h>
+#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
+#include <linux/reboot.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>
#include <linux/atomic.h>
+#include <asm/debug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
@@ -84,24 +87,23 @@
#define EEH_MAX_FAILS 2100000
/* Time to wait for a PCI slot to report status, in milliseconds */
-#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
-
-/* Platform dependent EEH operations */
-struct eeh_ops *eeh_ops = NULL;
-
-int eeh_subsystem_enabled;
-EXPORT_SYMBOL(eeh_subsystem_enabled);
+#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)
/*
- * EEH probe mode support. The intention is to support multiple
- * platforms for EEH. Some platforms like pSeries do PCI emunation
- * based on device tree. However, other platforms like powernv probe
- * PCI devices from hardware. The flag is used to distinguish that.
- * In addition, struct eeh_ops::probe would be invoked for particular
- * OF node or PCI device so that the corresponding PE would be created
- * there.
+ * EEH probe mode support, which is part of the flags,
+ * is to support multiple platforms for EEH. Some platforms
+ * like pSeries do PCI emunation based on device tree.
+ * However, other platforms like powernv probe PCI devices
+ * from hardware. The flag is used to distinguish that.
+ * In addition, struct eeh_ops::probe would be invoked for
+ * particular OF node or PCI device so that the corresponding
+ * PE would be created there.
*/
-int eeh_probe_mode;
+int eeh_subsystem_flags;
+EXPORT_SYMBOL(eeh_subsystem_flags);
+
+/* Platform dependent EEH operations */
+struct eeh_ops *eeh_ops = NULL;
/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
@@ -132,6 +134,15 @@ static struct eeh_stats eeh_stats;
#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
+static int __init eeh_setup(char *str)
+{
+ if (!strcmp(str, "off"))
+ eeh_subsystem_flags |= EEH_FORCE_DISABLED;
+
+ return 1;
+}
+__setup("eeh=", eeh_setup);
+
/**
* eeh_gather_pci_data - Copy assorted PCI config space registers to buff
* @edev: device to report data for
@@ -144,73 +155,67 @@ static struct eeh_stats eeh_stats;
static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
{
struct device_node *dn = eeh_dev_to_of_node(edev);
- struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
u32 cfg;
int cap, i;
int n = 0;
n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
- printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
+ pr_warn("EEH: of node=%s\n", dn->full_name);
eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
- printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
+ pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
- printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
-
- if (!dev) {
- printk(KERN_WARNING "EEH: no PCI device for this of node\n");
- return n;
- }
+ pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
/* Gather bridge-specific registers */
- if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
+ if (edev->mode & EEH_DEV_BRIDGE) {
eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
- printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
+ pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
- printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
+ pr_warn("EEH: Bridge control: %04x\n", cfg);
}
/* Dump out the PCI-X command and status regs */
- cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+ cap = edev->pcix_cap;
if (cap) {
eeh_ops->read_config(dn, cap, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
- printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
+ pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
eeh_ops->read_config(dn, cap+4, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
- printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
+ pr_warn("EEH: PCI-X status: %08x\n", cfg);
}
- /* If PCI-E capable, dump PCI-E cap 10, and the AER */
- if (pci_is_pcie(dev)) {
+ /* If PCI-E capable, dump PCI-E cap 10 */
+ cap = edev->pcie_cap;
+ if (cap) {
n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
- printk(KERN_WARNING
- "EEH: PCI-E capabilities and status follow:\n");
+ pr_warn("EEH: PCI-E capabilities and status follow:\n");
for (i=0; i<=8; i++) {
- eeh_ops->read_config(dn, dev->pcie_cap+4*i, 4, &cfg);
+ eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
- printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
+ pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg);
}
+ }
+
+ /* If AER capable, dump it */
+ cap = edev->aer_cap;
+ if (cap) {
+ n += scnprintf(buf+n, len-n, "pci-e AER:\n");
+ pr_warn("EEH: PCI-E AER capability register set follows:\n");
- cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
- if (cap) {
- n += scnprintf(buf+n, len-n, "pci-e AER:\n");
- printk(KERN_WARNING
- "EEH: PCI-E AER capability register set follows:\n");
-
- for (i=0; i<14; i++) {
- eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
- n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
- printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
- }
+ for (i=0; i<14; i++) {
+ eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+ pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg);
}
}
@@ -231,21 +236,19 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
size_t loglen = 0;
struct eeh_dev *edev, *tmp;
- bool valid_cfg_log = true;
/*
* When the PHB is fenced or dead, it's pointless to collect
* the data from PCI config space because it should return
* 0xFF's. For ER, we still retrieve the data from the PCI
* config space.
+ *
+ * For pHyp, we have to enable IO for log retrieval. Otherwise,
+ * 0xFF's is always returned from PCI config space.
*/
- if (eeh_probe_mode_dev() &&
- (pe->type & EEH_PE_PHB) &&
- (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)))
- valid_cfg_log = false;
-
- if (valid_cfg_log) {
- eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ if (!(pe->type & EEH_PE_PHB)) {
+ if (eeh_probe_mode_devtree())
+ eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
eeh_ops->configure_bridge(pe);
eeh_pe_restore_bars(pe);
@@ -308,7 +311,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
/* If the PHB has been in problematic state */
eeh_serialize_lock(&flags);
- if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) {
+ if (phb_pe->state & EEH_PE_ISOLATED) {
ret = 0;
goto out;
}
@@ -327,8 +330,8 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
eeh_serialize_unlock(flags);
- pr_err("EEH: PHB#%x failure detected\n",
- phb_pe->phb->global_number);
+ pr_err("EEH: PHB#%x failure detected, location: %s\n",
+ phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
dump_stack();
eeh_send_failure_event(phb_pe);
@@ -355,16 +358,17 @@ out:
int eeh_dev_check_failure(struct eeh_dev *edev)
{
int ret;
+ int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
- struct eeh_pe *pe;
+ struct eeh_pe *pe, *parent_pe, *phb_pe;
int rc = 0;
const char *location;
eeh_stats.total_mmio_ffs++;
- if (!eeh_subsystem_enabled)
+ if (!eeh_enabled())
return 0;
if (!edev) {
@@ -436,14 +440,34 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
*/
if ((ret < 0) ||
(ret == EEH_STATE_NOT_SUPPORT) ||
- (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
- (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+ ((ret & active_flags) == active_flags)) {
eeh_stats.false_positives++;
pe->false_positives++;
rc = 0;
goto dn_unlock;
}
+ /*
+ * It should be corner case that the parent PE has been
+ * put into frozen state as well. We should take care
+ * that at first.
+ */
+ parent_pe = pe->parent;
+ while (parent_pe) {
+ /* Hit the ceiling ? */
+ if (parent_pe->type & EEH_PE_PHB)
+ break;
+
+ /* Frozen parent PE ? */
+ ret = eeh_ops->get_state(parent_pe, NULL);
+ if (ret > 0 &&
+ (ret & active_flags) != active_flags)
+ pe = parent_pe;
+
+ /* Next parent level */
+ parent_pe = parent_pe->parent;
+ }
+
eeh_stats.slot_resets++;
/* Avoid repeated reports of this failure, including problems
@@ -457,8 +481,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* a stack trace will help the device-driver authors figure
* out what happened. So print that out.
*/
- pr_err("EEH: Frozen PE#%x detected on PHB#%x\n",
- pe->addr, pe->phb->global_number);
+ phb_pe = eeh_phb_pe_get(pe->phb);
+ pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+ pe->phb->global_number, pe->addr);
+ pr_err("EEH: PE location: %s, PHB location: %s\n",
+ eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
dump_stack();
eeh_send_failure_event(pe);
@@ -514,16 +541,42 @@ EXPORT_SYMBOL(eeh_check_failure);
*/
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
- int rc;
+ int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+
+ /*
+ * pHyp doesn't allow to enable IO or DMA on unfrozen PE.
+ * Also, it's pointless to enable them on unfrozen PE. So
+ * we have the check here.
+ */
+ if (function == EEH_OPT_THAW_MMIO ||
+ function == EEH_OPT_THAW_DMA) {
+ rc = eeh_ops->get_state(pe, NULL);
+ if (rc < 0)
+ return rc;
+
+ /* Needn't to enable or already enabled */
+ if ((rc == EEH_STATE_NOT_SUPPORT) ||
+ ((rc & flags) == flags))
+ return 0;
+ }
rc = eeh_ops->set_option(pe, function);
if (rc)
- pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
- __func__, function, pe->phb->global_number, pe->addr, rc);
+ pr_warn("%s: Unexpected state change %d on "
+ "PHB#%d-PE#%x, err=%d\n",
+ __func__, function, pe->phb->global_number,
+ pe->addr, rc);
rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
- (function == EEH_OPT_THAW_MMIO))
+ if (rc <= 0)
+ return rc;
+
+ if ((function == EEH_OPT_THAW_MMIO) &&
+ (rc & EEH_STATE_MMIO_ENABLED))
+ return 0;
+
+ if ((function == EEH_OPT_THAW_DMA) &&
+ (rc & EEH_STATE_DMA_ENABLED))
return 0;
return rc;
@@ -611,26 +664,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe)
else
eeh_ops->reset(pe, EEH_RESET_HOT);
- /* The PCI bus requires that the reset be held high for at least
- * a 100 milliseconds. We wait a bit longer 'just in case'.
- */
-#define PCI_BUS_RST_HOLD_TIME_MSEC 250
- msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
-
- /* We might get hit with another EEH freeze as soon as the
- * pci slot reset line is dropped. Make sure we don't miss
- * these, and clear the flag now.
- */
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
-
eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
-
- /* After a PCI slot has been reset, the PCI Express spec requires
- * a 1.5 second idle time for the bus to stabilize, before starting
- * up traffic.
- */
-#define PCI_BUS_SETTLE_TIME_MSEC 1800
- msleep(PCI_BUS_SETTLE_TIME_MSEC);
}
/**
@@ -650,6 +684,10 @@ int eeh_reset_pe(struct eeh_pe *pe)
for (i=0; i<3; i++) {
eeh_reset_pe_once(pe);
+ /*
+ * EEH_PE_ISOLATED is expected to be removed after
+ * BAR restore.
+ */
rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if ((rc & flags) == flags)
return 0;
@@ -747,6 +785,17 @@ int __exit eeh_ops_unregister(const char *name)
return -EEXIST;
}
+static int eeh_reboot_notifier(struct notifier_block *nb,
+ unsigned long action, void *unused)
+{
+ eeh_set_enable(false);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_reboot_nb = {
+ .notifier_call = eeh_reboot_notifier,
+};
+
/**
* eeh_init - EEH initialization
*
@@ -778,6 +827,14 @@ int eeh_init(void)
if (machine_is(powernv) && cnt++ <= 0)
return ret;
+ /* Register reboot notifier */
+ ret = register_reboot_notifier(&eeh_reboot_nb);
+ if (ret) {
+ pr_warn("%s: Failed to register notifier (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
/* call platform initialization function */
if (!eeh_ops) {
pr_warning("%s: Platform EEH operation not found\n",
@@ -806,8 +863,8 @@ int eeh_init(void)
&hose_list, list_node)
pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
} else {
- pr_warning("%s: Invalid probe mode %d\n",
- __func__, eeh_probe_mode);
+ pr_warn("%s: Invalid probe mode %x",
+ __func__, eeh_subsystem_flags);
return -EINVAL;
}
@@ -822,7 +879,7 @@ int eeh_init(void)
return ret;
}
- if (eeh_subsystem_enabled)
+ if (eeh_enabled())
pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
else
pr_warning("EEH: No capable adapters found\n");
@@ -897,7 +954,7 @@ void eeh_add_device_late(struct pci_dev *dev)
struct device_node *dn;
struct eeh_dev *edev;
- if (!dev || !eeh_subsystem_enabled)
+ if (!dev || !eeh_enabled())
return;
pr_debug("EEH: Adding device %s\n", pci_name(dev));
@@ -921,6 +978,13 @@ void eeh_add_device_late(struct pci_dev *dev)
eeh_sysfs_remove_device(edev->pdev);
edev->mode &= ~EEH_DEV_SYSFS;
+ /*
+ * We definitely should have the PCI device removed
+ * though it wasn't correctly. So we needn't call
+ * into error handler afterwards.
+ */
+ edev->mode |= EEH_DEV_NO_HANDLER;
+
edev->pdev = NULL;
dev->dev.archdata.edev = NULL;
}
@@ -998,7 +1062,7 @@ void eeh_remove_device(struct pci_dev *dev)
{
struct eeh_dev *edev;
- if (!dev || !eeh_subsystem_enabled)
+ if (!dev || !eeh_enabled())
return;
edev = pci_dev_to_eeh_dev(dev);
@@ -1023,6 +1087,14 @@ void eeh_remove_device(struct pci_dev *dev)
else
edev->mode |= EEH_DEV_DISCONNECTED;
+ /*
+ * We're removing from the PCI subsystem, that means
+ * the PCI device driver can't support EEH or not
+ * well. So we rely on hotplug completely to do recovery
+ * for the specific PCI device.
+ */
+ edev->mode |= EEH_DEV_NO_HANDLER;
+
eeh_addr_cache_rmv_dev(dev);
eeh_sysfs_remove_device(dev);
edev->mode &= ~EEH_DEV_SYSFS;
@@ -1030,7 +1102,7 @@ void eeh_remove_device(struct pci_dev *dev)
static int proc_eeh_show(struct seq_file *m, void *v)
{
- if (0 == eeh_subsystem_enabled) {
+ if (!eeh_enabled()) {
seq_printf(m, "EEH Subsystem is globally disabled\n");
seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
} else {
@@ -1067,10 +1139,45 @@ static const struct file_operations proc_eeh_operations = {
.release = single_release,
};
+#ifdef CONFIG_DEBUG_FS
+static int eeh_enable_dbgfs_set(void *data, u64 val)
+{
+ if (val)
+ eeh_subsystem_flags &= ~EEH_FORCE_DISABLED;
+ else
+ eeh_subsystem_flags |= EEH_FORCE_DISABLED;
+
+ /* Notify the backend */
+ if (eeh_ops->post_init)
+ eeh_ops->post_init();
+
+ return 0;
+}
+
+static int eeh_enable_dbgfs_get(void *data, u64 *val)
+{
+ if (eeh_enabled())
+ *val = 0x1ul;
+ else
+ *val = 0x0ul;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
+ eeh_enable_dbgfs_set, "0x%llx\n");
+#endif
+
static int __init eeh_init_proc(void)
{
- if (machine_is(pseries) || machine_is(powernv))
+ if (machine_is(pseries) || machine_is(powernv)) {
proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+#ifdef CONFIG_DEBUG_FS
+ debugfs_create_file("eeh_enable", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_enable_dbgfs_ops);
+#endif
+ }
+
return 0;
}
__initcall(eeh_init_proc);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 36bed5a12750..420da61d4ce0 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -143,17 +143,43 @@ static void eeh_disable_irq(struct pci_dev *dev)
static void eeh_enable_irq(struct pci_dev *dev)
{
struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
- struct irq_desc *desc;
if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
edev->mode &= ~EEH_DEV_IRQ_DISABLED;
-
- desc = irq_to_desc(dev->irq);
- if (desc && desc->depth > 0)
+ /*
+ * FIXME !!!!!
+ *
+ * This is just ass backwards. This maze has
+ * unbalanced irq_enable/disable calls. So instead of
+ * finding the root cause it works around the warning
+ * in the irq_enable code by conditionally calling
+ * into it.
+ *
+ * That's just wrong.The warning in the core code is
+ * there to tell people to fix their assymetries in
+ * their own code, not by abusing the core information
+ * to avoid it.
+ *
+ * I so wish that the assymetry would be the other way
+ * round and a few more irq_disable calls render that
+ * shit unusable forever.
+ *
+ * tglx
+ */
+ if (irqd_irq_disabled(irq_get_irq_data(dev->irq)))
enable_irq(dev->irq);
}
}
+static bool eeh_dev_removed(struct eeh_dev *edev)
+{
+ /* EEH device removed ? */
+ if (!edev || (edev->mode & EEH_DEV_REMOVED))
+ return true;
+
+ return false;
+}
+
/**
* eeh_report_error - Report pci error to each device driver
* @data: eeh device
@@ -170,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- /* We might not have the associated PCI device,
- * then we should continue for next one.
- */
- if (!dev) return NULL;
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
dev->error_state = pci_channel_io_frozen;
driver = eeh_pcid_get(dev);
@@ -213,11 +237,15 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
+
driver = eeh_pcid_get(dev);
if (!driver) return NULL;
if (!driver->err_handler ||
- !driver->err_handler->mmio_enabled) {
+ !driver->err_handler->mmio_enabled ||
+ (edev->mode & EEH_DEV_NO_HANDLER)) {
eeh_pcid_put(dev);
return NULL;
}
@@ -249,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev) return NULL;
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
@@ -258,7 +287,8 @@ static void *eeh_report_reset(void *data, void *userdata)
eeh_enable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->slot_reset) {
+ !driver->err_handler->slot_reset ||
+ (edev->mode & EEH_DEV_NO_HANDLER)) {
eeh_pcid_put(dev);
return NULL;
}
@@ -288,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata)
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_driver *driver;
- if (!dev) return NULL;
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
@@ -297,7 +328,9 @@ static void *eeh_report_resume(void *data, void *userdata)
eeh_enable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->resume) {
+ !driver->err_handler->resume ||
+ (edev->mode & EEH_DEV_NO_HANDLER)) {
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
eeh_pcid_put(dev);
return NULL;
}
@@ -322,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata)
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_driver *driver;
- if (!dev) return NULL;
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
dev->error_state = pci_channel_io_perm_failure;
driver = eeh_pcid_get(dev);
@@ -358,10 +392,24 @@ static void *eeh_rmv_device(void *data, void *userdata)
*/
if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
return NULL;
- driver = eeh_pcid_get(dev);
- if (driver && driver->err_handler)
+
+ /*
+ * We rely on count-based pcibios_release_device() to
+ * detach permanently offlined PEs. Unfortunately, that's
+ * not reliable enough. We might have the permanently
+ * offlined PEs attached, but we needn't take care of
+ * them and their child devices.
+ */
+ if (eeh_dev_removed(edev))
return NULL;
+ driver = eeh_pcid_get(dev);
+ if (driver) {
+ eeh_pcid_put(dev);
+ if (driver->err_handler)
+ return NULL;
+ }
+
/* Remove it from PCI subsystem */
pr_debug("EEH: Removing %s without EEH sensitive driver\n",
pci_name(dev));
@@ -369,7 +417,9 @@ static void *eeh_rmv_device(void *data, void *userdata)
edev->mode |= EEH_DEV_DISCONNECTED;
(*removed)++;
+ pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(dev);
+ pci_unlock_rescan_remove();
return NULL;
}
@@ -390,6 +440,48 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
return NULL;
}
+/*
+ * Explicitly clear PE's frozen state for PowerNV where
+ * we have frozen PE until BAR restore is completed. It's
+ * harmless to clear it for pSeries. To be consistent with
+ * PE reset (for 3 times), we try to clear the frozen state
+ * for 3 times as well.
+ */
+static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ int i, rc;
+
+ for (i = 0; i < 3; i++) {
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ if (rc)
+ continue;
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+ if (!rc)
+ break;
+ }
+
+ /* The PE has been isolated, clear it */
+ if (rc) {
+ pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n",
+ __func__, pe->phb->global_number, pe->addr, rc);
+ return (void *)pe;
+ }
+
+ return NULL;
+}
+
+static int eeh_clear_pe_frozen_state(struct eeh_pe *pe)
+{
+ void *rc;
+
+ rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL);
+ if (!rc)
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+
+ return rc ? -EIO : 0;
+}
+
/**
* eeh_reset_device - Perform actual reset of a pci slot
* @pe: EEH PE
@@ -416,22 +508,41 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
* into pcibios_add_pci_devices().
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
- if (bus)
+ if (bus) {
+ pci_lock_rescan_remove();
pcibios_remove_pci_devices(bus);
- else if (frozen_bus)
+ pci_unlock_rescan_remove();
+ } else if (frozen_bus) {
eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
+ }
- /* Reset the pci controller. (Asserts RST#; resets config space).
+ /*
+ * Reset the pci controller. (Asserts RST#; resets config space).
* Reconfigure bridges and devices. Don't try to bring the system
* up if the reset failed for some reason.
+ *
+ * During the reset, it's very dangerous to have uncontrolled PCI
+ * config accesses. So we prefer to block them. However, controlled
+ * PCI config accesses initiated from EEH itself are allowed.
*/
+ eeh_pe_state_mark(pe, EEH_PE_RESET);
rc = eeh_reset_pe(pe);
- if (rc)
+ if (rc) {
+ eeh_pe_state_clear(pe, EEH_PE_RESET);
return rc;
+ }
+
+ pci_lock_rescan_remove();
/* Restore PE */
eeh_ops->configure_bridge(pe);
eeh_pe_restore_bars(pe);
+ eeh_pe_state_clear(pe, EEH_PE_RESET);
+
+ /* Clear frozen state */
+ rc = eeh_clear_pe_frozen_state(pe);
+ if (rc)
+ return rc;
/* Give the system 5 seconds to finish running the user-space
* hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
@@ -462,13 +573,14 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
pe->tstamp = tstamp;
pe->freeze_count = cnt;
+ pci_unlock_rescan_remove();
return 0;
}
/* The longest amount of time to wait for a pci device
* to come back on line, in seconds.
*/
-#define MAX_WAIT_FOR_RECOVERY 150
+#define MAX_WAIT_FOR_RECOVERY 300
static void eeh_handle_normal_event(struct eeh_pe *pe)
{
@@ -540,7 +652,6 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
result = PCI_ERS_RESULT_NEED_RESET;
} else {
pr_info("EEH: Notify device drivers to resume I/O\n");
- result = PCI_ERS_RESULT_NONE;
eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
}
}
@@ -552,10 +663,17 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
if (rc < 0)
goto hard_fail;
- if (rc)
+ if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
- else
+ } else {
+ /*
+ * We didn't do PE reset for the case. The PE
+ * is still in frozen state. Clear it before
+ * resuming the PE.
+ */
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
result = PCI_ERS_RESULT_RECOVERED;
+ }
}
/* If any device has a hard failure, then shut off everything. */
@@ -617,93 +735,113 @@ perm_error:
/* Notify all devices that they're about to go down. */
eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
- /* Shut down the device drivers for good. */
- if (frozen_bus)
+ /* Mark the PE to be removed permanently */
+ pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1;
+
+ /*
+ * Shut down the device drivers for good. We mark
+ * all removed devices correctly to avoid access
+ * the their PCI config any more.
+ */
+ if (frozen_bus) {
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
+ pci_lock_rescan_remove();
pcibios_remove_pci_devices(frozen_bus);
+ pci_unlock_rescan_remove();
+ }
}
static void eeh_handle_special_event(void)
{
struct eeh_pe *pe, *phb_pe;
struct pci_bus *bus;
- struct pci_controller *hose, *tmp;
+ struct pci_controller *hose;
unsigned long flags;
- int rc = 0;
+ int rc;
- /*
- * The return value from next_error() has been classified as follows.
- * It might be good to enumerate them. However, next_error() is only
- * supported by PowerNV platform for now. So it would be fine to use
- * integer directly:
- *
- * 4 - Dead IOC 3 - Dead PHB
- * 2 - Fenced PHB 1 - Frozen PE
- * 0 - No error found
- *
- */
- rc = eeh_ops->next_error(&pe);
- if (rc <= 0)
- return;
- switch (rc) {
- case 4:
- /* Mark all PHBs in dead state */
- eeh_serialize_lock(&flags);
- list_for_each_entry_safe(hose, tmp,
- &hose_list, list_node) {
- phb_pe = eeh_phb_pe_get(hose);
- if (!phb_pe) continue;
-
- eeh_pe_state_mark(phb_pe,
- EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+ do {
+ rc = eeh_ops->next_error(&pe);
+
+ switch (rc) {
+ case EEH_NEXT_ERR_DEAD_IOC:
+ /* Mark all PHBs in dead state */
+ eeh_serialize_lock(&flags);
+
+ /* Purge all events */
+ eeh_remove_event(NULL, true);
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb_pe = eeh_phb_pe_get(hose);
+ if (!phb_pe) continue;
+
+ eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ }
+
+ eeh_serialize_unlock(flags);
+
+ break;
+ case EEH_NEXT_ERR_FROZEN_PE:
+ case EEH_NEXT_ERR_FENCED_PHB:
+ case EEH_NEXT_ERR_DEAD_PHB:
+ /* Mark the PE in fenced state */
+ eeh_serialize_lock(&flags);
+
+ /* Purge all events of the PHB */
+ eeh_remove_event(pe, true);
+
+ if (rc == EEH_NEXT_ERR_DEAD_PHB)
+ eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+ else
+ eeh_pe_state_mark(pe,
+ EEH_PE_ISOLATED | EEH_PE_RECOVERING);
+
+ eeh_serialize_unlock(flags);
+
+ break;
+ case EEH_NEXT_ERR_NONE:
+ return;
+ default:
+ pr_warn("%s: Invalid value %d from next_error()\n",
+ __func__, rc);
+ return;
}
- eeh_serialize_unlock(flags);
-
- /* Purge all events */
- eeh_remove_event(NULL);
- break;
- case 3:
- case 2:
- case 1:
- /* Mark the PE in fenced state */
- eeh_serialize_lock(&flags);
- if (rc == 3)
- eeh_pe_state_mark(pe,
- EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
- else
- eeh_pe_state_mark(pe,
- EEH_PE_ISOLATED | EEH_PE_RECOVERING);
- eeh_serialize_unlock(flags);
-
- /* Purge all events of the PHB */
- eeh_remove_event(pe);
- break;
- default:
- pr_err("%s: Invalid value %d from next_error()\n",
- __func__, rc);
- return;
- }
- /*
- * For fenced PHB and frozen PE, it's handled as normal
- * event. We have to remove the affected PHBs for dead
- * PHB and IOC
- */
- if (rc == 2 || rc == 1)
- eeh_handle_normal_event(pe);
- else {
- list_for_each_entry_safe(hose, tmp,
- &hose_list, list_node) {
- phb_pe = eeh_phb_pe_get(hose);
- if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD))
- continue;
-
- bus = eeh_pe_bus_get(phb_pe);
- /* Notify all devices that they're about to go down. */
- eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
- pcibios_remove_pci_devices(bus);
+ /*
+ * For fenced PHB and frozen PE, it's handled as normal
+ * event. We have to remove the affected PHBs for dead
+ * PHB and IOC
+ */
+ if (rc == EEH_NEXT_ERR_FROZEN_PE ||
+ rc == EEH_NEXT_ERR_FENCED_PHB) {
+ eeh_handle_normal_event(pe);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ } else {
+ pci_lock_rescan_remove();
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb_pe = eeh_phb_pe_get(hose);
+ if (!phb_pe ||
+ !(phb_pe->state & EEH_PE_ISOLATED) ||
+ (phb_pe->state & EEH_PE_RECOVERING))
+ continue;
+
+ /* Notify all devices to be down */
+ bus = eeh_pe_bus_get(phb_pe);
+ eeh_pe_dev_traverse(pe,
+ eeh_report_failure, NULL);
+ pcibios_remove_pci_devices(bus);
+ }
+ pci_unlock_rescan_remove();
}
- }
+
+ /*
+ * If we have detected dead IOC, we needn't proceed
+ * any more since all PHBs would have been removed
+ */
+ if (rc == EEH_NEXT_ERR_DEAD_IOC)
+ break;
+ } while (rc != EEH_NEXT_ERR_NONE);
}
/**
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 72d748b56c86..4eefb6e34dbb 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -152,24 +152,33 @@ int eeh_send_failure_event(struct eeh_pe *pe)
/**
* eeh_remove_event - Remove EEH event from the queue
* @pe: Event binding to the PE
+ * @force: Event will be removed unconditionally
*
* On PowerNV platform, we might have subsequent coming events
* is part of the former one. For that case, those subsequent
* coming events are totally duplicated and unnecessary, thus
* they should be removed.
*/
-void eeh_remove_event(struct eeh_pe *pe)
+void eeh_remove_event(struct eeh_pe *pe, bool force)
{
unsigned long flags;
struct eeh_event *event, *tmp;
+ /*
+ * If we have NULL PE passed in, we have dead IOC
+ * or we're sure we can report all existing errors
+ * by the caller.
+ *
+ * With "force", the event with associated PE that
+ * have been isolated, the event won't be removed
+ * to avoid event lost.
+ */
spin_lock_irqsave(&eeh_eventlist_lock, flags);
list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
- /*
- * If we don't have valid PE passed in, that means
- * we already have event corresponding to dead IOC
- * and all events should be purged.
- */
+ if (!force && event->pe &&
+ (event->pe->state & EEH_PE_ISOLATED))
+ continue;
+
if (!pe) {
list_del(&event->list);
kfree(event);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index f9450537e335..fbd01eba4473 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -25,7 +25,6 @@
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/gfp.h>
-#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/string.h>
@@ -504,13 +503,17 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
- /*
- * Mark the PE with the indicated state. Also,
- * the associated PCI device will be put into
- * I/O frozen state to avoid I/O accesses from
- * the PCI device driver.
- */
+ /* Keep the state of permanently removed PE intact */
+ if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
+ (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
+ return NULL;
+
pe->state |= state;
+
+ /* Offline PCI devices if applicable */
+ if (state != EEH_PE_ISOLATED)
+ return NULL;
+
eeh_pe_for_each_dev(pe, edev, tmp) {
pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
@@ -533,6 +536,27 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state)
eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
}
+static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
+{
+ struct eeh_dev *edev = data;
+ int mode = *((int *)flag);
+
+ edev->mode |= mode;
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_dev_state_mark - Mark state for all device under the PE
+ * @pe: EEH PE
+ *
+ * Mark specific state for all child devices of the PE.
+ */
+void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
+{
+ eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode);
+}
+
/**
* __eeh_pe_state_clear - Clear state for the PE
* @data: EEH PE
@@ -547,8 +571,16 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
struct eeh_pe *pe = (struct eeh_pe *)data;
int state = *((int *)flag);
+ /* Keep the state of permanently removed PE intact */
+ if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
+ (state & EEH_PE_ISOLATED))
+ return NULL;
+
pe->state &= ~state;
- pe->check_count = 0;
+
+ /* Clear check count since last isolation */
+ if (state & EEH_PE_ISOLATED)
+ pe->check_count = 0;
return NULL;
}
@@ -737,6 +769,9 @@ static void *eeh_restore_one_device_bars(void *data, void *flag)
else
eeh_restore_device_bars(edev, dn);
+ if (eeh_ops->restore_config)
+ eeh_ops->restore_config(dn);
+
return NULL;
}
@@ -757,6 +792,66 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
}
/**
+ * eeh_pe_loc_get - Retrieve location code binding to the given PE
+ * @pe: EEH PE
+ *
+ * Retrieve the location code of the given PE. If the primary PE bus
+ * is root bus, we will grab location code from PHB device tree node
+ * or root port. Otherwise, the upstream bridge's device tree node
+ * of the primary PE bus will be checked for the location code.
+ */
+const char *eeh_pe_loc_get(struct eeh_pe *pe)
+{
+ struct pci_controller *hose;
+ struct pci_bus *bus = eeh_pe_bus_get(pe);
+ struct pci_dev *pdev;
+ struct device_node *dn;
+ const char *loc;
+
+ if (!bus)
+ return "N/A";
+
+ /* PHB PE or root PE ? */
+ if (pci_is_root_bus(bus)) {
+ hose = pci_bus_to_host(bus);
+ loc = of_get_property(hose->dn,
+ "ibm,loc-code", NULL);
+ if (loc)
+ return loc;
+ loc = of_get_property(hose->dn,
+ "ibm,io-base-loc-code", NULL);
+ if (loc)
+ return loc;
+
+ pdev = pci_get_slot(bus, 0x0);
+ } else {
+ pdev = bus->self;
+ }
+
+ if (!pdev) {
+ loc = "N/A";
+ goto out;
+ }
+
+ dn = pci_device_to_OF_node(pdev);
+ if (!dn) {
+ loc = "N/A";
+ goto out;
+ }
+
+ loc = of_get_property(dn, "ibm,loc-code", NULL);
+ if (!loc)
+ loc = of_get_property(dn, "ibm,slot-location-code", NULL);
+ if (!loc)
+ loc = "N/A";
+
+out:
+ if (pci_is_root_bus(bus) && pdev)
+ pci_dev_put(pdev);
+ return loc;
+}
+
+/**
* eeh_pe_bus_get - Retrieve PCI bus according to the given PE
* @pe: EEH PE
*
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 5d753d4f2c75..e2595ba4b720 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -59,6 +59,9 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
int rc=0;
+ if (!eeh_enabled())
+ return;
+
if (edev && (edev->mode & EEH_DEV_SYSFS))
return;
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index bbfb0294b354..6528c5e2cc44 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -39,8 +39,8 @@
* System calls.
*/
.section ".toc","aw"
-.SYS_CALL_TABLE:
- .tc .sys_call_table[TC],.sys_call_table
+SYS_CALL_TABLE:
+ .tc sys_call_table[TC],sys_call_table
/* This value is used to mark exception frames on the stack. */
exception_marker:
@@ -106,7 +106,7 @@ BEGIN_FW_FTR_SECTION
LDX_BE r10,0,r10 /* get log write index */
cmpd cr1,r11,r10
beq+ cr1,33f
- bl .accumulate_stolen_time
+ bl accumulate_stolen_time
REST_GPR(0,r1)
REST_4GPRS(3,r1)
REST_2GPRS(7,r1)
@@ -143,7 +143,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
std r10,SOFTE(r1)
#ifdef SHOW_SYSCALLS
- bl .do_show_syscall
+ bl do_show_syscall
REST_GPR(0,r1)
REST_4GPRS(3,r1)
REST_2GPRS(7,r1)
@@ -162,7 +162,7 @@ system_call: /* label this so stack traces look sane */
* Need to vector to 32 Bit or default sys_call_table here,
* based on caller's run-mode / personality.
*/
- ld r11,.SYS_CALL_TABLE@toc(2)
+ ld r11,SYS_CALL_TABLE@toc(2)
andi. r10,r10,_TIF_32BIT
beq 15f
addi r11,r11,8 /* use 32-bit syscall entries */
@@ -174,14 +174,14 @@ system_call: /* label this so stack traces look sane */
clrldi r8,r8,32
15:
slwi r0,r0,4
- ldx r10,r11,r0 /* Fetch system call handler [ptr] */
- mtctr r10
+ ldx r12,r11,r0 /* Fetch system call handler [ptr] */
+ mtctr r12
bctrl /* Call handler */
syscall_exit:
std r3,RESULT(r1)
#ifdef SHOW_SYSCALLS
- bl .do_show_syscall_exit
+ bl do_show_syscall_exit
ld r3,RESULT(r1)
#endif
CURRENT_THREAD_INFO(r12, r1)
@@ -248,9 +248,9 @@ syscall_error:
/* Traced system call support */
syscall_dotrace:
- bl .save_nvgprs
+ bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_syscall_trace_enter
+ bl do_syscall_trace_enter
/*
* Restore argument registers possibly just changed.
* We use the return value of do_syscall_trace_enter
@@ -308,7 +308,7 @@ syscall_exit_work:
4: /* Anything else left to do? */
SET_DEFAULT_THREAD_PPR(r3, r10) /* Set thread.ppr = 3 */
andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
- beq .ret_from_except_lite
+ beq ret_from_except_lite
/* Re-enable interrupts */
#ifdef CONFIG_PPC_BOOK3E
@@ -319,10 +319,10 @@ syscall_exit_work:
mtmsrd r10,1
#endif /* CONFIG_PPC_BOOK3E */
- bl .save_nvgprs
+ bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_syscall_trace_leave
- b .ret_from_except
+ bl do_syscall_trace_leave
+ b ret_from_except
/* Save non-volatile GPRs, if not already saved. */
_GLOBAL(save_nvgprs)
@@ -345,52 +345,48 @@ _GLOBAL(save_nvgprs)
*/
_GLOBAL(ppc_fork)
- bl .save_nvgprs
- bl .sys_fork
+ bl save_nvgprs
+ bl sys_fork
b syscall_exit
_GLOBAL(ppc_vfork)
- bl .save_nvgprs
- bl .sys_vfork
+ bl save_nvgprs
+ bl sys_vfork
b syscall_exit
_GLOBAL(ppc_clone)
- bl .save_nvgprs
- bl .sys_clone
+ bl save_nvgprs
+ bl sys_clone
b syscall_exit
_GLOBAL(ppc32_swapcontext)
- bl .save_nvgprs
- bl .compat_sys_swapcontext
+ bl save_nvgprs
+ bl compat_sys_swapcontext
b syscall_exit
_GLOBAL(ppc64_swapcontext)
- bl .save_nvgprs
- bl .sys_swapcontext
+ bl save_nvgprs
+ bl sys_swapcontext
b syscall_exit
_GLOBAL(ret_from_fork)
- bl .schedule_tail
+ bl schedule_tail
REST_NVGPRS(r1)
li r3,0
b syscall_exit
_GLOBAL(ret_from_kernel_thread)
- bl .schedule_tail
+ bl schedule_tail
REST_NVGPRS(r1)
- ld r14, 0(r14)
mtlr r14
mr r3,r15
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+ mr r12,r14
+#endif
blrl
li r3,0
b syscall_exit
- .section ".toc","aw"
-DSCR_DEFAULT:
- .tc dscr_default[TC],dscr_default
-
- .section ".text"
-
/*
* This routine switches between two different tasks. The process
* state of one is saved on its kernel stack. Then the state
@@ -432,12 +428,6 @@ BEGIN_FTR_SECTION
std r24,THREAD_VRSAVE(r3)
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_PPC64
-BEGIN_FTR_SECTION
- mfspr r25,SPRN_DSCR
- std r25,THREAD_DSCR(r3)
-END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
-#endif
and. r0,r0,r22
beq+ 1f
andc r22,r22,r0
@@ -575,11 +565,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#ifdef CONFIG_PPC64
BEGIN_FTR_SECTION
lwz r6,THREAD_DSCR_INHERIT(r4)
- ld r7,DSCR_DEFAULT@toc(2)
ld r0,THREAD_DSCR(r4)
cmpwi r6,0
bne 1f
- ld r0,0(r7)
+ ld r0,PACA_DSCR(r13)
1:
BEGIN_FTR_SECTION_NESTED(70)
mfspr r8, SPRN_FSCR
@@ -611,7 +600,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
_GLOBAL(ret_from_except)
ld r11,_TRAP(r1)
andi. r0,r11,1
- bne .ret_from_except_lite
+ bne ret_from_except_lite
REST_NVGPRS(r1)
_GLOBAL(ret_from_except_lite)
@@ -661,15 +650,23 @@ _GLOBAL(ret_from_except_lite)
#endif
1: andi. r0,r4,_TIF_NEED_RESCHED
beq 2f
- bl .restore_interrupts
+ bl restore_interrupts
SCHEDULE_USER
- b .ret_from_except_lite
-
-2: bl .save_nvgprs
- bl .restore_interrupts
+ b ret_from_except_lite
+2:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
+ bne 3f /* only restore TM if nothing else to do */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl restore_tm_state
+ b restore
+3:
+#endif
+ bl save_nvgprs
+ bl restore_interrupts
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_notify_resume
- b .ret_from_except
+ bl do_notify_resume
+ b ret_from_except
resume_kernel:
/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
@@ -722,7 +719,7 @@ resume_kernel:
* sure we are soft-disabled first and reconcile irq state.
*/
RECONCILE_IRQ_STATE(r3,r4)
-1: bl .preempt_schedule_irq
+1: bl preempt_schedule_irq
/* Re-test flags and eventually loop */
CURRENT_THREAD_INFO(r9, r1)
@@ -784,7 +781,7 @@ restore_no_replay:
*/
do_restore:
#ifdef CONFIG_PPC_BOOK3E
- b .exception_return_book3e
+ b exception_return_book3e
#else
/*
* Clear the reservation. If we know the CPU tracks the address of
@@ -899,7 +896,7 @@ restore_check_irq_replay:
*
* Still, this might be useful for things like hash_page
*/
- bl .__check_irq_replay
+ bl __check_irq_replay
cmpwi cr0,r3,0
beq restore_no_replay
@@ -920,13 +917,13 @@ restore_check_irq_replay:
cmpwi cr0,r3,0x500
bne 1f
addi r3,r1,STACK_FRAME_OVERHEAD;
- bl .do_IRQ
- b .ret_from_except
+ bl do_IRQ
+ b ret_from_except
1: cmpwi cr0,r3,0x900
bne 1f
addi r3,r1,STACK_FRAME_OVERHEAD;
- bl .timer_interrupt
- b .ret_from_except
+ bl timer_interrupt
+ b ret_from_except
#ifdef CONFIG_PPC_DOORBELL
1:
#ifdef CONFIG_PPC_BOOK3E
@@ -940,14 +937,14 @@ restore_check_irq_replay:
#endif /* CONFIG_PPC_BOOK3E */
bne 1f
addi r3,r1,STACK_FRAME_OVERHEAD;
- bl .doorbell_exception
- b .ret_from_except
+ bl doorbell_exception
+ b ret_from_except
#endif /* CONFIG_PPC_DOORBELL */
-1: b .ret_from_except /* What else to do here ? */
+1: b ret_from_except /* What else to do here ? */
unrecov_restore:
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
+ bl unrecoverable_exception
b unrecov_restore
#ifdef CONFIG_PPC_RTAS
@@ -1013,7 +1010,7 @@ _GLOBAL(enter_rtas)
std r6,PACASAVEDMSR(r13)
/* Setup our real return addr */
- LOAD_REG_ADDR(r4,.rtas_return_loc)
+ LOAD_REG_ADDR(r4,rtas_return_loc)
clrldi r4,r4,2 /* convert to realmode address */
mtlr r4
@@ -1037,7 +1034,7 @@ _GLOBAL(enter_rtas)
rfid
b . /* prevent speculative execution */
-_STATIC(rtas_return_loc)
+rtas_return_loc:
FIXUP_ENDIAN
/* relocation is off at this point */
@@ -1046,7 +1043,7 @@ _STATIC(rtas_return_loc)
bcl 20,31,$+4
0: mflr r3
- ld r3,(1f-0b)(r3) /* get &.rtas_restore_regs */
+ ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */
mfmsr r6
li r0,MSR_RI
@@ -1063,9 +1060,9 @@ _STATIC(rtas_return_loc)
b . /* prevent speculative execution */
.align 3
-1: .llong .rtas_restore_regs
+1: .llong rtas_restore_regs
-_STATIC(rtas_restore_regs)
+rtas_restore_regs:
/* relocation is on at this point */
REST_GPR(2, r1) /* Restore the TOC */
REST_GPR(13, r1) /* Restore paca */
@@ -1165,7 +1162,7 @@ _GLOBAL(mcount)
_GLOBAL(_mcount)
blr
-_GLOBAL(ftrace_caller)
+_GLOBAL_TOC(ftrace_caller)
/* Taken from output of objdump from lib64/glibc */
mflr r3
ld r11, 0(r1)
@@ -1189,10 +1186,7 @@ _GLOBAL(ftrace_graph_stub)
_GLOBAL(ftrace_stub)
blr
#else
-_GLOBAL(mcount)
- blr
-
-_GLOBAL(_mcount)
+_GLOBAL_TOC(_mcount)
/* Taken from output of objdump from lib64/glibc */
mflr r3
ld r11, 0(r1)
@@ -1230,7 +1224,7 @@ _GLOBAL(ftrace_graph_caller)
ld r11, 112(r1)
addi r3, r11, 16
- bl .prepare_ftrace_return
+ bl prepare_ftrace_return
nop
ld r0, 128(r1)
@@ -1246,7 +1240,7 @@ _GLOBAL(return_to_handler)
mr r31, r1
stdu r1, -112(r1)
- bl .ftrace_return_to_handler
+ bl ftrace_return_to_handler
nop
/* return value has real return address */
@@ -1276,7 +1270,7 @@ _GLOBAL(mod_return_to_handler)
*/
ld r2, PACATOC(r13)
- bl .ftrace_return_to_handler
+ bl ftrace_return_to_handler
nop
/* return value has real return address */
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 7898be90f2dc..59e4ba74975d 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -30,13 +30,14 @@ extern u32 epapr_ev_idle_start[];
#endif
bool epapr_paravirt_enabled;
+static bool __maybe_unused epapr_has_idle;
static int __init early_init_dt_scan_epapr(unsigned long node,
const char *uname,
int depth, void *data)
{
const u32 *insts;
- unsigned long len;
+ int len;
int i;
insts = of_get_flat_dt_prop(node, "hcall-instructions", &len);
@@ -47,15 +48,16 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
return -1;
for (i = 0; i < (len / 4); i++) {
- patch_instruction(epapr_hypercall_start + i, insts[i]);
+ u32 inst = be32_to_cpu(insts[i]);
+ patch_instruction(epapr_hypercall_start + i, inst);
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
- patch_instruction(epapr_ev_idle_start + i, insts[i]);
+ patch_instruction(epapr_ev_idle_start + i, inst);
#endif
}
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
if (of_get_flat_dt_prop(node, "has-idle", NULL))
- ppc_md.power_save = epapr_ev_idle;
+ epapr_has_idle = true;
#endif
epapr_paravirt_enabled = true;
@@ -70,3 +72,14 @@ int __init epapr_paravirt_early_init(void)
return 0;
}
+static int __init epapr_idle_init(void)
+{
+#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
+ if (epapr_has_idle)
+ ppc_md.power_save = epapr_ev_idle;
+#endif
+
+ return 0;
+}
+
+postcore_initcall(epapr_idle_init);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index e7751561fd1d..bb9cac6c8051 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -34,7 +34,250 @@
* special interrupts from within a non-standard level will probably
* blow you up
*/
-#define SPECIAL_EXC_FRAME_SIZE INT_FRAME_SIZE
+#define SPECIAL_EXC_SRR0 0
+#define SPECIAL_EXC_SRR1 1
+#define SPECIAL_EXC_SPRG_GEN 2
+#define SPECIAL_EXC_SPRG_TLB 3
+#define SPECIAL_EXC_MAS0 4
+#define SPECIAL_EXC_MAS1 5
+#define SPECIAL_EXC_MAS2 6
+#define SPECIAL_EXC_MAS3 7
+#define SPECIAL_EXC_MAS6 8
+#define SPECIAL_EXC_MAS7 9
+#define SPECIAL_EXC_MAS5 10 /* E.HV only */
+#define SPECIAL_EXC_MAS8 11 /* E.HV only */
+#define SPECIAL_EXC_IRQHAPPENED 12
+#define SPECIAL_EXC_DEAR 13
+#define SPECIAL_EXC_ESR 14
+#define SPECIAL_EXC_SOFTE 15
+#define SPECIAL_EXC_CSRR0 16
+#define SPECIAL_EXC_CSRR1 17
+/* must be even to keep 16-byte stack alignment */
+#define SPECIAL_EXC_END 18
+
+#define SPECIAL_EXC_FRAME_SIZE (INT_FRAME_SIZE + SPECIAL_EXC_END * 8)
+#define SPECIAL_EXC_FRAME_OFFS (INT_FRAME_SIZE - 288)
+
+#define SPECIAL_EXC_STORE(reg, name) \
+ std reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
+
+#define SPECIAL_EXC_LOAD(reg, name) \
+ ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
+
+special_reg_save:
+ lbz r9,PACAIRQHAPPENED(r13)
+ RECONCILE_IRQ_STATE(r3,r4)
+
+ /*
+ * We only need (or have stack space) to save this stuff if
+ * we interrupted the kernel.
+ */
+ ld r3,_MSR(r1)
+ andi. r3,r3,MSR_PR
+ bnelr
+
+ /* Copy info into temporary exception thread info */
+ ld r11,PACAKSAVE(r13)
+ CURRENT_THREAD_INFO(r11, r11)
+ CURRENT_THREAD_INFO(r12, r1)
+ ld r10,TI_FLAGS(r11)
+ std r10,TI_FLAGS(r12)
+ ld r10,TI_PREEMPT(r11)
+ std r10,TI_PREEMPT(r12)
+ ld r10,TI_TASK(r11)
+ std r10,TI_TASK(r12)
+
+ /*
+ * Advance to the next TLB exception frame for handler
+ * types that don't do it automatically.
+ */
+ LOAD_REG_ADDR(r11,extlb_level_exc)
+ lwz r12,0(r11)
+ mfspr r10,SPRN_SPRG_TLB_EXFRAME
+ add r10,r10,r12
+ mtspr SPRN_SPRG_TLB_EXFRAME,r10
+
+ /*
+ * Save registers needed to allow nesting of certain exceptions
+ * (such as TLB misses) inside special exception levels
+ */
+ mfspr r10,SPRN_SRR0
+ SPECIAL_EXC_STORE(r10,SRR0)
+ mfspr r10,SPRN_SRR1
+ SPECIAL_EXC_STORE(r10,SRR1)
+ mfspr r10,SPRN_SPRG_GEN_SCRATCH
+ SPECIAL_EXC_STORE(r10,SPRG_GEN)
+ mfspr r10,SPRN_SPRG_TLB_SCRATCH
+ SPECIAL_EXC_STORE(r10,SPRG_TLB)
+ mfspr r10,SPRN_MAS0
+ SPECIAL_EXC_STORE(r10,MAS0)
+ mfspr r10,SPRN_MAS1
+ SPECIAL_EXC_STORE(r10,MAS1)
+ mfspr r10,SPRN_MAS2
+ SPECIAL_EXC_STORE(r10,MAS2)
+ mfspr r10,SPRN_MAS3
+ SPECIAL_EXC_STORE(r10,MAS3)
+ mfspr r10,SPRN_MAS6
+ SPECIAL_EXC_STORE(r10,MAS6)
+ mfspr r10,SPRN_MAS7
+ SPECIAL_EXC_STORE(r10,MAS7)
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_MAS5
+ SPECIAL_EXC_STORE(r10,MAS5)
+ mfspr r10,SPRN_MAS8
+ SPECIAL_EXC_STORE(r10,MAS8)
+
+ /* MAS5/8 could have inappropriate values if we interrupted KVM code */
+ li r10,0
+ mtspr SPRN_MAS5,r10
+ mtspr SPRN_MAS8,r10
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+ SPECIAL_EXC_STORE(r9,IRQHAPPENED)
+
+ mfspr r10,SPRN_DEAR
+ SPECIAL_EXC_STORE(r10,DEAR)
+ mfspr r10,SPRN_ESR
+ SPECIAL_EXC_STORE(r10,ESR)
+
+ lbz r10,PACASOFTIRQEN(r13)
+ SPECIAL_EXC_STORE(r10,SOFTE)
+ ld r10,_NIP(r1)
+ SPECIAL_EXC_STORE(r10,CSRR0)
+ ld r10,_MSR(r1)
+ SPECIAL_EXC_STORE(r10,CSRR1)
+
+ blr
+
+ret_from_level_except:
+ ld r3,_MSR(r1)
+ andi. r3,r3,MSR_PR
+ beq 1f
+ b ret_from_except
+1:
+
+ LOAD_REG_ADDR(r11,extlb_level_exc)
+ lwz r12,0(r11)
+ mfspr r10,SPRN_SPRG_TLB_EXFRAME
+ sub r10,r10,r12
+ mtspr SPRN_SPRG_TLB_EXFRAME,r10
+
+ /*
+ * It's possible that the special level exception interrupted a
+ * TLB miss handler, and inserted the same entry that the
+ * interrupted handler was about to insert. On CPUs without TLB
+ * write conditional, this can result in a duplicate TLB entry.
+ * Wipe all non-bolted entries to be safe.
+ *
+ * Note that this doesn't protect against any TLB misses
+ * we may take accessing the stack from here to the end of
+ * the special level exception. It's not clear how we can
+ * reasonably protect against that, but only CPUs with
+ * neither TLB write conditional nor bolted kernel memory
+ * are affected. Do any such CPUs even exist?
+ */
+ PPC_TLBILX_ALL(0,R0)
+
+ REST_NVGPRS(r1)
+
+ SPECIAL_EXC_LOAD(r10,SRR0)
+ mtspr SPRN_SRR0,r10
+ SPECIAL_EXC_LOAD(r10,SRR1)
+ mtspr SPRN_SRR1,r10
+ SPECIAL_EXC_LOAD(r10,SPRG_GEN)
+ mtspr SPRN_SPRG_GEN_SCRATCH,r10
+ SPECIAL_EXC_LOAD(r10,SPRG_TLB)
+ mtspr SPRN_SPRG_TLB_SCRATCH,r10
+ SPECIAL_EXC_LOAD(r10,MAS0)
+ mtspr SPRN_MAS0,r10
+ SPECIAL_EXC_LOAD(r10,MAS1)
+ mtspr SPRN_MAS1,r10
+ SPECIAL_EXC_LOAD(r10,MAS2)
+ mtspr SPRN_MAS2,r10
+ SPECIAL_EXC_LOAD(r10,MAS3)
+ mtspr SPRN_MAS3,r10
+ SPECIAL_EXC_LOAD(r10,MAS6)
+ mtspr SPRN_MAS6,r10
+ SPECIAL_EXC_LOAD(r10,MAS7)
+ mtspr SPRN_MAS7,r10
+BEGIN_FTR_SECTION
+ SPECIAL_EXC_LOAD(r10,MAS5)
+ mtspr SPRN_MAS5,r10
+ SPECIAL_EXC_LOAD(r10,MAS8)
+ mtspr SPRN_MAS8,r10
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+
+ lbz r6,PACASOFTIRQEN(r13)
+ ld r5,SOFTE(r1)
+
+ /* Interrupts had better not already be enabled... */
+ twnei r6,0
+
+ cmpwi cr0,r5,0
+ beq 1f
+
+ TRACE_ENABLE_INTS
+ stb r5,PACASOFTIRQEN(r13)
+1:
+ /*
+ * Restore PACAIRQHAPPENED rather than setting it based on
+ * the return MSR[EE], since we could have interrupted
+ * __check_irq_replay() or other inconsistent transitory
+ * states that must remain that way.
+ */
+ SPECIAL_EXC_LOAD(r10,IRQHAPPENED)
+ stb r10,PACAIRQHAPPENED(r13)
+
+ SPECIAL_EXC_LOAD(r10,DEAR)
+ mtspr SPRN_DEAR,r10
+ SPECIAL_EXC_LOAD(r10,ESR)
+ mtspr SPRN_ESR,r10
+
+ stdcx. r0,0,r1 /* to clear the reservation */
+
+ REST_4GPRS(2, r1)
+ REST_4GPRS(6, r1)
+
+ ld r10,_CTR(r1)
+ ld r11,_XER(r1)
+ mtctr r10
+ mtxer r11
+
+ blr
+
+.macro ret_from_level srr0 srr1 paca_ex scratch
+ bl ret_from_level_except
+
+ ld r10,_LINK(r1)
+ ld r11,_CCR(r1)
+ ld r0,GPR13(r1)
+ mtlr r10
+ mtcr r11
+
+ ld r10,GPR10(r1)
+ ld r11,GPR11(r1)
+ ld r12,GPR12(r1)
+ mtspr \scratch,r0
+
+ std r10,\paca_ex+EX_R10(r13);
+ std r11,\paca_ex+EX_R11(r13);
+ ld r10,_NIP(r1)
+ ld r11,_MSR(r1)
+ ld r0,GPR0(r1)
+ ld r1,GPR1(r1)
+ mtspr \srr0,r10
+ mtspr \srr1,r11
+ ld r10,\paca_ex+EX_R10(r13)
+ ld r11,\paca_ex+EX_R11(r13)
+ mfspr r13,\scratch
+.endm
+
+ret_from_crit_except:
+ ret_from_level SPRN_CSRR0 SPRN_CSRR1 PACA_EXCRIT SPRN_SPRG_CRIT_SCRATCH
+ rfci
+
+ret_from_mc_except:
+ ret_from_level SPRN_MCSRR0 SPRN_MCSRR1 PACA_EXMC SPRN_SPRG_MC_SCRATCH
+ rfmci
/* Exception prolog code for all exceptions */
#define EXCEPTION_PROLOG(n, intnum, type, addition) \
@@ -42,7 +285,6 @@
mfspr r13,SPRN_SPRG_PACA; /* get PACA */ \
std r10,PACA_EX##type+EX_R10(r13); \
std r11,PACA_EX##type+EX_R11(r13); \
- PROLOG_STORE_RESTORE_SCRATCH_##type; \
mfcr r10; /* save CR */ \
mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \
DO_KVM intnum,SPRN_##type##_SRR1; /* KVM hook */ \
@@ -69,19 +311,19 @@
#define CRIT_SET_KSTACK \
ld r1,PACA_CRIT_STACK(r13); \
- subi r1,r1,SPECIAL_EXC_FRAME_SIZE;
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE
#define SPRN_CRIT_SRR0 SPRN_CSRR0
#define SPRN_CRIT_SRR1 SPRN_CSRR1
#define DBG_SET_KSTACK \
ld r1,PACA_DBG_STACK(r13); \
- subi r1,r1,SPECIAL_EXC_FRAME_SIZE;
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE
#define SPRN_DBG_SRR0 SPRN_DSRR0
#define SPRN_DBG_SRR1 SPRN_DSRR1
#define MC_SET_KSTACK \
ld r1,PACA_MC_STACK(r13); \
- subi r1,r1,SPECIAL_EXC_FRAME_SIZE;
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE
#define SPRN_MC_SRR0 SPRN_MCSRR0
#define SPRN_MC_SRR1 SPRN_MCSRR1
@@ -100,20 +342,6 @@
#define GDBELL_EXCEPTION_PROLOG(n, intnum, addition) \
EXCEPTION_PROLOG(n, intnum, GDBELL, addition##_GDBELL(n))
-/*
- * Store user-visible scratch in PACA exception slots and restore proper value
- */
-#define PROLOG_STORE_RESTORE_SCRATCH_GEN
-#define PROLOG_STORE_RESTORE_SCRATCH_GDBELL
-#define PROLOG_STORE_RESTORE_SCRATCH_DBG
-#define PROLOG_STORE_RESTORE_SCRATCH_MC
-
-#define PROLOG_STORE_RESTORE_SCRATCH_CRIT \
- mfspr r10,SPRN_SPRG_CRIT_SCRATCH; /* get r13 */ \
- std r10,PACA_EXCRIT+EX_R13(r13); \
- ld r11,PACA_SPRG3(r13); \
- mtspr SPRN_SPRG_CRIT_SCRATCH,r11;
-
/* Variants of the "addition" argument for the prolog
*/
#define PROLOG_ADDITION_NONE_GEN(n)
@@ -147,10 +375,8 @@
std r15,PACA_EXMC+EX_R15(r13)
-/* Core exception code for all exceptions except TLB misses.
- * XXX: Needs to make SPRN_SPRG_GEN depend on exception type
- */
-#define EXCEPTION_COMMON(n, excf, ints) \
+/* Core exception code for all exceptions except TLB misses. */
+#define EXCEPTION_COMMON_LVL(n, scratch, excf) \
exc_##n##_common: \
std r0,GPR0(r1); /* save r0 in stackframe */ \
std r2,GPR2(r1); /* save r2 in stackframe */ \
@@ -163,7 +389,7 @@ exc_##n##_common: \
ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \
2: ld r3,excf+EX_R10(r13); /* get back r10 */ \
ld r4,excf+EX_R11(r13); /* get back r11 */ \
- mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 */ \
+ mfspr r5,scratch; /* get back r13 */ \
std r12,GPR12(r1); /* save r12 in stackframe */ \
ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
mflr r6; /* save LR in stackframe */ \
@@ -187,24 +413,29 @@ exc_##n##_common: \
std r11,SOFTE(r1); /* and save it to stackframe */ \
std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
std r3,_TRAP(r1); /* set trap number */ \
- std r0,RESULT(r1); /* clear regs->result */ \
- ints;
+ std r0,RESULT(r1); /* clear regs->result */
-/* Variants for the "ints" argument. This one does nothing when we want
- * to keep interrupts in their original state
- */
-#define INTS_KEEP
+#define EXCEPTION_COMMON(n) \
+ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
+#define EXCEPTION_COMMON_CRIT(n) \
+ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_CRIT_SCRATCH, PACA_EXCRIT)
+#define EXCEPTION_COMMON_MC(n) \
+ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_MC_SCRATCH, PACA_EXMC)
+#define EXCEPTION_COMMON_DBG(n) \
+ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG)
-/* This second version is meant for exceptions that don't immediately
- * hard-enable. We set a bit in paca->irq_happened to ensure that
- * a subsequent call to arch_local_irq_restore() will properly
- * hard-enable and avoid the fast-path, and then reconcile irq state.
+/*
+ * This is meant for exceptions that don't immediately hard-enable. We
+ * set a bit in paca->irq_happened to ensure that a subsequent call to
+ * arch_local_irq_restore() will properly hard-enable and avoid the
+ * fast-path, and then reconcile irq state.
*/
#define INTS_DISABLE RECONCILE_IRQ_STATE(r3,r4)
-/* This is called by exceptions that used INTS_KEEP (that did not touch
- * irq indicators in the PACA). This will restore MSR:EE to it's previous
- * value
+/*
+ * This is called by exceptions that don't use INTS_DISABLE (that did not
+ * touch irq indicators in the PACA). This will restore MSR:EE to it's
+ * previous value
*
* XXX In the long run, we may want to open-code it in order to separate the
* load from the wrtee, thus limiting the latency caused by the dependency
@@ -262,12 +493,13 @@ exc_##n##_bad_stack: \
#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \
START_EXCEPTION(label); \
NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
- EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE) \
+ EXCEPTION_COMMON(trapnum) \
+ INTS_DISABLE; \
ack(r8); \
CHECK_NAPPING(); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
bl hdlr; \
- b .ret_from_except_lite;
+ b ret_from_except_lite;
/* This value is used to mark exception frames on the stack. */
.section ".toc","aw"
@@ -283,8 +515,8 @@ exception_marker:
.balign 0x1000
.globl interrupt_base_book3e
interrupt_base_book3e: /* fake trap */
- EXCEPTION_STUB(0x000, machine_check) /* 0x0200 */
- EXCEPTION_STUB(0x020, critical_input) /* 0x0580 */
+ EXCEPTION_STUB(0x000, machine_check)
+ EXCEPTION_STUB(0x020, critical_input) /* 0x0100 */
EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */
EXCEPTION_STUB(0x060, data_storage) /* 0x0300 */
EXCEPTION_STUB(0x080, instruction_storage) /* 0x0400 */
@@ -299,8 +531,8 @@ interrupt_base_book3e: /* fake trap */
EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */
EXCEPTION_STUB(0x1c0, data_tlb_miss)
EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
- EXCEPTION_STUB(0x200, altivec_unavailable) /* 0x0f20 */
- EXCEPTION_STUB(0x220, altivec_assist) /* 0x1700 */
+ EXCEPTION_STUB(0x200, altivec_unavailable)
+ EXCEPTION_STUB(0x220, altivec_assist)
EXCEPTION_STUB(0x260, perfmon)
EXCEPTION_STUB(0x280, doorbell)
EXCEPTION_STUB(0x2a0, doorbell_crit)
@@ -308,6 +540,7 @@ interrupt_base_book3e: /* fake trap */
EXCEPTION_STUB(0x2e0, guest_doorbell_crit)
EXCEPTION_STUB(0x300, hypercall)
EXCEPTION_STUB(0x320, ehpriv)
+ EXCEPTION_STUB(0x340, lrat_error)
.globl interrupt_end_book3e
interrupt_end_book3e:
@@ -316,25 +549,25 @@ interrupt_end_book3e:
START_EXCEPTION(critical_input);
CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
PROLOG_ADDITION_NONE)
-// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE)
-// bl special_reg_save_crit
-// CHECK_NAPPING();
-// addi r3,r1,STACK_FRAME_OVERHEAD
-// bl .critical_exception
-// b ret_from_crit_except
- b .
+ EXCEPTION_COMMON_CRIT(0x100)
+ bl save_nvgprs
+ bl special_reg_save
+ CHECK_NAPPING();
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b ret_from_crit_except
/* Machine Check Interrupt */
START_EXCEPTION(machine_check);
- MC_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_MACHINE_CHECK,
+ MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK,
PROLOG_ADDITION_NONE)
-// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE)
-// bl special_reg_save_mc
-// addi r3,r1,STACK_FRAME_OVERHEAD
-// CHECK_NAPPING();
-// bl .machine_check_exception
-// b ret_from_mc_except
- b .
+ EXCEPTION_COMMON_MC(0x000)
+ bl save_nvgprs
+ bl special_reg_save
+ CHECK_NAPPING();
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_exception
+ b ret_from_mc_except
/* Data Storage Interrupt */
START_EXCEPTION(data_storage)
@@ -342,7 +575,8 @@ interrupt_end_book3e:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
- EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE)
+ EXCEPTION_COMMON(0x300)
+ INTS_DISABLE
b storage_fault_common
/* Instruction Storage Interrupt */
@@ -351,12 +585,13 @@ interrupt_end_book3e:
PROLOG_ADDITION_2REGS)
li r15,0
mr r14,r10
- EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE)
+ EXCEPTION_COMMON(0x400)
+ INTS_DISABLE
b storage_fault_common
/* External Input Interrupt */
MASKABLE_EXCEPTION(0x500, BOOKE_INTERRUPT_EXTERNAL,
- external_input, .do_IRQ, ACK_NONE)
+ external_input, do_IRQ, ACK_NONE)
/* Alignment */
START_EXCEPTION(alignment);
@@ -364,7 +599,7 @@ interrupt_end_book3e:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
- EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP)
+ EXCEPTION_COMMON(0x600)
b alignment_more /* no room, go out of line */
/* Program Interrupt */
@@ -372,90 +607,96 @@ interrupt_end_book3e:
NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
PROLOG_ADDITION_1REG)
mfspr r14,SPRN_ESR
- EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE)
+ EXCEPTION_COMMON(0x700)
+ INTS_DISABLE
std r14,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
ld r14,PACA_EXGEN+EX_R14(r13)
- bl .save_nvgprs
- bl .program_check_exception
- b .ret_from_except
+ bl save_nvgprs
+ bl program_check_exception
+ b ret_from_except
/* Floating Point Unavailable Interrupt */
START_EXCEPTION(fp_unavailable);
NORMAL_EXCEPTION_PROLOG(0x800, BOOKE_INTERRUPT_FP_UNAVAIL,
PROLOG_ADDITION_NONE)
/* we can probably do a shorter exception entry for that one... */
- EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP)
+ EXCEPTION_COMMON(0x800)
ld r12,_MSR(r1)
andi. r0,r12,MSR_PR;
beq- 1f
- bl .load_up_fpu
+ bl load_up_fpu
b fast_exception_return
1: INTS_DISABLE
- bl .save_nvgprs
+ bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .kernel_fp_unavailable_exception
- b .ret_from_except
+ bl kernel_fp_unavailable_exception
+ b ret_from_except
/* Altivec Unavailable Interrupt */
START_EXCEPTION(altivec_unavailable);
NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL,
PROLOG_ADDITION_NONE)
/* we can probably do a shorter exception entry for that one... */
- EXCEPTION_COMMON(0x200, PACA_EXGEN, INTS_KEEP)
+ EXCEPTION_COMMON(0x200)
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
ld r12,_MSR(r1)
andi. r0,r12,MSR_PR;
beq- 1f
- bl .load_up_altivec
+ bl load_up_altivec
b fast_exception_return
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
INTS_DISABLE
- bl .save_nvgprs
+ bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .altivec_unavailable_exception
- b .ret_from_except
+ bl altivec_unavailable_exception
+ b ret_from_except
/* AltiVec Assist */
START_EXCEPTION(altivec_assist);
NORMAL_EXCEPTION_PROLOG(0x220,
BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST,
PROLOG_ADDITION_NONE)
- EXCEPTION_COMMON(0x220, PACA_EXGEN, INTS_DISABLE)
- bl .save_nvgprs
+ EXCEPTION_COMMON(0x220)
+ INTS_DISABLE
+ bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
- bl .altivec_assist_exception
+ bl altivec_assist_exception
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#else
- bl .unknown_exception
+ bl unknown_exception
#endif
- b .ret_from_except
+ b ret_from_except
/* Decrementer Interrupt */
MASKABLE_EXCEPTION(0x900, BOOKE_INTERRUPT_DECREMENTER,
- decrementer, .timer_interrupt, ACK_DEC)
+ decrementer, timer_interrupt, ACK_DEC)
/* Fixed Interval Timer Interrupt */
MASKABLE_EXCEPTION(0x980, BOOKE_INTERRUPT_FIT,
- fixed_interval, .unknown_exception, ACK_FIT)
+ fixed_interval, unknown_exception, ACK_FIT)
/* Watchdog Timer Interrupt */
START_EXCEPTION(watchdog);
CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
PROLOG_ADDITION_NONE)
-// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE)
-// bl special_reg_save_crit
-// CHECK_NAPPING();
-// addi r3,r1,STACK_FRAME_OVERHEAD
-// bl .unknown_exception
-// b ret_from_crit_except
- b .
+ EXCEPTION_COMMON_CRIT(0x9f0)
+ bl save_nvgprs
+ bl special_reg_save
+ CHECK_NAPPING();
+ addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_BOOKE_WDT
+ bl WatchdogException
+#else
+ bl unknown_exception
+#endif
+ b ret_from_crit_except
/* System Call Interrupt */
START_EXCEPTION(system_call)
@@ -469,11 +710,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
START_EXCEPTION(ap_unavailable);
NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
PROLOG_ADDITION_NONE)
- EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE)
- bl .save_nvgprs
+ EXCEPTION_COMMON(0xf20)
+ INTS_DISABLE
+ bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unknown_exception
- b .ret_from_except
+ bl unknown_exception
+ b ret_from_except
/* Debug exception as a critical interrupt*/
START_EXCEPTION(debug_crit);
@@ -512,7 +754,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
mtcr r10
ld r10,PACA_EXCRIT+EX_R10(r13) /* restore registers */
ld r11,PACA_EXCRIT+EX_R11(r13)
- ld r13,PACA_EXCRIT+EX_R13(r13)
+ mfspr r13,SPRN_SPRG_CRIT_SCRATCH
rfci
/* Normal debug exception */
@@ -525,18 +767,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
/* Now we mash up things to make it look like we are coming on a
* normal exception
*/
- ld r15,PACA_EXCRIT+EX_R13(r13)
- mtspr SPRN_SPRG_GEN_SCRATCH,r15
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE)
+ EXCEPTION_COMMON_CRIT(0xd00)
std r14,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
mr r4,r14
ld r14,PACA_EXCRIT+EX_R14(r13)
ld r15,PACA_EXCRIT+EX_R15(r13)
- bl .save_nvgprs
- bl .DebugException
- b .ret_from_except
+ bl save_nvgprs
+ bl DebugException
+ b ret_from_except
kernel_dbg_exc:
b . /* NYI */
@@ -591,43 +831,43 @@ kernel_dbg_exc:
/* Now we mash up things to make it look like we are coming on a
* normal exception
*/
- mfspr r15,SPRN_SPRG_DBG_SCRATCH
- mtspr SPRN_SPRG_GEN_SCRATCH,r15
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON(0xd08, PACA_EXDBG, INTS_DISABLE)
+ EXCEPTION_COMMON_DBG(0xd08)
+ INTS_DISABLE
std r14,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
mr r4,r14
ld r14,PACA_EXDBG+EX_R14(r13)
ld r15,PACA_EXDBG+EX_R15(r13)
- bl .save_nvgprs
- bl .DebugException
- b .ret_from_except
+ bl save_nvgprs
+ bl DebugException
+ b ret_from_except
START_EXCEPTION(perfmon);
NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
PROLOG_ADDITION_NONE)
- EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE)
+ EXCEPTION_COMMON(0x260)
+ INTS_DISABLE
CHECK_NAPPING()
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .performance_monitor_exception
- b .ret_from_except_lite
+ bl performance_monitor_exception
+ b ret_from_except_lite
/* Doorbell interrupt */
MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
- doorbell, .doorbell_exception, ACK_NONE)
+ doorbell, doorbell_exception, ACK_NONE)
/* Doorbell critical Interrupt */
START_EXCEPTION(doorbell_crit);
CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
PROLOG_ADDITION_NONE)
-// EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE)
-// bl special_reg_save_crit
-// CHECK_NAPPING();
-// addi r3,r1,STACK_FRAME_OVERHEAD
-// bl .doorbell_critical_exception
-// b ret_from_crit_except
- b .
+ EXCEPTION_COMMON_CRIT(0x2a0)
+ bl save_nvgprs
+ bl special_reg_save
+ CHECK_NAPPING();
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b ret_from_crit_except
/*
* Guest doorbell interrupt
@@ -636,41 +876,52 @@ kernel_dbg_exc:
START_EXCEPTION(guest_doorbell);
GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL,
PROLOG_ADDITION_NONE)
- EXCEPTION_COMMON(0x2c0, PACA_EXGEN, INTS_KEEP)
+ EXCEPTION_COMMON(0x2c0)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .save_nvgprs
+ bl save_nvgprs
INTS_RESTORE_HARD
- bl .unknown_exception
- b .ret_from_except
+ bl unknown_exception
+ b ret_from_except
/* Guest Doorbell critical Interrupt */
START_EXCEPTION(guest_doorbell_crit);
CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
PROLOG_ADDITION_NONE)
-// EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE)
-// bl special_reg_save_crit
-// CHECK_NAPPING();
-// addi r3,r1,STACK_FRAME_OVERHEAD
-// bl .guest_doorbell_critical_exception
-// b ret_from_crit_except
- b .
+ EXCEPTION_COMMON_CRIT(0x2e0)
+ bl save_nvgprs
+ bl special_reg_save
+ CHECK_NAPPING();
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b ret_from_crit_except
/* Hypervisor call */
START_EXCEPTION(hypercall);
NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL,
PROLOG_ADDITION_NONE)
- EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP)
+ EXCEPTION_COMMON(0x310)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .save_nvgprs
+ bl save_nvgprs
INTS_RESTORE_HARD
- bl .unknown_exception
- b .ret_from_except
+ bl unknown_exception
+ b ret_from_except
/* Embedded Hypervisor priviledged */
START_EXCEPTION(ehpriv);
NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV,
PROLOG_ADDITION_NONE)
- EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP)
+ EXCEPTION_COMMON(0x320)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl save_nvgprs
+ INTS_RESTORE_HARD
+ bl unknown_exception
+ b ret_from_except
+
+/* LRAT Error interrupt */
+ START_EXCEPTION(lrat_error);
+ NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x340)
addi r3,r1,STACK_FRAME_OVERHEAD
bl .save_nvgprs
INTS_RESTORE_HARD
@@ -763,16 +1014,16 @@ storage_fault_common:
mr r5,r15
ld r14,PACA_EXGEN+EX_R14(r13)
ld r15,PACA_EXGEN+EX_R15(r13)
- bl .do_page_fault
+ bl do_page_fault
cmpdi r3,0
bne- 1f
- b .ret_from_except_lite
-1: bl .save_nvgprs
+ b ret_from_except_lite
+1: bl save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
ld r4,_DAR(r1)
- bl .bad_page_fault
- b .ret_from_except
+ bl bad_page_fault
+ b ret_from_except
/*
* Alignment exception doesn't fit entirely in the 0x100 bytes so it
@@ -784,10 +1035,10 @@ alignment_more:
addi r3,r1,STACK_FRAME_OVERHEAD
ld r14,PACA_EXGEN+EX_R14(r13)
ld r15,PACA_EXGEN+EX_R15(r13)
- bl .save_nvgprs
+ bl save_nvgprs
INTS_RESTORE_HARD
- bl .alignment_exception
- b .ret_from_except
+ bl alignment_exception
+ b ret_from_except
/*
* We branch here from entry_64.S for the last stage of the exception
@@ -859,6 +1110,7 @@ BAD_STACK_TRAMPOLINE(0x2e0)
BAD_STACK_TRAMPOLINE(0x300)
BAD_STACK_TRAMPOLINE(0x310)
BAD_STACK_TRAMPOLINE(0x320)
+BAD_STACK_TRAMPOLINE(0x340)
BAD_STACK_TRAMPOLINE(0x400)
BAD_STACK_TRAMPOLINE(0x500)
BAD_STACK_TRAMPOLINE(0x600)
@@ -920,7 +1172,7 @@ bad_stack_book3e:
std r12,0(r11)
ld r2,PACATOC(r13)
1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .kernel_bad_stack
+ bl kernel_bad_stack
b 1b
/*
@@ -1055,12 +1307,9 @@ skpinv: addi r6,r6,1 /* Increment */
mtspr SPRN_MAS0,r3
tlbre
mfspr r6,SPRN_MAS1
- rlwinm r6,r6,0,2,0 /* clear IPROT */
+ rlwinm r6,r6,0,2,31 /* clear IPROT and VALID */
mtspr SPRN_MAS1,r6
tlbwe
-
- /* Invalidate TLB1 */
- PPC_TLBILX_ALL(0,R0)
sync
isync
@@ -1114,12 +1363,9 @@ skpinv: addi r6,r6,1 /* Increment */
mtspr SPRN_MAS0,r4
tlbre
mfspr r5,SPRN_MAS1
- rlwinm r5,r5,0,2,0 /* clear IPROT */
+ rlwinm r5,r5,0,2,31 /* clear IPROT and VALID */
mtspr SPRN_MAS1,r5
tlbwe
-
- /* Invalidate TLB1 */
- PPC_TLBILX_ALL(0,R0)
sync
isync
@@ -1221,22 +1467,6 @@ a2_tlbinit_after_linear_map:
.globl a2_tlbinit_after_iprot_flush
a2_tlbinit_after_iprot_flush:
-#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
- /* Now establish early debug mappings if applicable */
- /* Restore the MAS0 we used for linear mapping load */
- mtspr SPRN_MAS0,r11
-
- lis r3,(MAS1_VALID | MAS1_IPROT)@h
- ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT)
- mtspr SPRN_MAS1,r3
- LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G)
- mtspr SPRN_MAS2,r3
- LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW)
- mtspr SPRN_MAS7_MAS3,r3
- /* re-use the MAS8 value from the linear mapping */
- tlbwe
-#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
-
PPC_TLBILX(0,0,R0)
sync
isync
@@ -1275,13 +1505,13 @@ _GLOBAL(start_initialization_book3e)
* and always use AS 0, so we just set it up to match our link
* address and never use 0 based addresses.
*/
- bl .initial_tlb_book3e
+ bl initial_tlb_book3e
/* Init global core bits */
- bl .init_core_book3e
+ bl init_core_book3e
/* Init per-thread bits */
- bl .init_thread_book3e
+ bl init_thread_book3e
/* Return to common init code */
tovirt(r28,r28)
@@ -1302,7 +1532,7 @@ _GLOBAL(start_initialization_book3e)
*/
_GLOBAL(book3e_secondary_core_init_tlb_set)
li r4,1
- b .generic_secondary_smp_init
+ b generic_secondary_smp_init
_GLOBAL(book3e_secondary_core_init)
mflr r28
@@ -1312,18 +1542,18 @@ _GLOBAL(book3e_secondary_core_init)
bne 2f
/* Setup TLB for this core */
- bl .initial_tlb_book3e
+ bl initial_tlb_book3e
/* We can return from the above running at a different
* address, so recalculate r2 (TOC)
*/
- bl .relative_toc
+ bl relative_toc
/* Init global core bits */
-2: bl .init_core_book3e
+2: bl init_core_book3e
/* Init per-thread bits */
-3: bl .init_thread_book3e
+3: bl init_thread_book3e
/* Return to common init code at proper virtual address.
*
@@ -1350,14 +1580,14 @@ _GLOBAL(book3e_secondary_thread_init)
mflr r28
b 3b
-_STATIC(init_core_book3e)
+init_core_book3e:
/* Establish the interrupt vector base */
LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e)
mtspr SPRN_IVPR,r3
sync
blr
-_STATIC(init_thread_book3e)
+init_thread_book3e:
lis r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h
mtspr SPRN_EPCR,r3
@@ -1414,3 +1644,7 @@ _GLOBAL(setup_ehv_ivors)
SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */
SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */
blr
+
+_GLOBAL(setup_lrat_ivor)
+ SET_IVOR(42, 0x340) /* LRAT Error */
+ blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9f905e40922e..a7d36b19221d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -54,14 +54,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
xori r12,r12,MSR_LE ; \
mtspr SPRN_SRR1,r12 ; \
rfid ; /* return to userspace */ \
- b . ; \
-2: mfspr r12,SPRN_SRR1 ; \
- andi. r12,r12,MSR_PR ; \
- bne 0b ; \
- mtspr SPRN_SRR0,r3 ; \
- mtspr SPRN_SRR1,r4 ; \
- mtspr SPRN_SDR1,r5 ; \
- rfid ; \
b . ; /* prevent speculative execution */
#if defined(CONFIG_RELOCATABLE)
@@ -121,9 +113,10 @@ BEGIN_FTR_SECTION
cmpwi cr1,r13,2
/* Total loss of HV state is fatal, we could try to use the
* PIR to locate a PACA, then use an emergency stack etc...
- * but for now, let's just stay stuck here
+ * OPAL v3 based powernv platforms have new idle states
+ * which fall in this catagory.
*/
- bgt cr1,.
+ bgt cr1,8f
GET_PACA(r13)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -139,8 +132,13 @@ BEGIN_FTR_SECTION
#endif
beq cr1,2f
- b .power7_wakeup_noloss
-2: b .power7_wakeup_loss
+ b power7_wakeup_noloss
+2: b power7_wakeup_loss
+
+ /* Fast Sleep wakeup on PowerNV */
+8: GET_PACA(r13)
+ b power7_wakeup_tb_loss
+
9:
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif /* CONFIG_PPC_P7_NAP */
@@ -155,8 +153,35 @@ machine_check_pSeries_1:
*/
HMT_MEDIUM_PPR_DISCARD
SET_SCRATCH0(r13) /* save r13 */
+#ifdef CONFIG_PPC_P7_NAP
+BEGIN_FTR_SECTION
+ /* Running native on arch 2.06 or later, check if we are
+ * waking up from nap. We only handle no state loss and
+ * supervisor state loss. We do -not- handle hypervisor
+ * state loss at this time.
+ */
+ mfspr r13,SPRN_SRR1
+ rlwinm. r13,r13,47-31,30,31
+ OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
+ beq 9f
+
+ mfspr r13,SPRN_SRR1
+ rlwinm. r13,r13,47-31,30,31
+ /* waking up from powersave (nap) state */
+ cmpwi cr1,r13,2
+ /* Total loss of HV state is fatal. let's just stay stuck here */
+ OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
+ bgt cr1,.
+9:
+ OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif /* CONFIG_PPC_P7_NAP */
EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+ b machine_check_pSeries_early
+FTR_SECTION_ELSE
b machine_check_pSeries_0
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
. = 0x300
.globl data_access_pSeries
@@ -186,16 +211,16 @@ data_access_slb_pSeries:
#endif /* __DISABLED__ */
mfspr r12,SPRN_SRR1
#ifndef CONFIG_RELOCATABLE
- b .slb_miss_realmode
+ b slb_miss_realmode
#else
/*
- * We can't just use a direct branch to .slb_miss_realmode
+ * We can't just use a direct branch to slb_miss_realmode
* because the distance from here to there depends on where
* the kernel ends up being put.
*/
mfctr r11
ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10, .slb_miss_realmode)
+ LOAD_HANDLER(r10, slb_miss_realmode)
mtctr r10
bctr
#endif
@@ -218,11 +243,11 @@ instruction_access_slb_pSeries:
#endif /* __DISABLED__ */
mfspr r12,SPRN_SRR1
#ifndef CONFIG_RELOCATABLE
- b .slb_miss_realmode
+ b slb_miss_realmode
#else
mfctr r11
ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10, .slb_miss_realmode)
+ LOAD_HANDLER(r10, slb_miss_realmode)
mtctr r10
bctr
#endif
@@ -405,6 +430,80 @@ denorm_exception_hv:
.align 7
/* moved from 0x200 */
+machine_check_pSeries_early:
+BEGIN_FTR_SECTION
+ EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+ /*
+ * Register contents:
+ * R13 = PACA
+ * R9 = CR
+ * Original R9 to R13 is saved on PACA_EXMC
+ *
+ * Switch to mc_emergency stack and handle re-entrancy (we limit
+ * the nested MCE upto level 4 to avoid stack overflow).
+ * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
+ *
+ * We use paca->in_mce to check whether this is the first entry or
+ * nested machine check. We increment paca->in_mce to track nested
+ * machine checks.
+ *
+ * If this is the first entry then set stack pointer to
+ * paca->mc_emergency_sp, otherwise r1 is already pointing to
+ * stack frame on mc_emergency stack.
+ *
+ * NOTE: We are here with MSR_ME=0 (off), which means we risk a
+ * checkstop if we get another machine check exception before we do
+ * rfid with MSR_ME=1.
+ */
+ mr r11,r1 /* Save r1 */
+ lhz r10,PACA_IN_MCE(r13)
+ cmpwi r10,0 /* Are we in nested machine check */
+ bne 0f /* Yes, we are. */
+ /* First machine check entry */
+ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
+0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ addi r10,r10,1 /* increment paca->in_mce */
+ sth r10,PACA_IN_MCE(r13)
+ /* Limit nested MCE to level 4 to avoid stack overflow */
+ cmpwi r10,4
+ bgt 2f /* Check if we hit limit of 4 */
+ std r11,GPR1(r1) /* Save r1 on the stack. */
+ std r11,0(r1) /* make stack chain pointer */
+ mfspr r11,SPRN_SRR0 /* Save SRR0 */
+ std r11,_NIP(r1)
+ mfspr r11,SPRN_SRR1 /* Save SRR1 */
+ std r11,_MSR(r1)
+ mfspr r11,SPRN_DAR /* Save DAR */
+ std r11,_DAR(r1)
+ mfspr r11,SPRN_DSISR /* Save DSISR */
+ std r11,_DSISR(r1)
+ std r9,_CCR(r1) /* Save CR in stackframe */
+ /* Save r9 through r13 from EXMC save area to stack frame. */
+ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
+ mfmsr r11 /* get MSR value */
+ ori r11,r11,MSR_ME /* turn on ME bit */
+ ori r11,r11,MSR_RI /* turn on RI bit */
+ ld r12,PACAKBASE(r13) /* get high part of &label */
+ LOAD_HANDLER(r12, machine_check_handle_early)
+1: mtspr SPRN_SRR0,r12
+ mtspr SPRN_SRR1,r11
+ rfid
+ b . /* prevent speculative execution */
+2:
+ /* Stack overflow. Stay on emergency stack and panic.
+ * Keep the ME bit off while panic-ing, so that if we hit
+ * another machine check we checkstop.
+ */
+ addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */
+ ld r11,PACAKMSR(r13)
+ ld r12,PACAKBASE(r13)
+ LOAD_HANDLER(r12, unrecover_mce)
+ li r10,MSR_ME
+ andc r11,r11,r10 /* Turn off MSR_ME */
+ b 1b
+ b . /* prevent speculative execution */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
machine_check_pSeries:
.globl machine_check_fwnmi
machine_check_fwnmi:
@@ -441,7 +540,7 @@ do_stab_bolted_pSeries:
std r12,PACA_EXSLB+EX_R12(r13)
GET_SCRATCH0(r10)
std r10,PACA_EXSLB+EX_R13(r13)
- EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
+ EXCEPTION_PROLOG_PSERIES_1(do_stab_bolted, EXC_STD)
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
@@ -686,62 +785,38 @@ kvmppc_skip_Hinterrupt:
/*** Common interrupt handlers ***/
- STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
-
- /*
- * Machine check is different because we use a different
- * save area: PACA_EXMC instead of PACA_EXGEN.
- */
- .align 7
- .globl machine_check_common
-machine_check_common:
-
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
- FINISH_NAP
- DISABLE_INTS
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
- bl .save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .machine_check_exception
- b .ret_from_except
+ STD_EXCEPTION_COMMON(0x100, system_reset, system_reset_exception)
STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
- STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
- STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt)
+ STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, timer_interrupt)
+ STD_EXCEPTION_COMMON(0x980, hdecrementer, hdec_interrupt)
#ifdef CONFIG_PPC_DOORBELL
- STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, .doorbell_exception)
+ STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, doorbell_exception)
#else
- STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, .unknown_exception)
+ STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, unknown_exception)
#endif
- STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
- STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
- STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
- STD_EXCEPTION_COMMON(0xe40, emulation_assist, .emulation_assist_interrupt)
- STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
+ STD_EXCEPTION_COMMON(0xb00, trap_0b, unknown_exception)
+ STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception)
+ STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception)
+ STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt)
+ STD_EXCEPTION_COMMON(0xe60, hmi_exception, unknown_exception)
#ifdef CONFIG_PPC_DOORBELL
- STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .doorbell_exception)
+ STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception)
#else
- STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .unknown_exception)
+ STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception)
#endif
- STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception)
- STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
- STD_EXCEPTION_COMMON(0x1502, denorm, .unknown_exception)
+ STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception)
+ STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception)
+ STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception)
#ifdef CONFIG_ALTIVEC
- STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
+ STD_EXCEPTION_COMMON(0x1700, altivec_assist, altivec_assist_exception)
#else
- STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
+ STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception)
#endif
#ifdef CONFIG_CBE_RAS
- STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception)
- STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception)
- STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)
+ STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception)
+ STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception)
+ STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception)
#endif /* CONFIG_CBE_RAS */
/*
@@ -770,16 +845,16 @@ data_access_slb_relon_pSeries:
mfspr r3,SPRN_DAR
mfspr r12,SPRN_SRR1
#ifndef CONFIG_RELOCATABLE
- b .slb_miss_realmode
+ b slb_miss_realmode
#else
/*
- * We can't just use a direct branch to .slb_miss_realmode
+ * We can't just use a direct branch to slb_miss_realmode
* because the distance from here to there depends on where
* the kernel ends up being put.
*/
mfctr r11
ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10, .slb_miss_realmode)
+ LOAD_HANDLER(r10, slb_miss_realmode)
mtctr r10
bctr
#endif
@@ -795,11 +870,11 @@ instruction_access_slb_relon_pSeries:
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
mfspr r12,SPRN_SRR1
#ifndef CONFIG_RELOCATABLE
- b .slb_miss_realmode
+ b slb_miss_realmode
#else
mfctr r11
ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10, .slb_miss_realmode)
+ LOAD_HANDLER(r10, slb_miss_realmode)
mtctr r10
bctr
#endif
@@ -907,7 +982,7 @@ system_call_entry:
b system_call_common
ppc64_runlatch_on_trampoline:
- b .__ppc64_runlatch_on
+ b __ppc64_runlatch_on
/*
* Here we have detected that the kernel stack pointer is bad.
@@ -966,7 +1041,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
std r12,RESULT(r1)
std r11,STACK_FRAME_OVERHEAD-16(r1)
1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .kernel_bad_stack
+ bl kernel_bad_stack
b 1b
/*
@@ -987,7 +1062,7 @@ data_access_common:
ld r3,PACA_EXGEN+EX_DAR(r13)
lwz r4,PACA_EXGEN+EX_DSISR(r13)
li r5,0x300
- b .do_hash_page /* Try to handle as hpte fault */
+ b do_hash_page /* Try to handle as hpte fault */
.align 7
.globl h_data_storage_common
@@ -997,11 +1072,11 @@ h_data_storage_common:
mfspr r10,SPRN_HDSISR
stw r10,PACA_EXGEN+EX_DSISR(r13)
EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
- bl .save_nvgprs
+ bl save_nvgprs
DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unknown_exception
- b .ret_from_except
+ bl unknown_exception
+ b ret_from_except
.align 7
.globl instruction_access_common
@@ -1012,9 +1087,9 @@ instruction_access_common:
ld r3,_NIP(r1)
andis. r4,r12,0x5820
li r5,0x400
- b .do_hash_page /* Try to handle as hpte fault */
+ b do_hash_page /* Try to handle as hpte fault */
- STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
+ STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
/*
* Here is the common SLB miss user that is used when going to virtual
@@ -1029,7 +1104,7 @@ slb_miss_user_common:
stw r9,PACA_EXGEN+EX_CCR(r13)
std r10,PACA_EXGEN+EX_LR(r13)
std r11,PACA_EXGEN+EX_SRR0(r13)
- bl .slb_allocate_user
+ bl slb_allocate_user
ld r10,PACA_EXGEN+EX_LR(r13)
ld r3,PACA_EXGEN+EX_R3(r13)
@@ -1072,14 +1147,38 @@ slb_miss_fault:
unrecov_user_slb:
EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
DISABLE_INTS
- bl .save_nvgprs
+ bl save_nvgprs
1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
+ bl unrecoverable_exception
b 1b
#endif /* __DISABLED__ */
+ /*
+ * Machine check is different because we use a different
+ * save area: PACA_EXMC instead of PACA_EXGEN.
+ */
+ .align 7
+ .globl machine_check_common
+machine_check_common:
+
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_DSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
+ FINISH_NAP
+ DISABLE_INTS
+ ld r3,PACA_EXGEN+EX_DAR(r13)
+ lwz r4,PACA_EXGEN+EX_DSISR(r13)
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_exception
+ b ret_from_except
+
.align 7
.globl alignment_common
alignment_common:
@@ -1092,31 +1191,31 @@ alignment_common:
lwz r4,PACA_EXGEN+EX_DSISR(r13)
std r3,_DAR(r1)
std r4,_DSISR(r1)
- bl .save_nvgprs
+ bl save_nvgprs
DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .alignment_exception
- b .ret_from_except
+ bl alignment_exception
+ b ret_from_except
.align 7
.globl program_check_common
program_check_common:
EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
- bl .save_nvgprs
+ bl save_nvgprs
DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .program_check_exception
- b .ret_from_except
+ bl program_check_exception
+ b ret_from_except
.align 7
.globl fp_unavailable_common
fp_unavailable_common:
EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
bne 1f /* if from user, just load it up */
- bl .save_nvgprs
+ bl save_nvgprs
DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .kernel_fp_unavailable_exception
+ bl kernel_fp_unavailable_exception
BUG_OPCODE
1:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1128,15 +1227,15 @@ BEGIN_FTR_SECTION
bne- 2f
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
- bl .load_up_fpu
+ bl load_up_fpu
b fast_exception_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl .save_nvgprs
+ bl save_nvgprs
DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .fp_unavailable_tm
- b .ret_from_except
+ bl fp_unavailable_tm
+ b ret_from_except
#endif
.align 7
.globl altivec_unavailable_common
@@ -1154,24 +1253,24 @@ BEGIN_FTR_SECTION
bne- 2f
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
#endif
- bl .load_up_altivec
+ bl load_up_altivec
b fast_exception_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl .save_nvgprs
+ bl save_nvgprs
DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .altivec_unavailable_tm
- b .ret_from_except
+ bl altivec_unavailable_tm
+ b ret_from_except
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- bl .save_nvgprs
+ bl save_nvgprs
DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .altivec_unavailable_exception
- b .ret_from_except
+ bl altivec_unavailable_exception
+ b ret_from_except
.align 7
.globl vsx_unavailable_common
@@ -1189,26 +1288,26 @@ BEGIN_FTR_SECTION
bne- 2f
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
#endif
- b .load_up_vsx
+ b load_up_vsx
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl .save_nvgprs
+ bl save_nvgprs
DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .vsx_unavailable_tm
- b .ret_from_except
+ bl vsx_unavailable_tm
+ b ret_from_except
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- bl .save_nvgprs
+ bl save_nvgprs
DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .vsx_unavailable_exception
- b .ret_from_except
+ bl vsx_unavailable_exception
+ b ret_from_except
- STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception)
- STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, .facility_unavailable_exception)
+ STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception)
+ STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception)
.align 7
.globl __end_handlers
@@ -1263,6 +1362,154 @@ _GLOBAL(opal_mc_secondary_handler)
#endif /* CONFIG_PPC_POWERNV */
+#define MACHINE_CHECK_HANDLER_WINDUP \
+ /* Clear MSR_RI before setting SRR0 and SRR1. */\
+ li r0,MSR_RI; \
+ mfmsr r9; /* get MSR value */ \
+ andc r9,r9,r0; \
+ mtmsrd r9,1; /* Clear MSR_RI */ \
+ /* Move original SRR0 and SRR1 into the respective regs */ \
+ ld r9,_MSR(r1); \
+ mtspr SPRN_SRR1,r9; \
+ ld r3,_NIP(r1); \
+ mtspr SPRN_SRR0,r3; \
+ ld r9,_CTR(r1); \
+ mtctr r9; \
+ ld r9,_XER(r1); \
+ mtxer r9; \
+ ld r9,_LINK(r1); \
+ mtlr r9; \
+ REST_GPR(0, r1); \
+ REST_8GPRS(2, r1); \
+ REST_GPR(10, r1); \
+ ld r11,_CCR(r1); \
+ mtcr r11; \
+ /* Decrement paca->in_mce. */ \
+ lhz r12,PACA_IN_MCE(r13); \
+ subi r12,r12,1; \
+ sth r12,PACA_IN_MCE(r13); \
+ REST_GPR(11, r1); \
+ REST_2GPRS(12, r1); \
+ /* restore original r1. */ \
+ ld r1,GPR1(r1)
+
+ /*
+ * Handle machine check early in real mode. We come here with
+ * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
+ */
+ .align 7
+ .globl machine_check_handle_early
+machine_check_handle_early:
+ std r0,GPR0(r1) /* Save r0 */
+ EXCEPTION_PROLOG_COMMON_3(0x200)
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_early
+ std r3,RESULT(r1) /* Save result */
+ ld r12,_MSR(r1)
+#ifdef CONFIG_PPC_P7_NAP
+ /*
+ * Check if thread was in power saving mode. We come here when any
+ * of the following is true:
+ * a. thread wasn't in power saving mode
+ * b. thread was in power saving mode with no state loss or
+ * supervisor state loss
+ *
+ * Go back to nap again if (b) is true.
+ */
+ rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
+ beq 4f /* No, it wasn;t */
+ /* Thread was in power saving mode. Go back to nap again. */
+ cmpwi r11,2
+ bne 3f
+ /* Supervisor state loss */
+ li r0,1
+ stb r0,PACA_NAPSTATELOST(r13)
+3: bl machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ GET_PACA(r13)
+ ld r1,PACAR1(r13)
+ b power7_enter_nap_mode
+4:
+#endif
+ /*
+ * Check if we are coming from hypervisor userspace. If yes then we
+ * continue in host kernel in V mode to deliver the MC event.
+ */
+ rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
+ beq 5f
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne 9f /* continue in V mode if we are. */
+
+5:
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ /*
+ * We are coming from kernel context. Check if we are coming from
+ * guest. if yes, then we can continue. We will fall through
+ * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
+ */
+ lbz r11,HSTATE_IN_GUEST(r13)
+ cmpwi r11,0 /* Check if coming from guest */
+ bne 9f /* continue if we are. */
+#endif
+ /*
+ * At this point we are not sure about what context we come from.
+ * Queue up the MCE event and return from the interrupt.
+ * But before that, check if this is an un-recoverable exception.
+ * If yes, then stay on emergency stack and panic.
+ */
+ andi. r11,r12,MSR_RI
+ bne 2f
+1: mfspr r11,SPRN_SRR0
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10,unrecover_mce)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ /*
+ * We are going down. But there are chances that we might get hit by
+ * another MCE during panic path and we may run into unstable state
+ * with no way out. Hence, turn ME bit off while going down, so that
+ * when another MCE is hit during panic path, system will checkstop
+ * and hypervisor will get restarted cleanly by SP.
+ */
+ li r3,MSR_ME
+ andc r10,r10,r3 /* Turn off MSR_ME */
+ mtspr SPRN_SRR1,r10
+ rfid
+ b .
+2:
+ /*
+ * Check if we have successfully handled/recovered from error, if not
+ * then stay on emergency stack and panic.
+ */
+ ld r3,RESULT(r1) /* Load result */
+ cmpdi r3,0 /* see if we handled MCE successfully */
+
+ beq 1b /* if !handled then panic */
+ /*
+ * Return from MC interrupt.
+ * Queue up the MCE event so that we can log it later, while
+ * returning from kernel or opal call.
+ */
+ bl machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ rfid
+9:
+ /* Deliver the machine check to host kernel in V mode. */
+ MACHINE_CHECK_HANDLER_WINDUP
+ b machine_check_pSeries
+
+unrecover_mce:
+ /* Invoke machine_check_exception to print MCE event and panic. */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_exception
+ /*
+ * We will not reach here. Even if we did, there is no way out. Call
+ * unrecoverable_exception and die.
+ */
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unrecoverable_exception
+ b 1b
/*
* r13 points to the PACA, r9 contains the saved CR,
* r12 contain the saved SRR1, SRR0 is still ready for return
@@ -1271,7 +1518,7 @@ _GLOBAL(opal_mc_secondary_handler)
* r3 is saved in paca->slb_r3
* We assume we aren't going to take any exceptions during this procedure.
*/
-_GLOBAL(slb_miss_realmode)
+slb_miss_realmode:
mflr r10
#ifdef CONFIG_RELOCATABLE
mtctr r11
@@ -1280,7 +1527,7 @@ _GLOBAL(slb_miss_realmode)
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
- bl .slb_allocate_realmode
+ bl slb_allocate_realmode
/* All done -- return from exception. */
@@ -1320,9 +1567,9 @@ _GLOBAL(slb_miss_realmode)
unrecov_slb:
EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
DISABLE_INTS
- bl .save_nvgprs
+ bl save_nvgprs
1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
+ bl unrecoverable_exception
b 1b
@@ -1339,7 +1586,7 @@ power4_fixup_nap:
* Hash table stuff
*/
.align 7
-_STATIC(do_hash_page)
+do_hash_page:
std r3,_DAR(r1)
std r4,_DSISR(r1)
@@ -1376,7 +1623,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
*
* at return r3 = 0 for success, 1 for page fault, negative for error
*/
- bl .hash_page /* build HPTE if possible */
+ bl hash_page /* build HPTE if possible */
cmpdi r3,0 /* see if hash_page succeeded */
/* Success */
@@ -1390,35 +1637,35 @@ handle_page_fault:
11: ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_page_fault
+ bl do_page_fault
cmpdi r3,0
beq+ 12f
- bl .save_nvgprs
+ bl save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
lwz r4,_DAR(r1)
- bl .bad_page_fault
- b .ret_from_except
+ bl bad_page_fault
+ b ret_from_except
/* We have a data breakpoint exception - handle it */
handle_dabr_fault:
- bl .save_nvgprs
+ bl save_nvgprs
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_break
-12: b .ret_from_except_lite
+ bl do_break
+12: b ret_from_except_lite
/* We have a page fault that hash_page could handle but HV refused
* the PTE insertion
*/
-13: bl .save_nvgprs
+13: bl save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
ld r4,_DAR(r1)
- bl .low_hash_fault
- b .ret_from_except
+ bl low_hash_fault
+ b ret_from_except
/*
* We come here as a result of a DSI at a point where we don't want
@@ -1427,16 +1674,16 @@ handle_dabr_fault:
* were soft-disabled. We want to invoke the exception handler for
* the access, or panic if there isn't a handler.
*/
-77: bl .save_nvgprs
+77: bl save_nvgprs
mr r4,r3
addi r3,r1,STACK_FRAME_OVERHEAD
li r5,SIGSEGV
- bl .bad_page_fault
- b .ret_from_except
+ bl bad_page_fault
+ b ret_from_except
/* here we have a segment miss */
do_ste_alloc:
- bl .ste_allocate /* try to insert stab entry */
+ bl ste_allocate /* try to insert stab entry */
cmpdi r3,0
bne- handle_page_fault
b fast_exception_return
@@ -1449,7 +1696,7 @@ do_ste_alloc:
* We assume (DAR >> 60) == 0xc.
*/
.align 7
-_GLOBAL(do_stab_bolted)
+do_stab_bolted:
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */
mfspr r11,SPRN_DAR /* ea */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 2230fd0ca3e4..742694c1d852 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -55,9 +55,9 @@ int crash_mem_ranges;
int __init early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data)
{
- __be32 *sections;
+ const __be32 *sections;
int i, num_sections;
- unsigned long size;
+ int size;
const int *token;
if (depth != 1 || strcmp(uname, "rtas") != 0)
@@ -69,7 +69,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
*/
token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
if (!token)
- return 0;
+ return 1;
fw_dump.fadump_supported = 1;
fw_dump.ibm_configure_kernel_dump = *token;
@@ -92,7 +92,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
&size);
if (!sections)
- return 0;
+ return 1;
num_sections = size / (3 * sizeof(u32));
@@ -110,6 +110,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
break;
}
}
+
return 1;
}
@@ -645,7 +646,7 @@ static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
}
/* Lower 4 bytes of reg_value contains logical cpu id */
cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
- if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
+ if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
SKIP_TO_NEXT_CPU(reg_entry);
continue;
}
@@ -662,9 +663,11 @@ static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
}
fadump_final_note(note_buf);
- pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ if (fdh) {
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
fdh->elfcorehdr_addr);
- fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+ fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+ }
return 0;
error_out:
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index f7f5b8bed68f..9ad236e5d2c9 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -81,6 +81,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
/*
+ * Enable use of the FPU, and VSX if possible, for the caller.
+ */
+_GLOBAL(fp_enable)
+ mfmsr r3
+ ori r3,r3,MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r3,r3,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ SYNC
+ MTMSRD(r3)
+ isync /* (not necessary for arch 2.02 and later) */
+ blr
+
+/*
* Load state from memory into FP registers including FPSCR.
* Assumes the caller has enabled FP in the MSR.
*/
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index a92c79be2728..f22e7e44fbf3 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -176,6 +176,8 @@ skpinv: addi r6,r6,1 /* Increment */
/* 7. Jump to KERNELBASE mapping */
lis r6,(KERNELBASE & ~0xfff)@h
ori r6,r6,(KERNELBASE & ~0xfff)@l
+ rlwinm r7,r25,0,0x03ffffff
+ add r6,r7,r6
#elif defined(ENTRY_MAPPING_KEXEC_SETUP)
/*
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 9b27b293a922..d178834fe508 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -10,6 +10,8 @@
*
*/
+#define pr_fmt(fmt) "ftrace-powerpc: " fmt
+
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
@@ -74,6 +76,7 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
*/
static int test_24bit_addr(unsigned long ip, unsigned long addr)
{
+ addr = ppc_function_entry((void *)addr);
/* use the create_branch to verify that this offset can be branched */
return create_branch((unsigned int *)ip, addr, 0);
@@ -104,11 +107,9 @@ __ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
unsigned int op;
- unsigned int jmp[5];
- unsigned long ptr;
+ unsigned long entry, ptr;
unsigned long ip = rec->ip;
- unsigned long tramp;
- int offset;
+ void *tramp;
/* read where this goes */
if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
@@ -116,101 +117,46 @@ __ftrace_make_nop(struct module *mod,
/* Make sure that that this is still a 24bit jump */
if (!is_bl_op(op)) {
- printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
+ pr_err("Not expected bl: opcode is %x\n", op);
return -EINVAL;
}
/* lets find where the pointer goes */
- tramp = find_bl_target(ip, op);
-
- /*
- * On PPC64 the trampoline looks like:
- * 0x3d, 0x82, 0x00, 0x00, addis r12,r2, <high>
- * 0x39, 0x8c, 0x00, 0x00, addi r12,r12, <low>
- * Where the bytes 2,3,6 and 7 make up the 32bit offset
- * to the TOC that holds the pointer.
- * to jump to.
- * 0xf8, 0x41, 0x00, 0x28, std r2,40(r1)
- * 0xe9, 0x6c, 0x00, 0x20, ld r11,32(r12)
- * The actually address is 32 bytes from the offset
- * into the TOC.
- * 0xe8, 0x4c, 0x00, 0x28, ld r2,40(r12)
- */
-
- pr_devel("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
-
- /* Find where the trampoline jumps to */
- if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
- printk(KERN_ERR "Failed to read %lx\n", tramp);
- return -EFAULT;
- }
+ tramp = (void *)find_bl_target(ip, op);
- pr_devel(" %08x %08x", jmp[0], jmp[1]);
+ pr_devel("ip:%lx jumps to %p", ip, tramp);
- /* verify that this is what we expect it to be */
- if (((jmp[0] & 0xffff0000) != 0x3d820000) ||
- ((jmp[1] & 0xffff0000) != 0x398c0000) ||
- (jmp[2] != 0xf8410028) ||
- (jmp[3] != 0xe96c0020) ||
- (jmp[4] != 0xe84c0028)) {
- printk(KERN_ERR "Not a trampoline\n");
+ if (!is_module_trampoline(tramp)) {
+ pr_err("Not a trampoline\n");
return -EINVAL;
}
- /* The bottom half is signed extended */
- offset = ((unsigned)((unsigned short)jmp[0]) << 16) +
- (int)((short)jmp[1]);
-
- pr_devel(" %x ", offset);
-
- /* get the address this jumps too */
- tramp = mod->arch.toc + offset + 32;
- pr_devel("toc: %lx", tramp);
-
- if (probe_kernel_read(jmp, (void *)tramp, 8)) {
- printk(KERN_ERR "Failed to read %lx\n", tramp);
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
return -EFAULT;
}
- pr_devel(" %08x %08x\n", jmp[0], jmp[1]);
-
-#ifdef __LITTLE_ENDIAN__
- ptr = ((unsigned long)jmp[1] << 32) + jmp[0];
-#else
- ptr = ((unsigned long)jmp[0] << 32) + jmp[1];
-#endif
+ pr_devel("trampoline target %lx", ptr);
+ entry = ppc_global_function_entry((void *)addr);
/* This should match what was called */
- if (ptr != ppc_function_entry((void *)addr)) {
- printk(KERN_ERR "addr does not match %lx\n", ptr);
+ if (ptr != entry) {
+ pr_err("addr %lx does not match expected %lx\n", ptr, entry);
return -EINVAL;
}
/*
- * We want to nop the line, but the next line is
- * 0xe8, 0x41, 0x00, 0x28 ld r2,40(r1)
- * This needs to be turned to a nop too.
- */
- if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE))
- return -EFAULT;
-
- if (op != 0xe8410028) {
- printk(KERN_ERR "Next line is not ld! (%08x)\n", op);
- return -EINVAL;
- }
-
- /*
- * Milton Miller pointed out that we can not blindly do nops.
- * If a task was preempted when calling a trace function,
- * the nops will remove the way to restore the TOC in r2
- * and the r2 TOC will get corrupted.
- */
-
- /*
- * Replace:
- * bl <tramp> <==== will be replaced with "b 1f"
- * ld r2,40(r1)
- * 1:
+ * Our original call site looks like:
+ *
+ * bl <tramp>
+ * ld r2,XX(r1)
+ *
+ * Milton Miller pointed out that we can not simply nop the branch.
+ * If a task was preempted when calling a trace function, the nops
+ * will remove the way to restore the TOC in r2 and the r2 TOC will
+ * get corrupted.
+ *
+ * Use a b +8 to jump over the load.
*/
op = 0x48000008; /* b +8 */
@@ -235,7 +181,7 @@ __ftrace_make_nop(struct module *mod,
/* Make sure that that this is still a 24bit jump */
if (!is_bl_op(op)) {
- printk(KERN_ERR "Not expected bl: opcode is %x\n", op);
+ pr_err("Not expected bl: opcode is %x\n", op);
return -EINVAL;
}
@@ -254,7 +200,7 @@ __ftrace_make_nop(struct module *mod,
/* Find where the trampoline jumps to */
if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
- printk(KERN_ERR "Failed to read %lx\n", tramp);
+ pr_err("Failed to read %lx\n", tramp);
return -EFAULT;
}
@@ -265,7 +211,7 @@ __ftrace_make_nop(struct module *mod,
((jmp[1] & 0xffff0000) != 0x398c0000) ||
(jmp[2] != 0x7d8903a6) ||
(jmp[3] != 0x4e800420)) {
- printk(KERN_ERR "Not a trampoline\n");
+ pr_err("Not a trampoline\n");
return -EINVAL;
}
@@ -277,8 +223,7 @@ __ftrace_make_nop(struct module *mod,
pr_devel(" %lx ", tramp);
if (tramp != addr) {
- printk(KERN_ERR
- "Trampoline location %08lx does not match addr\n",
+ pr_err("Trampoline location %08lx does not match addr\n",
tramp);
return -EINVAL;
}
@@ -319,15 +264,13 @@ int ftrace_make_nop(struct module *mod,
*/
if (!rec->arch.mod) {
if (!mod) {
- printk(KERN_ERR "No module loaded addr=%lx\n",
- addr);
+ pr_err("No module loaded addr=%lx\n", addr);
return -EFAULT;
}
rec->arch.mod = mod;
} else if (mod) {
if (mod != rec->arch.mod) {
- printk(KERN_ERR
- "Record mod %p not equal to passed in mod %p\n",
+ pr_err("Record mod %p not equal to passed in mod %p\n",
rec->arch.mod, mod);
return -EINVAL;
}
@@ -348,45 +291,42 @@ static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned int op[2];
- unsigned long ip = rec->ip;
+ void *ip = (void *)rec->ip;
/* read where this goes */
- if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2))
+ if (probe_kernel_read(op, ip, sizeof(op)))
return -EFAULT;
/*
- * It should be pointing to two nops or
- * b +8; ld r2,40(r1)
+ * We expect to see:
+ *
+ * b +8
+ * ld r2,XX(r1)
+ *
+ * The load offset is different depending on the ABI. For simplicity
+ * just mask it out when doing the compare.
*/
- if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) &&
- ((op[0] != PPC_INST_NOP) || (op[1] != PPC_INST_NOP))) {
- printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]);
+ if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
+ pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
return -EINVAL;
}
/* If we never set up a trampoline to ftrace_caller, then bail */
if (!rec->arch.mod->arch.tramp) {
- printk(KERN_ERR "No ftrace trampoline\n");
+ pr_err("No ftrace trampoline\n");
return -EINVAL;
}
- /* create the branch to the trampoline */
- op[0] = create_branch((unsigned int *)ip,
- rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
- if (!op[0]) {
- printk(KERN_ERR "REL24 out of range!\n");
+ /* Ensure branch is within 24 bits */
+ if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
+ pr_err("Branch out of range\n");
return -EINVAL;
}
- /* ld r2,40(r1) */
- op[1] = 0xe8410028;
-
- pr_devel("write to %lx\n", rec->ip);
-
- if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2))
- return -EPERM;
-
- flush_icache_range(ip, ip + 8);
+ if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
+ pr_err("REL24 out of range!\n");
+ return -EINVAL;
+ }
return 0;
}
@@ -403,13 +343,13 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
/* It should be pointing to a nop */
if (op != PPC_INST_NOP) {
- printk(KERN_ERR "Expected NOP but have %x\n", op);
+ pr_err("Expected NOP but have %x\n", op);
return -EINVAL;
}
/* If we never set up a trampoline to ftrace_caller, then bail */
if (!rec->arch.mod->arch.tramp) {
- printk(KERN_ERR "No ftrace trampoline\n");
+ pr_err("No ftrace trampoline\n");
return -EINVAL;
}
@@ -417,7 +357,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
op = create_branch((unsigned int *)ip,
rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
if (!op) {
- printk(KERN_ERR "REL24 out of range!\n");
+ pr_err("REL24 out of range!\n");
return -EINVAL;
}
@@ -455,7 +395,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* already have a module defined.
*/
if (!rec->arch.mod) {
- printk(KERN_ERR "No module loaded\n");
+ pr_err("No module loaded\n");
return -EINVAL;
}
@@ -531,13 +471,8 @@ void arch_ftrace_update_code(int command)
ftrace_disable_ftrace_graph_caller();
}
-int __init ftrace_dyn_arch_init(void *data)
+int __init ftrace_dyn_arch_init(void)
{
- /* caller expects data to be zero */
- unsigned long *p = data;
-
- *p = 0;
-
return 0;
}
#endif /* CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 67ee0d6c1070..7d7d8635227a 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -930,25 +930,6 @@ initial_mmu:
tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
-#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE)
-
- /* Load a TLB entry for the UART, so that ppc4xx_progress() can use
- * the UARTs nice and early. We use a 4k real==virtual mapping. */
-
- lis r3,SERIAL_DEBUG_IO_BASE@h
- ori r3,r3,SERIAL_DEBUG_IO_BASE@l
- mr r4,r3
- clrrwi r4,r4,12
- ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G)
-
- clrrwi r3,r3,12
- ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
-
- li r0,0 /* TLB slot 0 */
- tlbwe r4,r0,TLB_DATA
- tlbwe r3,r0,TLB_TAG
-#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */
-
isync
/* Establish the exception vector base
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 4f0946de2d5c..a95145d7f61b 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -23,6 +23,7 @@
*/
#include <linux/threads.h>
+#include <linux/init.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -69,16 +70,15 @@ _GLOBAL(__start)
/* NOP this out unconditionally */
BEGIN_FTR_SECTION
FIXUP_ENDIAN
- b .__start_initialization_multiplatform
+ b __start_initialization_multiplatform
END_FTR_SECTION(0, 1)
/* Catch branch to 0 in real mode */
trap
- /* Secondary processors spin on this value until it becomes nonzero.
- * When it does it contains the real address of the descriptor
- * of the function that the cpu should jump to to continue
- * initialization.
+ /* Secondary processors spin on this value until it becomes non-zero.
+ * When non-zero, it contains the real address of the function the cpu
+ * should jump to.
*/
.balign 8
.globl __secondary_hold_spinloop
@@ -139,16 +139,15 @@ __secondary_hold:
tovirt(r26,r26)
#endif
/* All secondary cpus wait here until told to start. */
-100: ld r4,__secondary_hold_spinloop-_stext(r26)
- cmpdi 0,r4,0
+100: ld r12,__secondary_hold_spinloop-_stext(r26)
+ cmpdi 0,r12,0
beq 100b
#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
#ifdef CONFIG_PPC_BOOK3E
- tovirt(r4,r4)
+ tovirt(r12,r12)
#endif
- ld r4,0(r4) /* deref function descriptor */
- mtctr r4
+ mtctr r12
mr r3,r24
/*
* it may be the case that other platforms have r4 right to
@@ -185,16 +184,16 @@ _GLOBAL(generic_secondary_thread_init)
mr r24,r3
/* turn on 64-bit mode */
- bl .enable_64b_mode
+ bl enable_64b_mode
/* get a valid TOC pointer, wherever we're mapped at */
- bl .relative_toc
+ bl relative_toc
tovirt(r2,r2)
#ifdef CONFIG_PPC_BOOK3E
/* Book3E initialization */
mr r3,r24
- bl .book3e_secondary_thread_init
+ bl book3e_secondary_thread_init
#endif
b generic_secondary_common_init
@@ -213,17 +212,17 @@ _GLOBAL(generic_secondary_smp_init)
mr r25,r4
/* turn on 64-bit mode */
- bl .enable_64b_mode
+ bl enable_64b_mode
/* get a valid TOC pointer, wherever we're mapped at */
- bl .relative_toc
+ bl relative_toc
tovirt(r2,r2)
#ifdef CONFIG_PPC_BOOK3E
/* Book3E initialization */
mr r3,r24
mr r4,r25
- bl .book3e_secondary_core_init
+ bl book3e_secondary_core_init
#endif
generic_secondary_common_init:
@@ -235,7 +234,7 @@ generic_secondary_common_init:
ld r13,0(r13) /* Get base vaddr of paca array */
#ifndef CONFIG_SMP
addi r13,r13,PACA_SIZE /* know r13 if used accidentally */
- b .kexec_wait /* wait for next kernel if !SMP */
+ b kexec_wait /* wait for next kernel if !SMP */
#else
LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
lwz r7,0(r7) /* also the max paca allocated */
@@ -249,7 +248,7 @@ generic_secondary_common_init:
blt 1b
mr r3,r24 /* not found, copy phys to r3 */
- b .kexec_wait /* next kernel might do better */
+ b kexec_wait /* next kernel might do better */
2: SET_PACA(r13)
#ifdef CONFIG_PPC_BOOK3E
@@ -263,11 +262,13 @@ generic_secondary_common_init:
/* See if we need to call a cpu state restore handler */
LOAD_REG_ADDR(r23, cur_cpu_spec)
ld r23,0(r23)
- ld r23,CPU_SPEC_RESTORE(r23)
- cmpdi 0,r23,0
+ ld r12,CPU_SPEC_RESTORE(r23)
+ cmpdi 0,r12,0
beq 3f
- ld r23,0(r23)
- mtctr r23
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+ ld r12,0(r12)
+#endif
+ mtctr r12
bctrl
3: LOAD_REG_ADDR(r3, spinning_secondaries) /* Decrement spinning_secondaries */
@@ -298,7 +299,7 @@ generic_secondary_common_init:
* Assumes we're mapped EA == RA if the MMU is on.
*/
#ifdef CONFIG_PPC_BOOK3S
-_STATIC(__mmu_off)
+__mmu_off:
mfmsr r3
andi. r0,r3,MSR_IR|MSR_DR
beqlr
@@ -323,12 +324,12 @@ _STATIC(__mmu_off)
* DT block, r4 is a physical pointer to the kernel itself
*
*/
-_GLOBAL(__start_initialization_multiplatform)
+__start_initialization_multiplatform:
/* Make sure we are running in 64 bits mode */
- bl .enable_64b_mode
+ bl enable_64b_mode
/* Get TOC pointer (current runtime address) */
- bl .relative_toc
+ bl relative_toc
/* find out where we are now */
bcl 20,31,$+4
@@ -341,7 +342,7 @@ _GLOBAL(__start_initialization_multiplatform)
*/
cmpldi cr0,r5,0
beq 1f
- b .__boot_from_prom /* yes -> prom */
+ b __boot_from_prom /* yes -> prom */
1:
/* Save parameters */
mr r31,r3
@@ -353,8 +354,8 @@ _GLOBAL(__start_initialization_multiplatform)
#endif
#ifdef CONFIG_PPC_BOOK3E
- bl .start_initialization_book3e
- b .__after_prom_start
+ bl start_initialization_book3e
+ b __after_prom_start
#else
/* Setup some critical 970 SPRs before switching MMU off */
mfspr r0,SPRN_PVR
@@ -367,15 +368,15 @@ _GLOBAL(__start_initialization_multiplatform)
beq 1f
cmpwi r0,0x45 /* 970GX */
bne 2f
-1: bl .__cpu_preinit_ppc970
+1: bl __cpu_preinit_ppc970
2:
/* Switch off MMU if not already off */
- bl .__mmu_off
- b .__after_prom_start
+ bl __mmu_off
+ b __after_prom_start
#endif /* CONFIG_PPC_BOOK3E */
-_INIT_STATIC(__boot_from_prom)
+__boot_from_prom:
#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
/* Save parameters */
mr r31,r3
@@ -394,7 +395,7 @@ _INIT_STATIC(__boot_from_prom)
#ifdef CONFIG_RELOCATABLE
/* Relocate code for where we are now */
mr r3,r26
- bl .relocate
+ bl relocate
#endif
/* Restore parameters */
@@ -406,14 +407,14 @@ _INIT_STATIC(__boot_from_prom)
/* Do all of the interaction with OF client interface */
mr r8,r26
- bl .prom_init
+ bl prom_init
#endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */
/* We never return. We also hit that trap if trying to boot
* from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
trap
-_STATIC(__after_prom_start)
+__after_prom_start:
#ifdef CONFIG_RELOCATABLE
/* process relocations for the final address of the kernel */
lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
@@ -423,7 +424,7 @@ _STATIC(__after_prom_start)
bne 1f
add r25,r25,r26
1: mr r3,r25
- bl .relocate
+ bl relocate
#endif
/*
@@ -463,12 +464,12 @@ _STATIC(__after_prom_start)
lis r5,(copy_to_here - _stext)@ha
addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
- bl .copy_and_flush /* copy the first n bytes */
+ bl copy_and_flush /* copy the first n bytes */
/* this includes the code being */
/* executed here. */
addis r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */
- addi r8,r8,(4f - _stext)@l /* that we just made */
- mtctr r8
+ addi r12,r8,(4f - _stext)@l /* that we just made */
+ mtctr r12
bctr
.balign 8
@@ -477,9 +478,9 @@ p_end: .llong _end - _stext
4: /* Now copy the rest of the kernel up to _end */
addis r5,r26,(p_end - _stext)@ha
ld r5,(p_end - _stext)@l(r5) /* get _end */
-5: bl .copy_and_flush /* copy the rest */
+5: bl copy_and_flush /* copy the rest */
-9: b .start_here_multiplatform
+9: b start_here_multiplatform
/*
* Copy routine used to copy the kernel to start at physical address 0
@@ -543,7 +544,7 @@ __secondary_start_pmac_0:
_GLOBAL(pmac_secondary_start)
/* turn on 64-bit mode */
- bl .enable_64b_mode
+ bl enable_64b_mode
li r0,0
mfspr r3,SPRN_HID4
@@ -555,11 +556,11 @@ _GLOBAL(pmac_secondary_start)
slbia
/* get TOC pointer (real address) */
- bl .relative_toc
+ bl relative_toc
tovirt(r2,r2)
/* Copy some CPU settings from CPU 0 */
- bl .__restore_cpu_ppc970
+ bl __restore_cpu_ppc970
/* pSeries do that early though I don't think we really need it */
mfmsr r3
@@ -618,7 +619,7 @@ __secondary_start:
std r14,PACAKSAVE(r13)
/* Do early setup for that CPU (stab, slb, hash table pointer) */
- bl .early_setup_secondary
+ bl early_setup_secondary
/*
* setup the new stack pointer, but *don't* use this until
@@ -638,7 +639,7 @@ __secondary_start:
stb r0,PACAIRQHAPPENED(r13)
/* enable MMU and jump to start_secondary */
- LOAD_REG_ADDR(r3, .start_secondary_prolog)
+ LOAD_REG_ADDR(r3, start_secondary_prolog)
LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
mtspr SPRN_SRR0,r3
@@ -651,11 +652,11 @@ __secondary_start:
* zero the stack back-chain pointer and get the TOC virtual address
* before going into C code.
*/
-_GLOBAL(start_secondary_prolog)
+start_secondary_prolog:
ld r2,PACATOC(r13)
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
- bl .start_secondary
+ bl start_secondary
b .
/*
* Reset stack pointer and call start_secondary
@@ -666,14 +667,14 @@ _GLOBAL(start_secondary_resume)
ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
- bl .start_secondary
+ bl start_secondary
b .
#endif
/*
* This subroutine clobbers r11 and r12
*/
-_GLOBAL(enable_64b_mode)
+enable_64b_mode:
mfmsr r11 /* grab the current MSR */
#ifdef CONFIG_PPC_BOOK3E
oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */
@@ -714,9 +715,9 @@ p_toc: .llong __toc_start + 0x8000 - 0b
/*
* This is where the main kernel code starts.
*/
-_INIT_STATIC(start_here_multiplatform)
+start_here_multiplatform:
/* set up the TOC */
- bl .relative_toc
+ bl relative_toc
tovirt(r2,r2)
/* Clear out the BSS. It may have been done in prom_init,
@@ -775,9 +776,9 @@ _INIT_STATIC(start_here_multiplatform)
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
- bl .early_setup /* also sets r13 and SPRG_PACA */
+ bl early_setup /* also sets r13 and SPRG_PACA */
- LOAD_REG_ADDR(r3, .start_here_common)
+ LOAD_REG_ADDR(r3, start_here_common)
ld r4,PACAKMSR(r13)
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
@@ -785,7 +786,8 @@ _INIT_STATIC(start_here_multiplatform)
b . /* prevent speculative execution */
/* This is where all platforms converge execution */
-_INIT_GLOBAL(start_here_common)
+
+start_here_common:
/* relocation is on at this point */
std r1,PACAKSAVE(r13)
@@ -793,7 +795,7 @@ _INIT_GLOBAL(start_here_common)
ld r2,PACATOC(r13)
/* Do more system initializations in virtual mode */
- bl .setup_system
+ bl setup_system
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
@@ -804,7 +806,7 @@ _INIT_GLOBAL(start_here_common)
stb r0,PACAIRQHAPPENED(r13)
/* Generic kernel entry */
- bl .start_kernel
+ bl start_kernel
/* Not reached */
BUG_OPCODE
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index f45726a1d963..b497188a94a1 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -65,29 +65,78 @@ _ENTRY(_start);
nop
/* Translate device tree address to physical, save in r30/r31 */
- mfmsr r16
- mfspr r17,SPRN_PID
- rlwinm r17,r17,16,0x3fff0000 /* turn PID into MAS6[SPID] */
- rlwimi r17,r16,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */
- mtspr SPRN_MAS6,r17
-
- tlbsx 0,r3 /* must succeed */
-
- mfspr r16,SPRN_MAS1
- mfspr r20,SPRN_MAS3
- rlwinm r17,r16,25,0x1f /* r17 = log2(page size) */
- li r18,1024
- slw r18,r18,r17 /* r18 = page size */
- addi r18,r18,-1
- and r19,r3,r18 /* r19 = page offset */
- andc r31,r20,r18 /* r31 = page base */
- or r31,r31,r19 /* r31 = devtree phys addr */
- mfspr r30,SPRN_MAS7
+ bl get_phys_addr
+ mr r30,r3
+ mr r31,r4
li r25,0 /* phys kernel start (low) */
li r24,0 /* CPU number */
li r23,0 /* phys kernel start (high) */
+#ifdef CONFIG_RELOCATABLE
+ LOAD_REG_ADDR_PIC(r3, _stext) /* Get our current runtime base */
+
+ /* Translate _stext address to physical, save in r23/r25 */
+ bl get_phys_addr
+ mr r23,r3
+ mr r25,r4
+
+ bl 0f
+0: mflr r8
+ addis r3,r8,(is_second_reloc - 0b)@ha
+ lwz r19,(is_second_reloc - 0b)@l(r3)
+
+ /* Check if this is the second relocation. */
+ cmpwi r19,1
+ bne 1f
+
+ /*
+ * For the second relocation, we already get the real memstart_addr
+ * from device tree. So we will map PAGE_OFFSET to memstart_addr,
+ * then the virtual address of start kernel should be:
+ * PAGE_OFFSET + (kernstart_addr - memstart_addr)
+ * Since the offset between kernstart_addr and memstart_addr should
+ * never be beyond 1G, so we can just use the lower 32bit of them
+ * for the calculation.
+ */
+ lis r3,PAGE_OFFSET@h
+
+ addis r4,r8,(kernstart_addr - 0b)@ha
+ addi r4,r4,(kernstart_addr - 0b)@l
+ lwz r5,4(r4)
+
+ addis r6,r8,(memstart_addr - 0b)@ha
+ addi r6,r6,(memstart_addr - 0b)@l
+ lwz r7,4(r6)
+
+ subf r5,r7,r5
+ add r3,r3,r5
+ b 2f
+
+1:
+ /*
+ * We have the runtime (virutal) address of our base.
+ * We calculate our shift of offset from a 64M page.
+ * We could map the 64M page we belong to at PAGE_OFFSET and
+ * get going from there.
+ */
+ lis r4,KERNELBASE@h
+ ori r4,r4,KERNELBASE@l
+ rlwinm r6,r25,0,0x3ffffff /* r6 = PHYS_START % 64M */
+ rlwinm r5,r4,0,0x3ffffff /* r5 = KERNELBASE % 64M */
+ subf r3,r5,r6 /* r3 = r6 - r5 */
+ add r3,r4,r3 /* Required Virtual Address */
+
+2: bl relocate
+
+ /*
+ * For the second relocation, we already set the right tlb entries
+ * for the kernel space, so skip the code in fsl_booke_entry_mapping.S
+ */
+ cmpwi r19,1
+ beq set_ivor
+#endif
+
/* We try to not make any assumptions about how the boot loader
* setup or used the TLBs. We invalidate all mappings from the
* boot loader and load a single entry in TLB1[0] to map the
@@ -113,6 +162,7 @@ _ENTRY(__early_start)
#include "fsl_booke_entry_mapping.S"
#undef ENTRY_MAPPING_BOOT_SETUP
+set_ivor:
/* Establish the interrupt vector offsets */
SET_IVOR(0, CriticalInput);
SET_IVOR(1, MachineCheck);
@@ -166,8 +216,7 @@ _ENTRY(__early_start)
/* Check to see if we're the second processor, and jump
* to the secondary_start code if so
*/
- lis r24, boot_cpuid@h
- ori r24, r24, boot_cpuid@l
+ LOAD_REG_ADDR_PIC(r24, boot_cpuid)
lwz r24, 0(r24)
cmpwi r24, -1
mfspr r24,SPRN_PIR
@@ -197,6 +246,18 @@ _ENTRY(__early_start)
bl early_init
+#ifdef CONFIG_RELOCATABLE
+ mr r3,r30
+ mr r4,r31
+#ifdef CONFIG_PHYS_64BIT
+ mr r5,r23
+ mr r6,r25
+#else
+ mr r5,r25
+#endif
+ bl relocate_init
+#endif
+
#ifdef CONFIG_DYNAMIC_MEMSTART
lis r3,kernstart_addr@ha
la r3,kernstart_addr@l(r3)
@@ -856,6 +917,33 @@ KernelSPE:
#endif /* CONFIG_SPE */
/*
+ * Translate the effec addr in r3 to phys addr. The phys addr will be put
+ * into r3(higher 32bit) and r4(lower 32bit)
+ */
+get_phys_addr:
+ mfmsr r8
+ mfspr r9,SPRN_PID
+ rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */
+ rlwimi r9,r8,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */
+ mtspr SPRN_MAS6,r9
+
+ tlbsx 0,r3 /* must succeed */
+
+ mfspr r8,SPRN_MAS1
+ mfspr r12,SPRN_MAS3
+ rlwinm r9,r8,25,0x1f /* r9 = log2(page size) */
+ li r10,1024
+ slw r10,r10,r9 /* r10 = page size */
+ addi r10,r10,-1
+ and r11,r3,r10 /* r11 = page offset */
+ andc r4,r12,r10 /* r4 = page base */
+ or r4,r4,r11 /* r4 = devtree phys addr */
+#ifdef CONFIG_PHYS_64BIT
+ mfspr r3,SPRN_MAS7
+#endif
+ blr
+
+/*
* Global functions
*/
@@ -1057,24 +1145,36 @@ _GLOBAL(__flush_disable_L1)
/* When we get here, r24 needs to hold the CPU # */
.globl __secondary_start
__secondary_start:
- lis r3,__secondary_hold_acknowledge@h
- ori r3,r3,__secondary_hold_acknowledge@l
- stw r24,0(r3)
-
- li r3,0
- mr r4,r24 /* Why? */
- bl call_setup_cpu
-
- lis r3,tlbcam_index@ha
- lwz r3,tlbcam_index@l(r3)
+ LOAD_REG_ADDR_PIC(r3, tlbcam_index)
+ lwz r3,0(r3)
mtctr r3
li r26,0 /* r26 safe? */
+ bl switch_to_as1
+ mr r27,r3 /* tlb entry */
/* Load each CAM entry */
1: mr r3,r26
bl loadcam_entry
addi r26,r26,1
bdnz 1b
+ mr r3,r27 /* tlb entry */
+ LOAD_REG_ADDR_PIC(r4, memstart_addr)
+ lwz r4,0(r4)
+ mr r5,r25 /* phys kernel start */
+ rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */
+ subf r4,r5,r4 /* memstart_addr - phys kernel start */
+ li r5,0 /* no device tree */
+ li r6,0 /* not boot cpu */
+ bl restore_to_as0
+
+
+ lis r3,__secondary_hold_acknowledge@h
+ ori r3,r3,__secondary_hold_acknowledge@l
+ stw r24,0(r3)
+
+ li r3,0
+ mr r4,r24 /* Why? */
+ bl call_setup_cpu
/* get current_thread_info and current */
lis r1,secondary_ti@ha
@@ -1111,6 +1211,112 @@ __secondary_hold_acknowledge:
#endif
/*
+ * Create a tlb entry with the same effective and physical address as
+ * the tlb entry used by the current running code. But set the TS to 1.
+ * Then switch to the address space 1. It will return with the r3 set to
+ * the ESEL of the new created tlb.
+ */
+_GLOBAL(switch_to_as1)
+ mflr r5
+
+ /* Find a entry not used */
+ mfspr r3,SPRN_TLB1CFG
+ andi. r3,r3,0xfff
+ mfspr r4,SPRN_PID
+ rlwinm r4,r4,16,0x3fff0000 /* turn PID into MAS6[SPID] */
+ mtspr SPRN_MAS6,r4
+1: lis r4,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ addi r3,r3,-1
+ rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r4
+ tlbre
+ mfspr r4,SPRN_MAS1
+ andis. r4,r4,MAS1_VALID@h
+ bne 1b
+
+ /* Get the tlb entry used by the current running code */
+ bl 0f
+0: mflr r4
+ tlbsx 0,r4
+
+ mfspr r4,SPRN_MAS1
+ ori r4,r4,MAS1_TS /* Set the TS = 1 */
+ mtspr SPRN_MAS1,r4
+
+ mfspr r4,SPRN_MAS0
+ rlwinm r4,r4,0,~MAS0_ESEL_MASK
+ rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r4
+ tlbwe
+ isync
+ sync
+
+ mfmsr r4
+ ori r4,r4,MSR_IS | MSR_DS
+ mtspr SPRN_SRR0,r5
+ mtspr SPRN_SRR1,r4
+ sync
+ rfi
+
+/*
+ * Restore to the address space 0 and also invalidate the tlb entry created
+ * by switch_to_as1.
+ * r3 - the tlb entry which should be invalidated
+ * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0)
+ * r5 - device tree virtual address. If r4 is 0, r5 is ignored.
+ * r6 - boot cpu
+*/
+_GLOBAL(restore_to_as0)
+ mflr r0
+
+ bl 0f
+0: mflr r9
+ addi r9,r9,1f - 0b
+
+ /*
+ * We may map the PAGE_OFFSET in AS0 to a different physical address,
+ * so we need calculate the right jump and device tree address based
+ * on the offset passed by r4.
+ */
+ add r9,r9,r4
+ add r5,r5,r4
+ add r0,r0,r4
+
+2: mfmsr r7
+ li r8,(MSR_IS | MSR_DS)
+ andc r7,r7,r8
+
+ mtspr SPRN_SRR0,r9
+ mtspr SPRN_SRR1,r7
+ sync
+ rfi
+
+ /* Invalidate the temporary tlb entry for AS1 */
+1: lis r9,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r9,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r9
+ tlbre
+ mfspr r9,SPRN_MAS1
+ rlwinm r9,r9,0,2,31 /* Clear MAS1 Valid and IPPROT */
+ mtspr SPRN_MAS1,r9
+ tlbwe
+ isync
+
+ cmpwi r4,0
+ cmpwi cr1,r6,0
+ cror eq,4*cr1+eq,eq
+ bne 3f /* offset != 0 && is_boot_cpu */
+ mtlr r0
+ blr
+
+ /*
+ * The PAGE_OFFSET will map to a different physical address,
+ * jump to _start to do another relocation again.
+ */
+3: mr r3,r5
+ bl _start
+
+/*
* We put a few things here that have to be page-aligned. This stuff
* goes at the beginning of the data segment, which is page-aligned.
*/
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index f0b47d1a6b0e..0bb5918faaaf 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -28,7 +28,6 @@
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
-#include <linux/init.h>
#include <linux/smp.h>
#include <asm/hw_breakpoint.h>
@@ -73,7 +72,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
* If so, DABR will be populated in single_step_dabr_instruction().
*/
if (current->thread.last_hit_ubp != bp)
- set_breakpoint(info);
+ __set_breakpoint(info);
return 0;
}
@@ -199,7 +198,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
info = counter_arch_bp(tsk->thread.last_hit_ubp);
regs->msr &= ~MSR_SE;
- set_breakpoint(info);
+ __set_breakpoint(info);
tsk->thread.last_hit_ubp = NULL;
}
@@ -285,7 +284,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args)
if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
perf_bp_event(bp, regs);
- set_breakpoint(info);
+ __set_breakpoint(info);
out:
rcu_read_unlock();
return rc;
@@ -317,7 +316,7 @@ int __kprobes single_step_dabr_instruction(struct die_args *args)
if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
perf_bp_event(bp, regs);
- set_breakpoint(info);
+ __set_breakpoint(info);
current->thread.last_hit_ubp = NULL;
/*
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index bfb73cc209ce..48c21acef915 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -43,7 +43,7 @@ _GLOBAL(\name)
*/
#ifdef CONFIG_TRACE_IRQFLAGS
stdu r1,-128(r1)
- bl .trace_hardirqs_on
+ bl trace_hardirqs_on
addi r1,r1,128
#endif
li r0,1
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index e3edaa189911..f57a19348bdd 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -46,7 +46,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
mflr r0
std r0,16(r1)
stdu r1,-128(r1)
- bl .trace_hardirqs_on
+ bl trace_hardirqs_on
addi r1,r1,128
ld r0,16(r1)
mtlr r0
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 847e40e62fce..5cf3d367190d 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -17,20 +17,35 @@
#include <asm/ppc-opcode.h>
#include <asm/hw_irq.h>
#include <asm/kvm_book3s_asm.h>
+#include <asm/opal.h>
#undef DEBUG
- .text
+/* Idle state entry routines */
-_GLOBAL(power7_idle)
- /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDRBASE(r3,powersave_nap)
- lwz r4,ADDROFF(powersave_nap)(r3)
- cmpwi 0,r4,0
- beqlr
- /* fall through */
+#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
+ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+ std r0,0(r1); \
+ ptesync; \
+ ld r0,0(r1); \
+1: cmp cr0,r0,r0; \
+ bne 1b; \
+ IDLE_INST; \
+ b .
-_GLOBAL(power7_nap)
+ .text
+
+/*
+ * Pass requested state in r3:
+ * 0 - nap
+ * 1 - sleep
+ *
+ * To check IRQ_HAPPENED in r4
+ * 0 - don't check
+ * 1 - check
+ */
+_GLOBAL(power7_powersave_common)
+ /* Use r3 to pass state nap/sleep/winkle */
/* NAP is a state loss, we create a regs frame on the
* stack, fill it up with the state we care about and
* stick a pointer to it in PACAR1. We really only
@@ -47,7 +62,7 @@ _GLOBAL(power7_nap)
/* Make sure FPU, VSX etc... are flushed as we may lose
* state when going to nap mode
*/
- bl .discard_lazy_cpu_state
+ bl discard_lazy_cpu_state
#endif /* CONFIG_SMP */
/* Hard disable interrupts */
@@ -60,6 +75,8 @@ _GLOBAL(power7_nap)
lbz r0,PACAIRQHAPPENED(r13)
cmpwi cr0,r0,0
beq 1f
+ cmpwi cr0,r4,0
+ beq 1f
addi r1,r1,INT_FRAME_SIZE
ld r0,16(r1)
mtlr r0
@@ -79,25 +96,70 @@ _GLOBAL(power7_nap)
/* Continue saving state */
SAVE_GPR(2, r1)
SAVE_NVGPRS(r1)
- mfcr r3
- std r3,_CCR(r1)
+ mfcr r4
+ std r4,_CCR(r1)
std r9,_MSR(r1)
std r1,PACAR1(r13)
+_GLOBAL(power7_enter_nap_mode)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/* Tell KVM we're napping */
li r4,KVM_HWTHREAD_IN_NAP
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
+ cmpwi cr0,r3,1
+ beq 2f
+ IDLE_STATE_ENTER_SEQ(PPC_NAP)
+ /* No return */
+2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+ /* No return */
- /* Magic NAP mode enter sequence */
- std r0,0(r1)
- ptesync
- ld r0,0(r1)
-1: cmp cr0,r0,r0
- bne 1b
- PPC_NAP
- b .
+_GLOBAL(power7_idle)
+ /* Now check if user or arch enabled NAP mode */
+ LOAD_REG_ADDRBASE(r3,powersave_nap)
+ lwz r4,ADDROFF(powersave_nap)(r3)
+ cmpwi 0,r4,0
+ beqlr
+ li r3, 1
+ /* fall through */
+
+_GLOBAL(power7_nap)
+ mr r4,r3
+ li r3,0
+ b power7_powersave_common
+ /* No return */
+
+_GLOBAL(power7_sleep)
+ li r3,1
+ li r4,1
+ b power7_powersave_common
+ /* No return */
+
+_GLOBAL(power7_wakeup_tb_loss)
+ ld r2,PACATOC(r13);
+ ld r1,PACAR1(r13)
+
+ /* Time base re-sync */
+ li r0,OPAL_RESYNC_TIMEBASE
+ LOAD_REG_ADDR(r11,opal);
+ ld r12,8(r11);
+ ld r2,0(r11);
+ mtctr r12
+ bctrl
+
+ /* TODO: Check r3 for failure */
+
+ REST_NVGPRS(r1)
+ REST_GPR(2, r1)
+ ld r3,_CCR(r1)
+ ld r4,_MSR(r1)
+ ld r5,_NIP(r1)
+ addi r1,r1,INT_FRAME_SIZE
+ mtcr r3
+ mfspr r3,SPRN_SRR1 /* Return SRR1 */
+ mtspr SPRN_SRR1,r4
+ mtspr SPRN_SRR0,r5
+ rfid
_GLOBAL(power7_wakeup_loss)
ld r1,PACAR1(r13)
@@ -115,7 +177,7 @@ _GLOBAL(power7_wakeup_loss)
_GLOBAL(power7_wakeup_noloss)
lbz r0,PACA_NAPSTATELOST(r13)
cmpwi r0,0
- bne .power7_wakeup_loss
+ bne power7_wakeup_loss
ld r1,PACAR1(r13)
ld r4,_MSR(r1)
ld r5,_NIP(r1)
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index 97a3715ac8bd..12e48d56f771 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -3,7 +3,6 @@
*
* (C) Copyright 2004 Linus Torvalds
*/
-#include <linux/init.h>
#include <linux/pci.h>
#include <linux/mm.h>
#include <linux/export.h>
@@ -24,7 +23,7 @@ unsigned int ioread16(void __iomem *addr)
}
unsigned int ioread16be(void __iomem *addr)
{
- return in_be16(addr);
+ return readw_be(addr);
}
unsigned int ioread32(void __iomem *addr)
{
@@ -32,7 +31,7 @@ unsigned int ioread32(void __iomem *addr)
}
unsigned int ioread32be(void __iomem *addr)
{
- return in_be32(addr);
+ return readl_be(addr);
}
EXPORT_SYMBOL(ioread8);
EXPORT_SYMBOL(ioread16);
@@ -50,7 +49,7 @@ void iowrite16(u16 val, void __iomem *addr)
}
void iowrite16be(u16 val, void __iomem *addr)
{
- out_be16(addr, val);
+ writew_be(val, addr);
}
void iowrite32(u32 val, void __iomem *addr)
{
@@ -58,7 +57,7 @@ void iowrite32(u32 val, void __iomem *addr)
}
void iowrite32be(u32 val, void __iomem *addr)
{
- out_be32(addr, val);
+ writel_be(val, addr);
}
EXPORT_SYMBOL(iowrite8);
EXPORT_SYMBOL(iowrite16);
@@ -76,15 +75,15 @@ EXPORT_SYMBOL(iowrite32be);
*/
void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
{
- _insb((u8 __iomem *) addr, dst, count);
+ readsb(addr, dst, count);
}
void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
{
- _insw_ns((u16 __iomem *) addr, dst, count);
+ readsw(addr, dst, count);
}
void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
{
- _insl_ns((u32 __iomem *) addr, dst, count);
+ readsl(addr, dst, count);
}
EXPORT_SYMBOL(ioread8_rep);
EXPORT_SYMBOL(ioread16_rep);
@@ -92,15 +91,15 @@ EXPORT_SYMBOL(ioread32_rep);
void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
{
- _outsb((u8 __iomem *) addr, src, count);
+ writesb(addr, src, count);
}
void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
{
- _outsw_ns((u16 __iomem *) addr, src, count);
+ writesw(addr, src, count);
}
void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
{
- _outsl_ns((u32 __iomem *) addr, src, count);
+ writesl(addr, src, count);
}
EXPORT_SYMBOL(iowrite8_rep);
EXPORT_SYMBOL(iowrite16_rep);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 572bb5b95f35..88e3ec6e1d96 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -251,14 +251,13 @@ again:
if (dev)
boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
- 1 << IOMMU_PAGE_SHIFT);
+ 1 << tbl->it_page_shift);
else
- boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT);
+ boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift);
/* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
- n = iommu_area_alloc(tbl->it_map, limit, start, npages,
- tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
- align_mask);
+ n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
+ boundary_size >> tbl->it_page_shift, align_mask);
if (n == -1) {
if (likely(pass == 0)) {
/* First try the pool from the start */
@@ -320,12 +319,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
return DMA_ERROR_CODE;
entry += tbl->it_offset; /* Offset into real TCE table */
- ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
+ ret = entry << tbl->it_page_shift; /* Set the return dma address */
/* Put the TCEs in the HW table */
build_fail = ppc_md.tce_build(tbl, entry, npages,
- (unsigned long)page & IOMMU_PAGE_MASK,
- direction, attrs);
+ (unsigned long)page &
+ IOMMU_PAGE_MASK(tbl), direction, attrs);
/* ppc_md.tce_build() only returns non-zero for transient errors.
* Clean up the table bitmap in this case and return
@@ -352,7 +351,7 @@ static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
{
unsigned long entry, free_entry;
- entry = dma_addr >> IOMMU_PAGE_SHIFT;
+ entry = dma_addr >> tbl->it_page_shift;
free_entry = entry - tbl->it_offset;
if (((free_entry + npages) > tbl->it_size) ||
@@ -401,7 +400,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned long flags;
struct iommu_pool *pool;
- entry = dma_addr >> IOMMU_PAGE_SHIFT;
+ entry = dma_addr >> tbl->it_page_shift;
free_entry = entry - tbl->it_offset;
pool = get_pool(tbl, free_entry);
@@ -468,13 +467,13 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
}
/* Allocate iommu entries for that segment */
vaddr = (unsigned long) sg_virt(s);
- npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE);
+ npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl));
align = 0;
- if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE &&
+ if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE &&
(vaddr & ~PAGE_MASK) == 0)
- align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
+ align = PAGE_SHIFT - tbl->it_page_shift;
entry = iommu_range_alloc(dev, tbl, npages, &handle,
- mask >> IOMMU_PAGE_SHIFT, align);
+ mask >> tbl->it_page_shift, align);
DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
@@ -489,16 +488,16 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
/* Convert entry to a dma_addr_t */
entry += tbl->it_offset;
- dma_addr = entry << IOMMU_PAGE_SHIFT;
- dma_addr |= (s->offset & ~IOMMU_PAGE_MASK);
+ dma_addr = entry << tbl->it_page_shift;
+ dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));
DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
npages, entry, dma_addr);
/* Insert into HW table */
build_fail = ppc_md.tce_build(tbl, entry, npages,
- vaddr & IOMMU_PAGE_MASK,
- direction, attrs);
+ vaddr & IOMMU_PAGE_MASK(tbl),
+ direction, attrs);
if(unlikely(build_fail))
goto failure;
@@ -559,9 +558,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
if (s->dma_length != 0) {
unsigned long vaddr, npages;
- vaddr = s->dma_address & IOMMU_PAGE_MASK;
+ vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl);
npages = iommu_num_pages(s->dma_address, s->dma_length,
- IOMMU_PAGE_SIZE);
+ IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, vaddr, npages);
s->dma_address = DMA_ERROR_CODE;
s->dma_length = 0;
@@ -592,7 +591,7 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
if (sg->dma_length == 0)
break;
npages = iommu_num_pages(dma_handle, sg->dma_length,
- IOMMU_PAGE_SIZE);
+ IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, dma_handle, npages);
sg = sg_next(sg);
}
@@ -676,7 +675,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
set_bit(0, tbl->it_map);
/* We only split the IOMMU table if we have 1GB or more of space */
- if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024))
+ if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
tbl->nr_pools = IOMMU_NR_POOLS;
else
tbl->nr_pools = 1;
@@ -768,16 +767,16 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
vaddr = page_address(page) + offset;
uaddr = (unsigned long)vaddr;
- npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE);
+ npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
if (tbl) {
align = 0;
- if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE &&
+ if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
((unsigned long)vaddr & ~PAGE_MASK) == 0)
- align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
+ align = PAGE_SHIFT - tbl->it_page_shift;
dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
- mask >> IOMMU_PAGE_SHIFT, align,
+ mask >> tbl->it_page_shift, align,
attrs);
if (dma_handle == DMA_ERROR_CODE) {
if (printk_ratelimit()) {
@@ -786,7 +785,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
npages);
}
} else
- dma_handle |= (uaddr & ~IOMMU_PAGE_MASK);
+ dma_handle |= (uaddr & ~IOMMU_PAGE_MASK(tbl));
}
return dma_handle;
@@ -801,7 +800,8 @@ void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
BUG_ON(direction == DMA_NONE);
if (tbl) {
- npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE);
+ npages = iommu_num_pages(dma_handle, size,
+ IOMMU_PAGE_SIZE(tbl));
iommu_free(tbl, dma_handle, npages);
}
}
@@ -845,10 +845,10 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
memset(ret, 0, size);
/* Set up tces to cover the allocated range */
- nio_pages = size >> IOMMU_PAGE_SHIFT;
- io_order = get_iommu_order(size);
+ nio_pages = size >> tbl->it_page_shift;
+ io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
- mask >> IOMMU_PAGE_SHIFT, io_order, NULL);
+ mask >> tbl->it_page_shift, io_order, NULL);
if (mapping == DMA_ERROR_CODE) {
free_pages((unsigned long)ret, order);
return NULL;
@@ -864,7 +864,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
unsigned int nio_pages;
size = PAGE_ALIGN(size);
- nio_pages = size >> IOMMU_PAGE_SHIFT;
+ nio_pages = size >> tbl->it_page_shift;
iommu_free(tbl, dma_handle, nio_pages);
size = PAGE_ALIGN(size);
free_pages((unsigned long)vaddr, get_order(size));
@@ -935,10 +935,10 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl,
if (tce_value)
return -EINVAL;
- if (ioba & ~IOMMU_PAGE_MASK)
+ if (ioba & ~IOMMU_PAGE_MASK(tbl))
return -EINVAL;
- ioba >>= IOMMU_PAGE_SHIFT;
+ ioba >>= tbl->it_page_shift;
if (ioba < tbl->it_offset)
return -EINVAL;
@@ -955,13 +955,13 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
return -EINVAL;
- if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ))
+ if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
return -EINVAL;
- if (ioba & ~IOMMU_PAGE_MASK)
+ if (ioba & ~IOMMU_PAGE_MASK(tbl))
return -EINVAL;
- ioba >>= IOMMU_PAGE_SHIFT;
+ ioba >>= tbl->it_page_shift;
if (ioba < tbl->it_offset)
return -EINVAL;
@@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
/* if (unlikely(ret))
pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
- __func__, hwaddr, entry << IOMMU_PAGE_SHIFT,
+ __func__, hwaddr, entry << IOMMU_PAGE_SHIFT(tbl),
hwaddr, ret); */
return ret;
@@ -1049,14 +1049,14 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
{
int ret;
struct page *page = NULL;
- unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK;
+ unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
enum dma_data_direction direction = iommu_tce_direction(tce);
ret = get_user_pages_fast(tce & PAGE_MASK, 1,
direction != DMA_TO_DEVICE, &page);
if (unlikely(ret != 1)) {
/* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
- tce, entry << IOMMU_PAGE_SHIFT, ret); */
+ tce, entry << IOMMU_PAGE_SHIFT(tbl), ret); */
return -EFAULT;
}
hwaddr = (unsigned long) page_address(page) + offset;
@@ -1067,7 +1067,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
if (ret < 0)
pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
- __func__, entry << IOMMU_PAGE_SHIFT, tce, ret);
+ __func__, entry << tbl->it_page_shift, tce, ret);
return ret;
}
@@ -1088,6 +1088,14 @@ int iommu_take_ownership(struct iommu_table *tbl)
memset(tbl->it_map, 0xff, sz);
iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
+ /*
+ * Disable iommu bypass, otherwise the user can DMA to all of
+ * our physical memory via the bypass window instead of just
+ * the pages that has been explicitly mapped into the iommu
+ */
+ if (tbl->set_bypass)
+ tbl->set_bypass(tbl, false);
+
return 0;
}
EXPORT_SYMBOL_GPL(iommu_take_ownership);
@@ -1102,10 +1110,14 @@ void iommu_release_ownership(struct iommu_table *tbl)
/* Restore bit#0 set by iommu_init_table() */
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
+
+ /* The kernel owns the device now, we can restore the iommu bypass */
+ if (tbl->set_bypass)
+ tbl->set_bypass(tbl, true);
}
EXPORT_SYMBOL_GPL(iommu_release_ownership);
-static int iommu_add_device(struct device *dev)
+int iommu_add_device(struct device *dev)
{
struct iommu_table *tbl;
int ret = 0;
@@ -1127,6 +1139,12 @@ static int iommu_add_device(struct device *dev)
pr_debug("iommu_tce: adding %s to iommu group %d\n",
dev_name(dev), iommu_group_id(tbl->it_group));
+ if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
+ pr_err("iommu_tce: unsupported iommu page size.");
+ pr_err("%s has not been added\n", dev_name(dev));
+ return -EINVAL;
+ }
+
ret = iommu_group_add_device(tbl->it_group, dev);
if (ret < 0)
pr_err("iommu_tce: %s has not been added, ret=%d\n",
@@ -1134,52 +1152,23 @@ static int iommu_add_device(struct device *dev)
return ret;
}
+EXPORT_SYMBOL_GPL(iommu_add_device);
-static void iommu_del_device(struct device *dev)
-{
- iommu_group_remove_device(dev);
-}
-
-static int iommu_bus_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
+void iommu_del_device(struct device *dev)
{
- struct device *dev = data;
-
- switch (action) {
- case BUS_NOTIFY_ADD_DEVICE:
- return iommu_add_device(dev);
- case BUS_NOTIFY_DEL_DEVICE:
- iommu_del_device(dev);
- return 0;
- default:
- return 0;
+ /*
+ * Some devices might not have IOMMU table and group
+ * and we needn't detach them from the associated
+ * IOMMU groups
+ */
+ if (!dev->iommu_group) {
+ pr_debug("iommu_tce: skipping device %s with no tbl\n",
+ dev_name(dev));
+ return;
}
-}
-
-static struct notifier_block tce_iommu_bus_nb = {
- .notifier_call = iommu_bus_notifier,
-};
-
-static int __init tce_iommu_init(void)
-{
- struct pci_dev *pdev = NULL;
-
- BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE);
-
- for_each_pci_dev(pdev)
- iommu_add_device(&pdev->dev);
-
- bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
- return 0;
-}
-
-subsys_initcall_sync(tce_iommu_init);
-
-#else
-void iommu_register_group(struct iommu_table *tbl,
- int pci_domain_number, unsigned long pe_num)
-{
+ iommu_group_remove_device(dev);
}
+EXPORT_SYMBOL_GPL(iommu_del_device);
#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ba0165615215..248ee7e5bebd 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -304,7 +304,7 @@ void notrace restore_interrupts(void)
* being re-enabled and generally sanitized the lazy irq state,
* and in the latter case it will leave with interrupts hard
* disabled and marked as such, so the local_irq_enable() call
- * in cpu_idle() will properly re-enable everything.
+ * in arch_cpu_idle() will properly re-enable everything.
*/
bool prep_irq_for_idle(void)
{
@@ -354,8 +354,13 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%*s: ", prec, "LOC");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs);
- seq_printf(p, " Local timer interrupts\n");
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event);
+ seq_printf(p, " Local timer interrupts for timer event device\n");
+
+ seq_printf(p, "%*s: ", prec, "LOC");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others);
+ seq_printf(p, " Local timer interrupts for others\n");
seq_printf(p, "%*s: ", prec, "SPU");
for_each_online_cpu(j)
@@ -389,11 +394,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
*/
u64 arch_irq_stat_cpu(unsigned int cpu)
{
- u64 sum = per_cpu(irq_stat, cpu).timer_irqs;
+ u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event;
sum += per_cpu(irq_stat, cpu).pmu_irqs;
sum += per_cpu(irq_stat, cpu).mce_exceptions;
sum += per_cpu(irq_stat, cpu).spurious_irqs;
+ sum += per_cpu(irq_stat, cpu).timer_irqs_others;
#ifdef CONFIG_PPC_DOORBELL
sum += per_cpu(irq_stat, cpu).doorbell_irqs;
#endif
@@ -459,7 +465,6 @@ static inline void check_stack_overflow(void)
void __do_irq(struct pt_regs *regs)
{
- struct irq_desc *desc;
unsigned int irq;
irq_enter();
@@ -481,11 +486,8 @@ void __do_irq(struct pt_regs *regs)
/* And finally process it */
if (unlikely(irq == NO_IRQ))
__get_cpu_var(irq_stat).spurious_irqs++;
- else {
- desc = irq_to_desc(irq);
- if (likely(desc))
- desc->handle_irq(irq, desc);
- }
+ else
+ generic_handle_irq(irq);
trace_irq_exit(regs);
@@ -553,8 +555,13 @@ void exc_lvl_ctx_init(void)
#ifdef CONFIG_PPC64
cpu_nr = i;
#else
+#ifdef CONFIG_SMP
cpu_nr = get_hard_smp_processor_id(i);
+#else
+ cpu_nr = 0;
#endif
+#endif
+
memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
tp = critirq_ctx[cpu_nr];
tp->cpu = cpu_nr;
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 83e89d310734..8504657379f1 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -15,7 +15,6 @@
*/
#include <linux/kernel.h>
-#include <linux/init.h>
#include <linux/kgdb.h>
#include <linux/smp.h>
#include <linux/signal.h>
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 90fab64d911d..2f72af82513c 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/slab.h>
+#include <asm/code-patching.h>
#include <asm/cacheflush.h>
#include <asm/sstep.h>
#include <asm/uaccess.h>
@@ -491,12 +492,10 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
return ret;
}
-#ifdef CONFIG_PPC64
unsigned long arch_deref_entry_point(void *entry)
{
- return ((func_descr_t *)entry)->entry;
+ return ppc_global_function_entry(entry);
}
-#endif
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
@@ -508,8 +507,12 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
/* setup return addr to the jprobe handler routine */
regs->nip = arch_deref_entry_point(jp->entry);
#ifdef CONFIG_PPC64
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+ regs->gpr[12] = (unsigned long)jp->entry;
+#else
regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
#endif
+#endif
return 1;
}
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index db28032e320e..33aa4ddf597d 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -74,7 +74,7 @@
#define KVM_INST_MTSRIN 0x7c0001e4
static bool kvm_patching_worked = true;
-static char kvm_tmp[1024 * 1024];
+char kvm_tmp[1024 * 1024];
static int kvm_tmp_index;
static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
@@ -413,13 +413,13 @@ static void kvm_map_magic_page(void *data)
{
u32 *features = data;
- ulong in[8];
+ ulong in[8] = {0};
ulong out[8];
in[0] = KVM_MAGIC_PAGE;
- in[1] = KVM_MAGIC_PAGE;
+ in[1] = KVM_MAGIC_PAGE | MAGIC_PAGE_FLAG_NOT_MAPPED_NX;
- kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));
+ epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));
*features = out[0];
}
@@ -711,43 +711,6 @@ static void kvm_use_magic_page(void)
kvm_patching_worked ? "worked" : "failed");
}
-unsigned long kvm_hypercall(unsigned long *in,
- unsigned long *out,
- unsigned long nr)
-{
- unsigned long register r0 asm("r0");
- unsigned long register r3 asm("r3") = in[0];
- unsigned long register r4 asm("r4") = in[1];
- unsigned long register r5 asm("r5") = in[2];
- unsigned long register r6 asm("r6") = in[3];
- unsigned long register r7 asm("r7") = in[4];
- unsigned long register r8 asm("r8") = in[5];
- unsigned long register r9 asm("r9") = in[6];
- unsigned long register r10 asm("r10") = in[7];
- unsigned long register r11 asm("r11") = nr;
- unsigned long register r12 asm("r12");
-
- asm volatile("bl epapr_hypercall_start"
- : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
- "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
- "=r"(r12)
- : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8),
- "r"(r9), "r"(r10), "r"(r11)
- : "memory", "cc", "xer", "ctr", "lr");
-
- out[0] = r4;
- out[1] = r5;
- out[2] = r6;
- out[3] = r7;
- out[4] = r8;
- out[5] = r9;
- out[6] = r10;
- out[7] = r11;
-
- return r3;
-}
-EXPORT_SYMBOL_GPL(kvm_hypercall);
-
static __init void kvm_free_tmp(void)
{
free_reserved_area(&kvm_tmp[kvm_tmp_index],
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 40bd7bd4e19a..936258881c98 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -48,6 +48,9 @@ static struct of_device_id legacy_serial_parents[] __initdata = {
static unsigned int legacy_serial_count;
static int legacy_serial_console = -1;
+static const upf_t legacy_port_flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+ UPF_SHARE_IRQ | UPF_FIXED_PORT;
+
static unsigned int tsi_serial_in(struct uart_port *p, int offset)
{
unsigned int tmp;
@@ -71,8 +74,9 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
phys_addr_t taddr, unsigned long irq,
upf_t flags, int irq_check_parent)
{
- const __be32 *clk, *spd;
+ const __be32 *clk, *spd, *rs;
u32 clock = BASE_BAUD * 16;
+ u32 shift = 0;
int index;
/* get clock freq. if present */
@@ -83,6 +87,11 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
/* get default speed if present */
spd = of_get_property(np, "current-speed", NULL);
+ /* get register shift if present */
+ rs = of_get_property(np, "reg-shift", NULL);
+ if (rs && *rs)
+ shift = be32_to_cpup(rs);
+
/* If we have a location index, then try to use it */
if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS)
index = want_index;
@@ -126,6 +135,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
legacy_serial_ports[index].uartclk = clock;
legacy_serial_ports[index].irq = irq;
legacy_serial_ports[index].flags = flags;
+ legacy_serial_ports[index].regshift = shift;
legacy_serial_infos[index].taddr = taddr;
legacy_serial_infos[index].np = of_node_get(np);
legacy_serial_infos[index].clock = clock;
@@ -153,8 +163,6 @@ static int __init add_legacy_soc_port(struct device_node *np,
{
u64 addr;
const __be32 *addrp;
- upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ
- | UPF_FIXED_PORT;
struct device_node *tsi = of_get_parent(np);
/* We only support ports that have a clock frequency properly
@@ -163,9 +171,8 @@ static int __init add_legacy_soc_port(struct device_node *np,
if (of_get_property(np, "clock-frequency", NULL) == NULL)
return -1;
- /* if reg-shift or offset, don't try to use it */
- if ((of_get_property(np, "reg-shift", NULL) != NULL) ||
- (of_get_property(np, "reg-offset", NULL) != NULL))
+ /* if reg-offset don't try to use it */
+ if ((of_get_property(np, "reg-offset", NULL) != NULL))
return -1;
/* if rtas uses this device, don't try to use it as well */
@@ -185,9 +192,11 @@ static int __init add_legacy_soc_port(struct device_node *np,
* IO port value. It will be fixed up later along with the irq
*/
if (tsi && !strcmp(tsi->type, "tsi-bridge"))
- return add_legacy_port(np, -1, UPIO_TSI, addr, addr, NO_IRQ, flags, 0);
+ return add_legacy_port(np, -1, UPIO_TSI, addr, addr,
+ NO_IRQ, legacy_port_flags, 0);
else
- return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0);
+ return add_legacy_port(np, -1, UPIO_MEM, addr, addr,
+ NO_IRQ, legacy_port_flags, 0);
}
static int __init add_legacy_isa_port(struct device_node *np,
@@ -233,7 +242,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
/* Add port, irq will be dealt with later */
return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]),
- taddr, NO_IRQ, UPF_BOOT_AUTOCONF, 0);
+ taddr, NO_IRQ, legacy_port_flags, 0);
}
@@ -306,7 +315,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
* IO port value. It will be fixed up later along with the irq
*/
return add_legacy_port(np, index, iotype, base, addr, NO_IRQ,
- UPF_BOOT_AUTOCONF, np != pci_dev);
+ legacy_port_flags, np != pci_dev);
}
#endif
@@ -315,17 +324,20 @@ static void __init setup_legacy_serial_console(int console)
struct legacy_serial_info *info = &legacy_serial_infos[console];
struct plat_serial8250_port *port = &legacy_serial_ports[console];
void __iomem *addr;
+ unsigned int stride;
+
+ stride = 1 << port->regshift;
/* Check if a translated MMIO address has been found */
if (info->taddr) {
addr = ioremap(info->taddr, 0x1000);
if (addr == NULL)
return;
- udbg_uart_init_mmio(addr, 1);
+ udbg_uart_init_mmio(addr, stride);
} else {
/* Check if it's PIO and we support untranslated PIO */
if (port->iotype == UPIO_PORT && isa_io_special)
- udbg_uart_init_pio(port->iobase, 1);
+ udbg_uart_init_pio(port->iobase, stride);
else
return;
}
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 75d4f7340da8..015ae55c1868 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -196,7 +196,9 @@ int overlaps_crashkernel(unsigned long start, unsigned long size)
/* Values we need to export to the second kernel via the device tree. */
static phys_addr_t kernel_end;
+static phys_addr_t crashk_base;
static phys_addr_t crashk_size;
+static unsigned long long mem_limit;
static struct property kernel_end_prop = {
.name = "linux,kernel-end",
@@ -207,7 +209,7 @@ static struct property kernel_end_prop = {
static struct property crashk_base_prop = {
.name = "linux,crashkernel-base",
.length = sizeof(phys_addr_t),
- .value = &crashk_res.start,
+ .value = &crashk_base
};
static struct property crashk_size_prop = {
@@ -219,9 +221,11 @@ static struct property crashk_size_prop = {
static struct property memory_limit_prop = {
.name = "linux,memory-limit",
.length = sizeof(unsigned long long),
- .value = &memory_limit,
+ .value = &mem_limit,
};
+#define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG)
+
static void __init export_crashk_values(struct device_node *node)
{
struct property *prop;
@@ -237,8 +241,9 @@ static void __init export_crashk_values(struct device_node *node)
of_remove_property(node, prop);
if (crashk_res.start != 0) {
+ crashk_base = cpu_to_be_ulong(crashk_res.start),
of_add_property(node, &crashk_base_prop);
- crashk_size = resource_size(&crashk_res);
+ crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
of_add_property(node, &crashk_size_prop);
}
@@ -246,6 +251,7 @@ static void __init export_crashk_values(struct device_node *node)
* memory_limit is required by the kexec-tools to limit the
* crash regions to the actual memory used.
*/
+ mem_limit = cpu_to_be_ulong(memory_limit);
of_update_property(node, &memory_limit_prop);
}
@@ -264,7 +270,7 @@ static int __init kexec_setup(void)
of_remove_property(node, prop);
/* information needed by userspace when using default_machine_kexec */
- kernel_end = __pa(_end);
+ kernel_end = cpu_to_be_ulong(__pa(_end));
of_add_property(node, &kernel_end_prop);
export_crashk_values(node);
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index be4e6d648f60..879b3aacac32 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -237,7 +237,7 @@ static void wake_offline_cpus(void)
if (!cpu_online(cpu)) {
printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
cpu);
- cpu_up(cpu);
+ WARN_ON(cpu_up(cpu));
}
}
}
@@ -369,6 +369,7 @@ void default_machine_kexec(struct kimage *image)
/* Values we need to export to the second kernel via the device tree. */
static unsigned long htab_base;
+static unsigned long htab_size;
static struct property htab_base_prop = {
.name = "linux,htab-base",
@@ -379,7 +380,7 @@ static struct property htab_base_prop = {
static struct property htab_size_prop = {
.name = "linux,htab-size",
.length = sizeof(unsigned long),
- .value = &htab_size_bytes,
+ .value = &htab_size,
};
static int __init export_htab_values(void)
@@ -403,8 +404,9 @@ static int __init export_htab_values(void)
if (prop)
of_remove_property(node, prop);
- htab_base = __pa(htab_address);
+ htab_base = cpu_to_be64(__pa(htab_address));
of_add_property(node, &htab_base_prop);
+ htab_size = cpu_to_be64(htab_size_bytes);
of_add_property(node, &htab_size_prop);
of_node_put(node);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
new file mode 100644
index 000000000000..a7fd4cb78b78
--- /dev/null
+++ b/arch/powerpc/kernel/mce.c
@@ -0,0 +1,352 @@
+/*
+ * Machine check exception handling.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce: " fmt
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <linux/irq_work.h>
+#include <asm/mce.h>
+
+static DEFINE_PER_CPU(int, mce_nest_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
+
+/* Queue for delayed MCE events. */
+static DEFINE_PER_CPU(int, mce_queue_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
+
+static void machine_check_process_queued_event(struct irq_work *work);
+struct irq_work mce_event_process_work = {
+ .func = machine_check_process_queued_event,
+};
+
+static void mce_set_error_info(struct machine_check_event *mce,
+ struct mce_error_info *mce_err)
+{
+ mce->error_type = mce_err->error_type;
+ switch (mce_err->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
+ break;
+ case MCE_ERROR_TYPE_UNKNOWN:
+ default:
+ break;
+ }
+}
+
+/*
+ * Decode and save high level MCE information into per cpu buffer which
+ * is an array of machine_check_event structure.
+ */
+void save_mce_event(struct pt_regs *regs, long handled,
+ struct mce_error_info *mce_err,
+ uint64_t nip, uint64_t addr)
+{
+ uint64_t srr1;
+ int index = __get_cpu_var(mce_nest_count)++;
+ struct machine_check_event *mce = &__get_cpu_var(mce_event[index]);
+
+ /*
+ * Return if we don't have enough space to log mce event.
+ * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
+ * the check below will stop buffer overrun.
+ */
+ if (index >= MAX_MC_EVT)
+ return;
+
+ /* Populate generic machine check info */
+ mce->version = MCE_V1;
+ mce->srr0 = nip;
+ mce->srr1 = regs->msr;
+ mce->gpr3 = regs->gpr[3];
+ mce->in_use = 1;
+
+ mce->initiator = MCE_INITIATOR_CPU;
+ if (handled)
+ mce->disposition = MCE_DISPOSITION_RECOVERED;
+ else
+ mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
+ mce->severity = MCE_SEV_ERROR_SYNC;
+
+ srr1 = regs->msr;
+
+ /*
+ * Populate the mce error_type and type-specific error_type.
+ */
+ mce_set_error_info(mce, mce_err);
+
+ if (!addr)
+ return;
+
+ if (mce->error_type == MCE_ERROR_TYPE_TLB) {
+ mce->u.tlb_error.effective_address_provided = true;
+ mce->u.tlb_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
+ mce->u.slb_error.effective_address_provided = true;
+ mce->u.slb_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
+ mce->u.erat_error.effective_address_provided = true;
+ mce->u.erat_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
+ mce->u.ue_error.effective_address_provided = true;
+ mce->u.ue_error.effective_address = addr;
+ }
+ return;
+}
+
+/*
+ * get_mce_event:
+ * mce Pointer to machine_check_event structure to be filled.
+ * release Flag to indicate whether to free the event slot or not.
+ * 0 <= do not release the mce event. Caller will invoke
+ * release_mce_event() once event has been consumed.
+ * 1 <= release the slot.
+ *
+ * return 1 = success
+ * 0 = failure
+ *
+ * get_mce_event() will be called by platform specific machine check
+ * handle routine and in KVM.
+ * When we call get_mce_event(), we are still in interrupt context and
+ * preemption will not be scheduled until ret_from_expect() routine
+ * is called.
+ */
+int get_mce_event(struct machine_check_event *mce, bool release)
+{
+ int index = __get_cpu_var(mce_nest_count) - 1;
+ struct machine_check_event *mc_evt;
+ int ret = 0;
+
+ /* Sanity check */
+ if (index < 0)
+ return ret;
+
+ /* Check if we have MCE info to process. */
+ if (index < MAX_MC_EVT) {
+ mc_evt = &__get_cpu_var(mce_event[index]);
+ /* Copy the event structure and release the original */
+ if (mce)
+ *mce = *mc_evt;
+ if (release)
+ mc_evt->in_use = 0;
+ ret = 1;
+ }
+ /* Decrement the count to free the slot. */
+ if (release)
+ __get_cpu_var(mce_nest_count)--;
+
+ return ret;
+}
+
+void release_mce_event(void)
+{
+ get_mce_event(NULL, true);
+}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_queue_event(void)
+{
+ int index;
+ struct machine_check_event evt;
+
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return;
+
+ index = __get_cpu_var(mce_queue_count)++;
+ /* If queue is full, just return for now. */
+ if (index >= MAX_MC_EVT) {
+ __get_cpu_var(mce_queue_count)--;
+ return;
+ }
+ __get_cpu_var(mce_event_queue[index]) = evt;
+
+ /* Queue irq work to process this event later. */
+ irq_work_queue(&mce_event_process_work);
+}
+
+/*
+ * process pending MCE event from the mce event queue. This function will be
+ * called during syscall exit.
+ */
+static void machine_check_process_queued_event(struct irq_work *work)
+{
+ int index;
+
+ /*
+ * For now just print it to console.
+ * TODO: log this error event to FSP or nvram.
+ */
+ while (__get_cpu_var(mce_queue_count) > 0) {
+ index = __get_cpu_var(mce_queue_count) - 1;
+ machine_check_print_event_info(
+ &__get_cpu_var(mce_event_queue[index]));
+ __get_cpu_var(mce_queue_count)--;
+ }
+}
+
+void machine_check_print_event_info(struct machine_check_event *evt)
+{
+ const char *level, *sevstr, *subtype;
+ static const char *mc_ue_types[] = {
+ "Indeterminate",
+ "Instruction fetch",
+ "Page table walk ifetch",
+ "Load/Store",
+ "Page table walk Load/Store",
+ };
+ static const char *mc_slb_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+ static const char *mc_erat_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+ static const char *mc_tlb_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+
+ /* Print things out */
+ if (evt->version != MCE_V1) {
+ pr_err("Machine Check Exception, Unknown event version %d !\n",
+ evt->version);
+ return;
+ }
+ switch (evt->severity) {
+ case MCE_SEV_NO_ERROR:
+ level = KERN_INFO;
+ sevstr = "Harmless";
+ break;
+ case MCE_SEV_WARNING:
+ level = KERN_WARNING;
+ sevstr = "";
+ break;
+ case MCE_SEV_ERROR_SYNC:
+ level = KERN_ERR;
+ sevstr = "Severe";
+ break;
+ case MCE_SEV_FATAL:
+ default:
+ level = KERN_ERR;
+ sevstr = "Fatal";
+ break;
+ }
+
+ printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
+ evt->disposition == MCE_DISPOSITION_RECOVERED ?
+ "Recovered" : "[Not recovered");
+ printk("%s Initiator: %s\n", level,
+ evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
+ switch (evt->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ subtype = evt->u.ue_error.ue_error_type <
+ ARRAY_SIZE(mc_ue_types) ?
+ mc_ue_types[evt->u.ue_error.ue_error_type]
+ : "Unknown";
+ printk("%s Error type: UE [%s]\n", level, subtype);
+ if (evt->u.ue_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.ue_error.effective_address);
+ if (evt->u.ue_error.physical_address_provided)
+ printk("%s Physial address: %016llx\n",
+ level, evt->u.ue_error.physical_address);
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ subtype = evt->u.slb_error.slb_error_type <
+ ARRAY_SIZE(mc_slb_types) ?
+ mc_slb_types[evt->u.slb_error.slb_error_type]
+ : "Unknown";
+ printk("%s Error type: SLB [%s]\n", level, subtype);
+ if (evt->u.slb_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.slb_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ subtype = evt->u.erat_error.erat_error_type <
+ ARRAY_SIZE(mc_erat_types) ?
+ mc_erat_types[evt->u.erat_error.erat_error_type]
+ : "Unknown";
+ printk("%s Error type: ERAT [%s]\n", level, subtype);
+ if (evt->u.erat_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.erat_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ subtype = evt->u.tlb_error.tlb_error_type <
+ ARRAY_SIZE(mc_tlb_types) ?
+ mc_tlb_types[evt->u.tlb_error.tlb_error_type]
+ : "Unknown";
+ printk("%s Error type: TLB [%s]\n", level, subtype);
+ if (evt->u.tlb_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.tlb_error.effective_address);
+ break;
+ default:
+ case MCE_ERROR_TYPE_UNKNOWN:
+ printk("%s Error type: Unknown\n", level);
+ break;
+ }
+}
+
+uint64_t get_mce_fault_addr(struct machine_check_event *evt)
+{
+ switch (evt->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ if (evt->u.ue_error.effective_address_provided)
+ return evt->u.ue_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ if (evt->u.slb_error.effective_address_provided)
+ return evt->u.slb_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ if (evt->u.erat_error.effective_address_provided)
+ return evt->u.erat_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ if (evt->u.tlb_error.effective_address_provided)
+ return evt->u.tlb_error.effective_address;
+ break;
+ default:
+ case MCE_ERROR_TYPE_UNKNOWN:
+ break;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(get_mce_fault_addr);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
new file mode 100644
index 000000000000..aa9aff3d6ad3
--- /dev/null
+++ b/arch/powerpc/kernel/mce_power.c
@@ -0,0 +1,313 @@
+/*
+ * Machine check exception handling CPU-side for power7 and power8
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce_power: " fmt
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <asm/mmu.h>
+#include <asm/mce.h>
+#include <asm/machdep.h>
+
+/* flush SLBs and reload */
+static void flush_and_reload_slb(void)
+{
+ struct slb_shadow *slb;
+ unsigned long i, n;
+
+ /* Invalidate all SLBs */
+ asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+ /*
+ * If machine check is hit when in guest or in transition, we will
+ * only flush the SLBs and continue.
+ */
+ if (get_paca()->kvm_hstate.in_guest)
+ return;
+#endif
+
+ /* For host kernel, reload the SLBs from shadow SLB buffer. */
+ slb = get_slb_shadow();
+ if (!slb)
+ return;
+
+ n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
+
+ /* Load up the SLB entries from shadow SLB */
+ for (i = 0; i < n; i++) {
+ unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
+ unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
+
+ rb = (rb & ~0xFFFul) | i;
+ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
+ }
+}
+
+static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
+{
+ long handled = 1;
+
+ /*
+ * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
+ * reset the error bits whenever we handle them so that at the end
+ * we can check whether we handled all of them or not.
+ * */
+ if (dsisr & slb_error_bits) {
+ flush_and_reload_slb();
+ /* reset error bits */
+ dsisr &= ~(slb_error_bits);
+ }
+ if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+ /* reset error bits */
+ dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
+ }
+ /* Any other errors we don't understand? */
+ if (dsisr & 0xffffffffUL)
+ handled = 0;
+
+ return handled;
+}
+
+static long mce_handle_derror_p7(uint64_t dsisr)
+{
+ return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
+}
+
+static long mce_handle_common_ierror(uint64_t srr1)
+{
+ long handled = 0;
+
+ switch (P7_SRR1_MC_IFETCH(srr1)) {
+ case 0:
+ break;
+ case P7_SRR1_MC_IFETCH_SLB_PARITY:
+ case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ /* flush and reload SLBs for SLB errors. */
+ flush_and_reload_slb();
+ handled = 1;
+ break;
+ case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+ handled = 1;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return handled;
+}
+
+static long mce_handle_ierror_p7(uint64_t srr1)
+{
+ long handled = 0;
+
+ handled = mce_handle_common_ierror(srr1);
+
+ if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+ flush_and_reload_slb();
+ handled = 1;
+ }
+ return handled;
+}
+
+static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ switch (P7_SRR1_MC_IFETCH(srr1)) {
+ case P7_SRR1_MC_IFETCH_SLB_PARITY:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ break;
+ case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ break;
+ case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ break;
+ case P7_SRR1_MC_IFETCH_UE:
+ case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+ break;
+ case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type =
+ MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ }
+}
+
+static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ mce_get_common_ierror(mce_err, srr1);
+ if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ }
+}
+
+static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
+{
+ if (dsisr & P7_DSISR_MC_UE) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+ } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type =
+ MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+ } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ }
+}
+
+static long mce_handle_ue_error(struct pt_regs *regs)
+{
+ long handled = 0;
+
+ /*
+ * On specific SCOM read via MMIO we may get a machine check
+ * exception with SRR0 pointing inside opal. If that is the
+ * case OPAL may have recovery address to re-read SCOM data in
+ * different way and hence we can recover from this MC.
+ */
+
+ if (ppc_md.mce_check_early_recovery) {
+ if (ppc_md.mce_check_early_recovery(regs))
+ handled = 1;
+ }
+ return handled;
+}
+
+long __machine_check_early_realmode_p7(struct pt_regs *regs)
+{
+ uint64_t srr1, nip, addr;
+ long handled = 1;
+ struct mce_error_info mce_error_info = { 0 };
+
+ srr1 = regs->msr;
+ nip = regs->nip;
+
+ /*
+ * Handle memory errors depending whether this was a load/store or
+ * ifetch exception. Also, populate the mce error_type and
+ * type-specific error_type from either SRR1 or DSISR, depending
+ * whether this was a load/store or ifetch exception
+ */
+ if (P7_SRR1_MC_LOADSTORE(srr1)) {
+ handled = mce_handle_derror_p7(regs->dsisr);
+ mce_get_derror_p7(&mce_error_info, regs->dsisr);
+ addr = regs->dar;
+ } else {
+ handled = mce_handle_ierror_p7(srr1);
+ mce_get_ierror_p7(&mce_error_info, srr1);
+ addr = regs->nip;
+ }
+
+ /* Handle UE error. */
+ if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+ handled = mce_handle_ue_error(regs);
+
+ save_mce_event(regs, handled, &mce_error_info, nip, addr);
+ return handled;
+}
+
+static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ mce_get_common_ierror(mce_err, srr1);
+ if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ }
+}
+
+static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
+{
+ mce_get_derror_p7(mce_err, dsisr);
+ if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ }
+}
+
+static long mce_handle_ierror_p8(uint64_t srr1)
+{
+ long handled = 0;
+
+ handled = mce_handle_common_ierror(srr1);
+
+ if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
+ flush_and_reload_slb();
+ handled = 1;
+ }
+ return handled;
+}
+
+static long mce_handle_derror_p8(uint64_t dsisr)
+{
+ return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
+}
+
+long __machine_check_early_realmode_p8(struct pt_regs *regs)
+{
+ uint64_t srr1, nip, addr;
+ long handled = 1;
+ struct mce_error_info mce_error_info = { 0 };
+
+ srr1 = regs->msr;
+ nip = regs->nip;
+
+ if (P7_SRR1_MC_LOADSTORE(srr1)) {
+ handled = mce_handle_derror_p8(regs->dsisr);
+ mce_get_derror_p8(&mce_error_info, regs->dsisr);
+ addr = regs->dar;
+ } else {
+ handled = mce_handle_ierror_p8(srr1);
+ mce_get_ierror_p8(&mce_error_info, srr1);
+ addr = regs->nip;
+ }
+
+ /* Handle UE error. */
+ if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+ handled = mce_handle_ue_error(regs);
+
+ save_mce_event(regs, handled, &mce_error_info, nip, addr);
+ return handled;
+}
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index e47d268727a4..7c6bb4b17b49 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -57,11 +57,14 @@ _GLOBAL(call_do_softirq)
mtlr r0
blr
+/*
+ * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
+ */
_GLOBAL(call_do_irq)
mflr r0
stw r0,4(r1)
lwz r10,THREAD+KSP_LIMIT(r2)
- addi r11,r3,THREAD_INFO_GAP
+ addi r11,r4,THREAD_INFO_GAP
stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
mr r1,r4
stw r10,8(r1)
@@ -344,7 +347,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
*/
_KPROBE(flush_icache_range)
BEGIN_FTR_SECTION
- isync
+ PURGE_PREFETCHED_INS
blr /* for 601, do nothing */
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
li r5,L1_CACHE_BYTES-1
@@ -448,6 +451,7 @@ _GLOBAL(invalidate_dcache_range)
*/
_GLOBAL(__flush_dcache_icache)
BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
blr
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
@@ -489,6 +493,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
*/
_GLOBAL(__flush_dcache_icache_phys)
BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
blr /* for 601, do nothing */
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
mfmsr r10
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 64bf8db12b15..4e314b90c75d 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -34,7 +34,7 @@ _GLOBAL(call_do_softirq)
std r0,16(r1)
stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
mr r1,r3
- bl .__do_softirq
+ bl __do_softirq
ld r1,0(r1)
ld r0,16(r1)
mtlr r0
@@ -45,7 +45,7 @@ _GLOBAL(call_do_irq)
std r0,16(r1)
stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
mr r1,r4
- bl .__do_irq
+ bl __do_irq
ld r1,0(r1)
ld r0,16(r1)
mtlr r0
@@ -67,6 +67,7 @@ PPC64_CACHES:
_KPROBE(flush_icache_range)
BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
blr
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
/*
@@ -211,6 +212,11 @@ _GLOBAL(__flush_dcache_icache)
* Different systems have different cache line sizes
*/
+BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
+ blr
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+
/* Flush the dcache */
ld r7,PPC64_CACHES@toc(r2)
clrrdi r3,r3,PAGE_SHIFT /* Page align */
@@ -500,7 +506,7 @@ _GLOBAL(kexec_smp_wait)
stb r4,PACAKEXECSTATE(r13)
SYNC
- b .kexec_wait
+ b kexec_wait
/*
* switch to real mode (turn mmu off)
@@ -570,7 +576,7 @@ _GLOBAL(kexec_sequence)
/* copy dest pages, flush whole dest image */
mr r3,r29
- bl .kexec_copy_flush /* (image) */
+ bl kexec_copy_flush /* (image) */
/* turn off mmu */
bl real_mode
@@ -580,7 +586,7 @@ _GLOBAL(kexec_sequence)
mr r4,r30 /* start, aka phys mem offset */
li r5,0x100
li r6,0
- bl .copy_and_flush /* (dest, src, copy limit, start offset) */
+ bl copy_and_flush /* (dest, src, copy limit, start offset) */
1: /* assume normal blr return */
/* release other cpus to the new kernel secondary start at 0x60 */
@@ -589,8 +595,12 @@ _GLOBAL(kexec_sequence)
stw r6,kexec_flag-1b(5)
/* clear out hardware hash page table and tlb */
- ld r5,0(r27) /* deref function descriptor */
- mtctr r5
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+ ld r12,0(r27) /* deref function descriptor */
+#else
+ mr r12,r27
+#endif
+ mtctr r12
bctrl /* ppc_md.hpte_clear_all(void); */
/*
@@ -624,3 +634,31 @@ _GLOBAL(kexec_sequence)
li r5,0
blr /* image->start(physid, image->start, 0); */
#endif /* CONFIG_KEXEC */
+
+#ifdef CONFIG_MODULES
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+
+#ifdef CONFIG_MODVERSIONS
+.weak __crc_TOC.
+.section "___kcrctab+TOC.","a"
+.globl __kcrctab_TOC.
+__kcrctab_TOC.:
+ .llong __crc_TOC.
+#endif
+
+/*
+ * Export a fake .TOC. since both modpost and depmod will complain otherwise.
+ * Both modpost and depmod strip the leading . so we do the same here.
+ */
+.section "__ksymtab_strings","a"
+__kstrtab_TOC.:
+ .asciz "TOC."
+
+.section "___ksymtab+TOC.","a"
+/* This symbol name is important: it's used by modpost to find exported syms */
+.globl __ksymtab_TOC.
+__ksymtab_TOC.:
+ .llong 0 /* .value */
+ .llong __kstrtab_TOC.
+#endif /* ELFv2 */
+#endif /* MODULES */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 12664c130d73..d807ee626af9 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -22,6 +22,7 @@
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/bug.h>
+#include <linux/uaccess.h>
#include <asm/module.h>
#include <asm/firmware.h>
#include <asm/code-patching.h>
@@ -41,46 +42,170 @@
#define DEBUGP(fmt , ...)
#endif
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define R2_STACK_OFFSET 24
+
+/* An address is simply the address of the function. */
+typedef unsigned long func_desc_t;
+
+static func_desc_t func_desc(unsigned long addr)
+{
+ return addr;
+}
+static unsigned long func_addr(unsigned long addr)
+{
+ return addr;
+}
+static unsigned long stub_func_addr(func_desc_t func)
+{
+ return func;
+}
+
+/* PowerPC64 specific values for the Elf64_Sym st_other field. */
+#define STO_PPC64_LOCAL_BIT 5
+#define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT)
+#define PPC64_LOCAL_ENTRY_OFFSET(other) \
+ (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2)
+
+static unsigned int local_entry_offset(const Elf64_Sym *sym)
+{
+ /* sym->st_other indicates offset to local entry point
+ * (otherwise it will assume r12 is the address of the start
+ * of function and try to derive r2 from it). */
+ return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
+}
+#else
+#define R2_STACK_OFFSET 40
+
+/* An address is address of the OPD entry, which contains address of fn. */
+typedef struct ppc64_opd_entry func_desc_t;
+
+static func_desc_t func_desc(unsigned long addr)
+{
+ return *(struct ppc64_opd_entry *)addr;
+}
+static unsigned long func_addr(unsigned long addr)
+{
+ return func_desc(addr).funcaddr;
+}
+static unsigned long stub_func_addr(func_desc_t func)
+{
+ return func.funcaddr;
+}
+static unsigned int local_entry_offset(const Elf64_Sym *sym)
+{
+ return 0;
+}
+#endif
+
/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
the kernel itself). But on PPC64, these need to be used for every
jump, actually, to reset r2 (TOC+0x8000). */
struct ppc64_stub_entry
{
- /* 28 byte jump instruction sequence (7 instructions) */
- unsigned char jump[28];
- unsigned char unused[4];
+ /* 28 byte jump instruction sequence (7 instructions). We only
+ * need 6 instructions on ABIv2 but we always allocate 7 so
+ * so we don't have to modify the trampoline load instruction. */
+ u32 jump[7];
+ u32 unused;
/* Data for the above code */
- struct ppc64_opd_entry opd;
+ func_desc_t funcdata;
};
-/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
- function which may be more than 24-bits away. We could simply
- patch the new r2 value and function pointer into the stub, but it's
- significantly shorter to put these values at the end of the stub
- code, and patch the stub address (32-bits relative to the TOC ptr,
- r2) into the stub. */
-static struct ppc64_stub_entry ppc64_stub =
-{ .jump = {
-#ifdef __LITTLE_ENDIAN__
- 0x00, 0x00, 0x82, 0x3d, /* addis r12,r2, <high> */
- 0x00, 0x00, 0x8c, 0x39, /* addi r12,r12, <low> */
+/*
+ * PPC64 uses 24 bit jumps, but we need to jump into other modules or
+ * the kernel which may be further. So we jump to a stub.
+ *
+ * For ELFv1 we need to use this to set up the new r2 value (aka TOC
+ * pointer). For ELFv2 it's the callee's responsibility to set up the
+ * new r2, but for both we need to save the old r2.
+ *
+ * We could simply patch the new r2 value and function pointer into
+ * the stub, but it's significantly shorter to put these values at the
+ * end of the stub code, and patch the stub address (32-bits relative
+ * to the TOC ptr, r2) into the stub.
+ */
+
+static u32 ppc64_stub_insns[] = {
+ 0x3d620000, /* addis r11,r2, <high> */
+ 0x396b0000, /* addi r11,r11, <low> */
/* Save current r2 value in magic place on the stack. */
- 0x28, 0x00, 0x41, 0xf8, /* std r2,40(r1) */
- 0x20, 0x00, 0x6c, 0xe9, /* ld r11,32(r12) */
- 0x28, 0x00, 0x4c, 0xe8, /* ld r2,40(r12) */
- 0xa6, 0x03, 0x69, 0x7d, /* mtctr r11 */
- 0x20, 0x04, 0x80, 0x4e /* bctr */
-#else
- 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */
- 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */
- /* Save current r2 value in magic place on the stack. */
- 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */
- 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */
- 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */
- 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */
- 0x4e, 0x80, 0x04, 0x20 /* bctr */
+ 0xf8410000|R2_STACK_OFFSET, /* std r2,R2_STACK_OFFSET(r1) */
+ 0xe98b0020, /* ld r12,32(r11) */
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+ /* Set up new r2 from function descriptor */
+ 0xe84b0028, /* ld r2,40(r11) */
+#endif
+ 0x7d8903a6, /* mtctr r12 */
+ 0x4e800420 /* bctr */
+};
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static u32 ppc64_stub_mask[] = {
+ 0xffff0000,
+ 0xffff0000,
+ 0xffffffff,
+ 0xffffffff,
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+ 0xffffffff,
+#endif
+ 0xffffffff,
+ 0xffffffff
+};
+
+bool is_module_trampoline(u32 *p)
+{
+ unsigned int i;
+ u32 insns[ARRAY_SIZE(ppc64_stub_insns)];
+
+ BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask));
+
+ if (probe_kernel_read(insns, p, sizeof(insns)))
+ return -EFAULT;
+
+ for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) {
+ u32 insna = insns[i];
+ u32 insnb = ppc64_stub_insns[i];
+ u32 mask = ppc64_stub_mask[i];
+
+ if ((insna & mask) != (insnb & mask))
+ return false;
+ }
+
+ return true;
+}
+
+int module_trampoline_target(struct module *mod, u32 *trampoline,
+ unsigned long *target)
+{
+ u32 buf[2];
+ u16 upper, lower;
+ long offset;
+ void *toc_entry;
+
+ if (probe_kernel_read(buf, trampoline, sizeof(buf)))
+ return -EFAULT;
+
+ upper = buf[0] & 0xffff;
+ lower = buf[1] & 0xffff;
+
+ /* perform the addis/addi, both signed */
+ offset = ((short)upper << 16) + (short)lower;
+
+ /*
+ * Now get the address this trampoline jumps to. This
+ * is always 32 bytes into our trampoline stub.
+ */
+ toc_entry = (void *)mod->arch.toc + offset + 32;
+
+ if (probe_kernel_read(target, toc_entry, sizeof(*target)))
+ return -EFAULT;
+
+ return 0;
+}
+
#endif
-} };
/* Count how many different 24-bit relocations (different symbol,
different addend) */
@@ -183,17 +308,27 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
return relocs * sizeof(struct ppc64_stub_entry);
}
+/* Still needed for ELFv2, for .TOC. */
static void dedotify_versions(struct modversion_info *vers,
unsigned long size)
{
struct modversion_info *end;
for (end = (void *)vers + size; vers < end; vers++)
- if (vers->name[0] == '.')
+ if (vers->name[0] == '.') {
memmove(vers->name, vers->name+1, strlen(vers->name));
+#ifdef ARCH_RELOCATES_KCRCTAB
+ /* The TOC symbol has no CRC computed. To avoid CRC
+ * check failing, we must force it to the expected
+ * value (see CRC check in module.c).
+ */
+ if (!strcmp(vers->name, "TOC."))
+ vers->crc = -(unsigned long)reloc_start;
+#endif
+ }
}
-/* Undefined symbols which refer to .funcname, hack to funcname */
+/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
{
unsigned int i;
@@ -207,6 +342,24 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
}
}
+static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex)
+{
+ unsigned int i, numsyms;
+ Elf64_Sym *syms;
+
+ syms = (Elf64_Sym *)sechdrs[symindex].sh_addr;
+ numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
+
+ for (i = 1; i < numsyms; i++) {
+ if (syms[i].st_shndx == SHN_UNDEF
+ && strcmp(strtab + syms[i].st_name, "TOC.") == 0)
+ return &syms[i];
+ }
+ return NULL;
+}
+
int module_frob_arch_sections(Elf64_Ehdr *hdr,
Elf64_Shdr *sechdrs,
char *secstrings,
@@ -271,21 +424,12 @@ static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
/* Patch stub to reference function and correct r2 value. */
static inline int create_stub(Elf64_Shdr *sechdrs,
struct ppc64_stub_entry *entry,
- struct ppc64_opd_entry *opd,
+ unsigned long addr,
struct module *me)
{
- Elf64_Half *loc1, *loc2;
long reladdr;
- *entry = ppc64_stub;
-
-#ifdef __LITTLE_ENDIAN__
- loc1 = (Elf64_Half *)&entry->jump[0];
- loc2 = (Elf64_Half *)&entry->jump[4];
-#else
- loc1 = (Elf64_Half *)&entry->jump[2];
- loc2 = (Elf64_Half *)&entry->jump[6];
-#endif
+ memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns));
/* Stub uses address relative to r2. */
reladdr = (unsigned long)entry - my_r2(sechdrs, me);
@@ -296,35 +440,33 @@ static inline int create_stub(Elf64_Shdr *sechdrs,
}
DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
- *loc1 = PPC_HA(reladdr);
- *loc2 = PPC_LO(reladdr);
- entry->opd.funcaddr = opd->funcaddr;
- entry->opd.r2 = opd->r2;
+ entry->jump[0] |= PPC_HA(reladdr);
+ entry->jump[1] |= PPC_LO(reladdr);
+ entry->funcdata = func_desc(addr);
return 1;
}
-/* Create stub to jump to function described in this OPD: we need the
+/* Create stub to jump to function described in this OPD/ptr: we need the
stub to set up the TOC ptr (r2) for the function. */
static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
- unsigned long opdaddr,
+ unsigned long addr,
struct module *me)
{
struct ppc64_stub_entry *stubs;
- struct ppc64_opd_entry *opd = (void *)opdaddr;
unsigned int i, num_stubs;
num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
/* Find this stub, or if that fails, the next avail. entry */
stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
- for (i = 0; stubs[i].opd.funcaddr; i++) {
+ for (i = 0; stub_func_addr(stubs[i].funcdata); i++) {
BUG_ON(i >= num_stubs);
- if (stubs[i].opd.funcaddr == opd->funcaddr)
+ if (stub_func_addr(stubs[i].funcdata) == func_addr(addr))
return (unsigned long)&stubs[i];
}
- if (!create_stub(sechdrs, &stubs[i], opd, me))
+ if (!create_stub(sechdrs, &stubs[i], addr, me))
return 0;
return (unsigned long)&stubs[i];
@@ -339,7 +481,8 @@ static int restore_r2(u32 *instruction, struct module *me)
me->name, *instruction);
return 0;
}
- *instruction = 0xe8410028; /* ld r2,40(r1) */
+ /* ld r2,R2_STACK_OFFSET(r1) */
+ *instruction = 0xe8410000 | R2_STACK_OFFSET;
return 1;
}
@@ -357,6 +500,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
+
+ /* First time we're called, we can fix up .TOC. */
+ if (!me->arch.toc_fixed) {
+ sym = find_dot_toc(sechdrs, strtab, symindex);
+ /* It's theoretically possible that a module doesn't want a
+ * .TOC. so don't fail it just for that. */
+ if (sym)
+ sym->st_value = my_r2(sechdrs, me);
+ me->arch.toc_fixed = true;
+ }
+
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
/* This is where to make the change */
location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -453,7 +607,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
return -ENOENT;
if (!restore_r2((u32 *)location + 1, me))
return -ENOEXEC;
- }
+ } else
+ value += local_entry_offset(sym);
/* Convert value to relative */
value -= (unsigned long)location;
@@ -474,6 +629,31 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
*location = value - (unsigned long)location;
break;
+ case R_PPC64_TOCSAVE:
+ /*
+ * Marker reloc indicates we don't have to save r2.
+ * That would only save us one instruction, so ignore
+ * it.
+ */
+ break;
+
+ case R_PPC64_REL16_HA:
+ /* Subtract location pointer */
+ value -= (unsigned long)location;
+ value = ((value + 0x8000) >> 16);
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
+ case R_PPC64_REL16_LO:
+ /* Subtract location pointer */
+ value -= (unsigned long)location;
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
default:
printk("%s: Unknown ADD relocation: %lu\n",
me->name,
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0620eaaaad45..d6e195e8cd4c 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -98,13 +98,32 @@ static inline void free_lppacas(void) { }
/*
* 3 persistent SLBs are registered here. The buffer will be zero
* initially, hence will all be invaild until we actually write them.
+ *
+ * If you make the number of persistent SLB entries dynamic, please also
+ * update PR KVM to flush and restore them accordingly.
*/
-struct slb_shadow slb_shadow[] __cacheline_aligned = {
- [0 ... (NR_CPUS-1)] = {
- .persistent = cpu_to_be32(SLB_NUM_BOLTED),
- .buffer_length = cpu_to_be32(sizeof(struct slb_shadow)),
- },
-};
+static struct slb_shadow *slb_shadow;
+
+static void __init allocate_slb_shadows(int nr_cpus, int limit)
+{
+ int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
+ slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
+ memset(slb_shadow, 0, size);
+}
+
+static struct slb_shadow * __init init_slb_shadow(int cpu)
+{
+ struct slb_shadow *s = &slb_shadow[cpu];
+
+ s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
+ s->buffer_length = cpu_to_be32(sizeof(*s));
+
+ return s;
+}
+
+#else /* CONFIG_PPC_STD_MMU_64 */
+
+static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
#endif /* CONFIG_PPC_STD_MMU_64 */
@@ -136,14 +155,20 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->paca_index = cpu;
new_paca->kernel_toc = kernel_toc;
new_paca->kernelbase = (unsigned long) _stext;
- new_paca->kernel_msr = MSR_KERNEL;
+ /* Only set MSR:IR/DR when MMU is initialized */
+ new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR);
new_paca->hw_cpu_id = 0xffff;
new_paca->kexec_state = KEXEC_STATE_NONE;
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
#ifdef CONFIG_PPC_STD_MMU_64
- new_paca->slb_shadow_ptr = &slb_shadow[cpu];
+ new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
#endif /* CONFIG_PPC_STD_MMU_64 */
+
+#ifdef CONFIG_PPC_BOOK3E
+ /* For now -- if we have threads this will be adjusted later */
+ new_paca->tcd_ptr = &new_paca->tcd;
+#endif
}
/* Put the paca pointer into r13 and SPRG_PACA */
@@ -190,6 +215,8 @@ void __init allocate_pacas(void)
allocate_lppacas(nr_cpu_ids, limit);
+ allocate_slb_shadows(nr_cpu_ids, limit);
+
/* Can't use for_each_*_cpu, as they aren't functional yet */
for (cpu = 0; cpu < nr_cpu_ids; cpu++)
initialise_paca(&paca[cpu], cpu);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index a1e3e40ca3fd..b49c72fd7f16 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
+#include <linux/delay.h>
#include <linux/export.h>
#include <linux/of_address.h>
#include <linux/of_pci.h>
@@ -120,6 +121,25 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus,
return 1;
}
+void pcibios_reset_secondary_bus(struct pci_dev *dev)
+{
+ u16 ctrl;
+
+ if (ppc_md.pcibios_reset_secondary_bus) {
+ ppc_md.pcibios_reset_secondary_bus(dev);
+ return;
+ }
+
+ pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+ ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+ msleep(2);
+
+ ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+ ssleep(1);
+}
+
static resource_size_t pcibios_io_size(const struct pci_controller *hose)
{
#ifdef CONFIG_PPC64
@@ -201,26 +221,6 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
return NULL;
}
-static ssize_t pci_show_devspec(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct pci_dev *pdev;
- struct device_node *np;
-
- pdev = to_pci_dev (dev);
- np = pci_device_to_OF_node(pdev);
- if (np == NULL || np->full_name == NULL)
- return 0;
- return sprintf(buf, "%s", np->full_name);
-}
-static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
-
-/* Add sysfs properties */
-int pcibios_add_platform_entries(struct pci_dev *pdev)
-{
- return device_create_file(&pdev->dev, &dev_attr_devspec);
-}
-
/*
* Reads the interrupt pin to determine if interrupt is use by card.
* If the interrupt is used, then gets the interrupt line from the
@@ -666,60 +666,36 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
void pci_process_bridge_OF_ranges(struct pci_controller *hose,
struct device_node *dev, int primary)
{
- const __be32 *ranges;
- int rlen;
- int pna = of_n_addr_cells(dev);
- int np = pna + 5;
int memno = 0;
- u32 pci_space;
- unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;
struct resource *res;
+ struct of_pci_range range;
+ struct of_pci_range_parser parser;
printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
dev->full_name, primary ? "(primary)" : "");
- /* Get ranges property */
- ranges = of_get_property(dev, "ranges", &rlen);
- if (ranges == NULL)
+ /* Check for ranges property */
+ if (of_pci_range_parser_init(&parser, dev))
return;
/* Parse it */
- while ((rlen -= np * 4) >= 0) {
- /* Read next ranges element */
- pci_space = of_read_number(ranges, 1);
- pci_addr = of_read_number(ranges + 1, 2);
- cpu_addr = of_translate_address(dev, ranges + 3);
- size = of_read_number(ranges + pna + 3, 2);
- ranges += np;
-
+ for_each_of_pci_range(&parser, &range) {
/* If we failed translation or got a zero-sized region
* (some FW try to feed us with non sensical zero sized regions
* such as power3 which look like some kind of attempt at exposing
* the VGA memory hole)
*/
- if (cpu_addr == OF_BAD_ADDR || size == 0)
+ if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
continue;
- /* Now consume following elements while they are contiguous */
- for (; rlen >= np * sizeof(u32);
- ranges += np, rlen -= np * 4) {
- if (of_read_number(ranges, 1) != pci_space)
- break;
- pci_next = of_read_number(ranges + 1, 2);
- cpu_next = of_translate_address(dev, ranges + 3);
- if (pci_next != pci_addr + size ||
- cpu_next != cpu_addr + size)
- break;
- size += of_read_number(ranges + pna + 3, 2);
- }
-
/* Act based on address space type */
res = NULL;
- switch ((pci_space >> 24) & 0x3) {
- case 1: /* PCI IO space */
+ switch (range.flags & IORESOURCE_TYPE_BITS) {
+ case IORESOURCE_IO:
printk(KERN_INFO
" IO 0x%016llx..0x%016llx -> 0x%016llx\n",
- cpu_addr, cpu_addr + size - 1, pci_addr);
+ range.cpu_addr, range.cpu_addr + range.size - 1,
+ range.pci_addr);
/* We support only one IO range */
if (hose->pci_io_size) {
@@ -729,11 +705,12 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
}
#ifdef CONFIG_PPC32
/* On 32 bits, limit I/O space to 16MB */
- if (size > 0x01000000)
- size = 0x01000000;
+ if (range.size > 0x01000000)
+ range.size = 0x01000000;
/* 32 bits needs to map IOs here */
- hose->io_base_virt = ioremap(cpu_addr, size);
+ hose->io_base_virt = ioremap(range.cpu_addr,
+ range.size);
/* Expect trouble if pci_addr is not 0 */
if (primary)
@@ -743,20 +720,20 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
/* pci_io_size and io_base_phys always represent IO
* space starting at 0 so we factor in pci_addr
*/
- hose->pci_io_size = pci_addr + size;
- hose->io_base_phys = cpu_addr - pci_addr;
+ hose->pci_io_size = range.pci_addr + range.size;
+ hose->io_base_phys = range.cpu_addr - range.pci_addr;
/* Build resource */
res = &hose->io_resource;
- res->flags = IORESOURCE_IO;
- res->start = pci_addr;
+ range.cpu_addr = range.pci_addr;
break;
- case 2: /* PCI Memory space */
- case 3: /* PCI 64 bits Memory space */
+ case IORESOURCE_MEM:
printk(KERN_INFO
" MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
- cpu_addr, cpu_addr + size - 1, pci_addr,
- (pci_space & 0x40000000) ? "Prefetch" : "");
+ range.cpu_addr, range.cpu_addr + range.size - 1,
+ range.pci_addr,
+ (range.pci_space & 0x40000000) ?
+ "Prefetch" : "");
/* We support only 3 memory ranges */
if (memno >= 3) {
@@ -765,28 +742,21 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
continue;
}
/* Handles ISA memory hole space here */
- if (pci_addr == 0) {
+ if (range.pci_addr == 0) {
if (primary || isa_mem_base == 0)
- isa_mem_base = cpu_addr;
- hose->isa_mem_phys = cpu_addr;
- hose->isa_mem_size = size;
+ isa_mem_base = range.cpu_addr;
+ hose->isa_mem_phys = range.cpu_addr;
+ hose->isa_mem_size = range.size;
}
/* Build resource */
- hose->mem_offset[memno] = cpu_addr - pci_addr;
+ hose->mem_offset[memno] = range.cpu_addr -
+ range.pci_addr;
res = &hose->mem_resources[memno++];
- res->flags = IORESOURCE_MEM;
- if (pci_space & 0x40000000)
- res->flags |= IORESOURCE_PREFETCH;
- res->start = cpu_addr;
break;
}
if (res != NULL) {
- res->name = dev->full_name;
- res->end = res->start + size - 1;
- res->parent = NULL;
- res->sibling = NULL;
- res->child = NULL;
+ of_pci_range_to_resource(&range, dev, res);
}
}
}
@@ -835,7 +805,7 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
* at 0 as unset as well, except if PCI_PROBE_ONLY is also set
* since in that case, we don't want to re-assign anything
*/
- pcibios_resource_to_bus(dev, &reg, res);
+ pcibios_resource_to_bus(dev->bus, &reg, res);
if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) ||
(reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) {
/* Only print message if not re-assigning */
@@ -886,7 +856,7 @@ static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
/* Job is a bit different between memory and IO */
if (res->flags & IORESOURCE_MEM) {
- pcibios_resource_to_bus(dev, &region, res);
+ pcibios_resource_to_bus(dev->bus, &region, res);
/* If the BAR is non-0 then it's probably been initialized */
if (region.start != 0)
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index c1e17ae68a08..5b789177aa29 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -98,8 +98,7 @@ void pcibios_add_pci_devices(struct pci_bus * bus)
max = bus->busn_res.start;
for (pass = 0; pass < 2; pass++) {
list_for_each_entry(dev, &bus->devices, bus_list) {
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
- dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ if (pci_is_bridge(dev))
max = pci_scan_bridge(bus, dev,
max, pass);
}
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index a9e311f7a9dd..155013da27e0 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -208,8 +208,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
unsigned long in_devfn)
{
struct pci_controller* hose;
- struct list_head *ln;
- struct pci_bus *bus = NULL;
+ struct pci_bus *tmp_bus, *bus = NULL;
struct device_node *hose_node;
/* Argh ! Please forgive me for that hack, but that's the
@@ -230,11 +229,12 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
* used on pre-domains setup. We return the first match
*/
- for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
- bus = pci_bus_b(ln);
- if (in_bus >= bus->number && in_bus <= bus->busn_res.end)
+ list_for_each_entry(tmp_bus, &pci_root_buses, node) {
+ if (in_bus >= tmp_bus->number &&
+ in_bus <= tmp_bus->busn_res.end) {
+ bus = tmp_bus;
break;
- bus = NULL;
+ }
}
if (bus == NULL || bus->dev.of_node == NULL)
return -ENODEV;
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index ac0b034f9ae0..44562aa97f16 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -111,7 +111,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
res->name = pci_name(dev);
region.start = base;
region.end = base + size - 1;
- pcibios_bus_to_resource(dev, res, &region);
+ pcibios_bus_to_resource(dev->bus, res, &region);
}
}
@@ -280,7 +280,7 @@ void of_scan_pci_bridge(struct pci_dev *dev)
res->flags = flags;
region.start = of_read_number(&ranges[1], 2);
region.end = region.start + size - 1;
- pcibios_bus_to_resource(dev, res, &region);
+ pcibios_bus_to_resource(dev->bus, res, &region);
}
sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
bus->number);
@@ -304,6 +304,9 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
struct pci_dev *dev = NULL;
const __be32 *reg;
int reglen, devfn;
+#ifdef CONFIG_EEH
+ struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+#endif
pr_debug(" * %s\n", dn->full_name);
if (!of_device_is_available(dn))
@@ -321,6 +324,12 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
return dev;
}
+ /* Device removed permanently ? */
+#ifdef CONFIG_EEH
+ if (edev && (edev->mode & EEH_DEV_REMOVED))
+ return NULL;
+#endif
+
/* create a new pci_dev for this device */
dev = of_create_pci_dev(dn, bus, devfn);
if (!dev)
@@ -362,8 +371,7 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
/* Now scan child busses */
list_for_each_entry(dev, &bus->devices, bus_list) {
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
- dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
+ if (pci_is_bridge(dev)) {
of_scan_pci_bridge(dev);
}
}
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 3bd77edd7610..48d17d6fca5b 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -120,6 +120,7 @@ EXPORT_SYMBOL(giveup_spe);
EXPORT_SYMBOL(flush_instruction_cache);
#endif
EXPORT_SYMBOL(flush_dcache_range);
+EXPORT_SYMBOL(flush_icache_range);
#ifdef CONFIG_SMP
#ifdef CONFIG_PPC32
@@ -154,9 +155,7 @@ EXPORT_SYMBOL(__cmpdi2);
#endif
long long __bswapdi2(long long);
EXPORT_SYMBOL(__bswapdi2);
-#ifdef __BIG_ENDIAN__
EXPORT_SYMBOL(memcpy);
-#endif
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcmp);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4a96556fd2d4..be99774d3f44 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -25,7 +25,6 @@
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/elf.h>
-#include <linux/init.h>
#include <linux/prctl.h>
#include <linux/init_task.h>
#include <linux/export.h>
@@ -55,6 +54,7 @@
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#endif
+#include <asm/code-patching.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
@@ -74,6 +74,48 @@ struct task_struct *last_task_used_vsx = NULL;
struct task_struct *last_task_used_spe = NULL;
#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void giveup_fpu_maybe_transactional(struct task_struct *tsk)
+{
+ /*
+ * If we are saving the current thread's registers, and the
+ * thread is in a transactional state, set the TIF_RESTORE_TM
+ * bit so that we know to restore the registers before
+ * returning to userspace.
+ */
+ if (tsk == current && tsk->thread.regs &&
+ MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
+ !test_thread_flag(TIF_RESTORE_TM)) {
+ tsk->thread.tm_orig_msr = tsk->thread.regs->msr;
+ set_thread_flag(TIF_RESTORE_TM);
+ }
+
+ giveup_fpu(tsk);
+}
+
+void giveup_altivec_maybe_transactional(struct task_struct *tsk)
+{
+ /*
+ * If we are saving the current thread's registers, and the
+ * thread is in a transactional state, set the TIF_RESTORE_TM
+ * bit so that we know to restore the registers before
+ * returning to userspace.
+ */
+ if (tsk == current && tsk->thread.regs &&
+ MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
+ !test_thread_flag(TIF_RESTORE_TM)) {
+ tsk->thread.tm_orig_msr = tsk->thread.regs->msr;
+ set_thread_flag(TIF_RESTORE_TM);
+ }
+
+ giveup_altivec(tsk);
+}
+
+#else
+#define giveup_fpu_maybe_transactional(tsk) giveup_fpu(tsk)
+#define giveup_altivec_maybe_transactional(tsk) giveup_altivec(tsk)
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
#ifdef CONFIG_PPC_FPU
/*
* Make sure the floating-point register state in the
@@ -102,13 +144,13 @@ void flush_fp_to_thread(struct task_struct *tsk)
*/
BUG_ON(tsk != current);
#endif
- giveup_fpu(tsk);
+ giveup_fpu_maybe_transactional(tsk);
}
preempt_enable();
}
}
EXPORT_SYMBOL_GPL(flush_fp_to_thread);
-#endif
+#endif /* CONFIG_PPC_FPU */
void enable_kernel_fp(void)
{
@@ -116,11 +158,11 @@ void enable_kernel_fp(void)
#ifdef CONFIG_SMP
if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
- giveup_fpu(current);
+ giveup_fpu_maybe_transactional(current);
else
giveup_fpu(NULL); /* just enables FP for kernel */
#else
- giveup_fpu(last_task_used_math);
+ giveup_fpu_maybe_transactional(last_task_used_math);
#endif /* CONFIG_SMP */
}
EXPORT_SYMBOL(enable_kernel_fp);
@@ -132,11 +174,11 @@ void enable_kernel_altivec(void)
#ifdef CONFIG_SMP
if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
- giveup_altivec(current);
+ giveup_altivec_maybe_transactional(current);
else
giveup_altivec_notask();
#else
- giveup_altivec(last_task_used_altivec);
+ giveup_altivec_maybe_transactional(last_task_used_altivec);
#endif /* CONFIG_SMP */
}
EXPORT_SYMBOL(enable_kernel_altivec);
@@ -153,7 +195,7 @@ void flush_altivec_to_thread(struct task_struct *tsk)
#ifdef CONFIG_SMP
BUG_ON(tsk != current);
#endif
- giveup_altivec(tsk);
+ giveup_altivec_maybe_transactional(tsk);
}
preempt_enable();
}
@@ -182,8 +224,8 @@ EXPORT_SYMBOL(enable_kernel_vsx);
void giveup_vsx(struct task_struct *tsk)
{
- giveup_fpu(tsk);
- giveup_altivec(tsk);
+ giveup_fpu_maybe_transactional(tsk);
+ giveup_altivec_maybe_transactional(tsk);
__giveup_vsx(tsk);
}
@@ -454,14 +496,21 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk)
return 0;
}
-int set_breakpoint(struct arch_hw_breakpoint *brk)
+void __set_breakpoint(struct arch_hw_breakpoint *brk)
{
__get_cpu_var(current_brk) = *brk;
if (cpu_has_feature(CPU_FTR_DAWR))
- return set_dawr(brk);
+ set_dawr(brk);
+ else
+ set_dabr(brk);
+}
- return set_dabr(brk);
+void set_breakpoint(struct arch_hw_breakpoint *brk)
+{
+ preempt_disable();
+ __set_breakpoint(brk);
+ preempt_enable();
}
#ifdef CONFIG_PPC64
@@ -479,7 +528,48 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
return false;
return true;
}
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static void tm_reclaim_thread(struct thread_struct *thr,
+ struct thread_info *ti, uint8_t cause)
+{
+ unsigned long msr_diff = 0;
+
+ /*
+ * If FP/VSX registers have been already saved to the
+ * thread_struct, move them to the transact_fp array.
+ * We clear the TIF_RESTORE_TM bit since after the reclaim
+ * the thread will no longer be transactional.
+ */
+ if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) {
+ msr_diff = thr->tm_orig_msr & ~thr->regs->msr;
+ if (msr_diff & MSR_FP)
+ memcpy(&thr->transact_fp, &thr->fp_state,
+ sizeof(struct thread_fp_state));
+ if (msr_diff & MSR_VEC)
+ memcpy(&thr->transact_vr, &thr->vr_state,
+ sizeof(struct thread_vr_state));
+ clear_ti_thread_flag(ti, TIF_RESTORE_TM);
+ msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1;
+ }
+
+ tm_reclaim(thr, thr->regs->msr, cause);
+
+ /* Having done the reclaim, we now have the checkpointed
+ * FP/VSX values in the registers. These might be valid
+ * even if we have previously called enable_kernel_fp() or
+ * flush_fp_to_thread(), so update thr->regs->msr to
+ * indicate their current validity.
+ */
+ thr->regs->msr |= msr_diff;
+}
+
+void tm_reclaim_current(uint8_t cause)
+{
+ tm_enable();
+ tm_reclaim_thread(&current->thread, current_thread_info(), cause);
+}
+
static inline void tm_reclaim_task(struct task_struct *tsk)
{
/* We have to work out if we're switching from/to a task that's in the
@@ -502,9 +592,11 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
/* Stash the original thread MSR, as giveup_fpu et al will
* modify it. We hold onto it to see whether the task used
- * FP & vector regs.
+ * FP & vector regs. If the TIF_RESTORE_TM flag is set,
+ * tm_orig_msr is already set.
*/
- thr->tm_orig_msr = thr->regs->msr;
+ if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM))
+ thr->tm_orig_msr = thr->regs->msr;
TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
"ccr=%lx, msr=%lx, trap=%lx)\n",
@@ -512,7 +604,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
thr->regs->ccr, thr->regs->msr,
thr->regs->trap);
- tm_reclaim(thr, thr->regs->msr, TM_CAUSE_RESCHED);
+ tm_reclaim_thread(thr, task_thread_info(tsk), TM_CAUSE_RESCHED);
TM_DEBUG("--- tm_reclaim on pid %d complete\n",
tsk->pid);
@@ -526,6 +618,31 @@ out_and_saveregs:
tm_save_sprs(thr);
}
+extern void __tm_recheckpoint(struct thread_struct *thread,
+ unsigned long orig_msr);
+
+void tm_recheckpoint(struct thread_struct *thread,
+ unsigned long orig_msr)
+{
+ unsigned long flags;
+
+ /* We really can't be interrupted here as the TEXASR registers can't
+ * change and later in the trecheckpoint code, we have a userspace R1.
+ * So let's hard disable over this region.
+ */
+ local_irq_save(flags);
+ hard_irq_disable();
+
+ /* The TM SPRs are restored here, so that TEXASR.FS can be set
+ * before the trecheckpoint and no explosion occurs.
+ */
+ tm_restore_sprs(thread);
+
+ __tm_recheckpoint(thread, orig_msr);
+
+ local_irq_restore(flags);
+}
+
static inline void tm_recheckpoint_new_task(struct task_struct *new)
{
unsigned long msr;
@@ -544,13 +661,10 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new)
if (!new->thread.regs)
return;
- /* The TM SPRs are restored here, so that TEXASR.FS can be set
- * before the trecheckpoint and no explosion occurs.
- */
- tm_restore_sprs(&new->thread);
-
- if (!MSR_TM_ACTIVE(new->thread.regs->msr))
+ if (!MSR_TM_ACTIVE(new->thread.regs->msr)){
+ tm_restore_sprs(&new->thread);
return;
+ }
msr = new->thread.tm_orig_msr;
/* Recheckpoint to restore original checkpointed register state. */
TM_DEBUG("*** tm_recheckpoint of pid %d "
@@ -588,6 +702,43 @@ static inline void __switch_to_tm(struct task_struct *prev)
tm_reclaim_task(prev);
}
}
+
+/*
+ * This is called if we are on the way out to userspace and the
+ * TIF_RESTORE_TM flag is set. It checks if we need to reload
+ * FP and/or vector state and does so if necessary.
+ * If userspace is inside a transaction (whether active or
+ * suspended) and FP/VMX/VSX instructions have ever been enabled
+ * inside that transaction, then we have to keep them enabled
+ * and keep the FP/VMX/VSX state loaded while ever the transaction
+ * continues. The reason is that if we didn't, and subsequently
+ * got a FP/VMX/VSX unavailable interrupt inside a transaction,
+ * we don't know whether it's the same transaction, and thus we
+ * don't know which of the checkpointed state and the transactional
+ * state to use.
+ */
+void restore_tm_state(struct pt_regs *regs)
+{
+ unsigned long msr_diff;
+
+ clear_thread_flag(TIF_RESTORE_TM);
+ if (!MSR_TM_ACTIVE(regs->msr))
+ return;
+
+ msr_diff = current->thread.tm_orig_msr & ~regs->msr;
+ msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
+ if (msr_diff & MSR_FP) {
+ fp_enable();
+ load_fp_state(&current->thread.fp_state);
+ regs->msr |= current->thread.fpexc_mode;
+ }
+ if (msr_diff & MSR_VEC) {
+ vec_enable();
+ load_vr_state(&current->thread.vr_state);
+ }
+ regs->msr |= msr_diff;
+}
+
#else
#define tm_recheckpoint_new_task(new)
#define __switch_to_tm(prev)
@@ -604,15 +755,15 @@ struct task_struct *__switch_to(struct task_struct *prev,
WARN_ON(!irqs_disabled());
- /* Back up the TAR across context switches.
+ /* Back up the TAR and DSCR across context switches.
* Note that the TAR is not available for use in the kernel. (To
* provide this, the TAR should be backed up/restored on exception
* entry/exit instead, and be in pt_regs. FIXME, this should be in
* pt_regs anyway (for debug).)
- * Save the TAR here before we do treclaim/trecheckpoint as these
- * will change the TAR.
+ * Save the TAR and DSCR here before we do treclaim/trecheckpoint as
+ * these will change them.
*/
- save_tar(&prev->thread);
+ save_early_sprs(&prev->thread);
__switch_to_tm(prev);
@@ -690,8 +841,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
* schedule DABR
*/
#ifndef CONFIG_HAVE_HW_BREAKPOINT
- if (unlikely(hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
- set_breakpoint(&new->thread.hw_brk);
+ if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
+ __set_breakpoint(&new->thread.hw_brk);
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif
@@ -927,6 +1078,15 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
flush_altivec_to_thread(src);
flush_vsx_to_thread(src);
flush_spe_to_thread(src);
+ /*
+ * Flush TM state out so we can copy it. __switch_to_tm() does this
+ * flush but it removes the checkpointed state from the current CPU and
+ * transitions the CPU out of TM mode. Hence we need to call
+ * tm_recheckpoint_new_task() (on the same task) to restore the
+ * checkpointed state back and the TM mode.
+ */
+ __switch_to_tm(src);
+ tm_recheckpoint_new_task(src);
*dst = *src;
@@ -956,7 +1116,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
struct thread_info *ti = (void *)task_stack_page(p);
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gpr[1] = sp + sizeof(struct pt_regs);
- childregs->gpr[14] = usp; /* function */
+ /* function */
+ if (usp)
+ childregs->gpr[14] = ppc_function_entry((void *)usp);
#ifdef CONFIG_PPC64
clear_tsk_thread_flag(p, TIF_32BIT);
childregs->softe = 1;
@@ -1035,17 +1197,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
if (cpu_has_feature(CPU_FTR_HAS_PPR))
p->thread.ppr = INIT_PPR;
#endif
- /*
- * The PPC64 ABI makes use of a TOC to contain function
- * pointers. The function (ret_from_except) is actually a pointer
- * to the TOC entry. The first entry is a pointer to the actual
- * function.
- */
-#ifdef CONFIG_PPC64
- kregs->nip = *((unsigned long *)f);
-#else
- kregs->nip = (unsigned long)f;
-#endif
+ kregs->nip = ppc_function_entry(f);
return 0;
}
@@ -1175,6 +1327,19 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
if (val & PR_FP_EXC_SW_ENABLE) {
#ifdef CONFIG_SPE
if (cpu_has_feature(CPU_FTR_SPE)) {
+ /*
+ * When the sticky exception bits are set
+ * directly by userspace, it must call prctl
+ * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
+ * in the existing prctl settings) or
+ * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
+ * the bits being set). <fenv.h> functions
+ * saving and restoring the whole
+ * floating-point environment need to do so
+ * anyway to restore the prctl settings from
+ * the saved environment.
+ */
+ tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
tsk->thread.fpexc_mode = val &
(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
return 0;
@@ -1206,9 +1371,22 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
#ifdef CONFIG_SPE
- if (cpu_has_feature(CPU_FTR_SPE))
+ if (cpu_has_feature(CPU_FTR_SPE)) {
+ /*
+ * When the sticky exception bits are set
+ * directly by userspace, it must call prctl
+ * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
+ * in the existing prctl settings) or
+ * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
+ * the bits being set). <fenv.h> functions
+ * saving and restoring the whole
+ * floating-point environment need to do so
+ * anyway to restore the prctl settings from
+ * the saved environment.
+ */
+ tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
val = tsk->thread.fpexc_mode;
- else
+ } else
return -EINVAL;
#else
return -EINVAL;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fa0ad8aafbcc..b694b0730971 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -29,10 +29,11 @@
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/kexec.h>
-#include <linux/debugfs.h>
#include <linux/irq.h>
#include <linux/memblock.h>
#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -117,14 +118,14 @@ static void __init move_device_tree(void)
DBG("-> move_device_tree\n");
start = __pa(initial_boot_params);
- size = be32_to_cpu(initial_boot_params->totalsize);
+ size = fdt_totalsize(initial_boot_params);
if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) ||
overlaps_crashkernel(start, size) ||
overlaps_initrd(start, size)) {
p = __va(memblock_alloc(size, PAGE_SIZE));
memcpy(p, initial_boot_params, size);
- initial_boot_params = (struct boot_param_header *)p;
+ initial_boot_params = p;
DBG("Moved device tree to 0x%p\n", p);
}
@@ -162,7 +163,7 @@ static struct ibm_pa_feature {
{CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
};
-static void __init scan_features(unsigned long node, unsigned char *ftrs,
+static void __init scan_features(unsigned long node, const unsigned char *ftrs,
unsigned long tablelen,
struct ibm_pa_feature *fp,
unsigned long ft_size)
@@ -201,8 +202,8 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs,
static void __init check_cpu_pa_features(unsigned long node)
{
- unsigned char *pa_ftrs;
- unsigned long tablelen;
+ const unsigned char *pa_ftrs;
+ int tablelen;
pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen);
if (pa_ftrs == NULL)
@@ -215,7 +216,7 @@ static void __init check_cpu_pa_features(unsigned long node)
#ifdef CONFIG_PPC_STD_MMU_64
static void __init check_cpu_slb_size(unsigned long node)
{
- __be32 *slb_size_ptr;
+ const __be32 *slb_size_ptr;
slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);
if (slb_size_ptr != NULL) {
@@ -256,7 +257,7 @@ static struct feature_property {
static inline void identical_pvr_fixup(unsigned long node)
{
unsigned int pvr;
- char *model = of_get_flat_dt_prop(node, "model", NULL);
+ const char *model = of_get_flat_dt_prop(node, "model", NULL);
/*
* Since 440GR(x)/440EP(x) processors have the same pvr,
@@ -294,11 +295,11 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth,
void *data)
{
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
const __be32 *prop;
const __be32 *intserv;
int i, nthreads;
- unsigned long len;
+ int len;
int found = -1;
int found_thread = 0;
@@ -324,9 +325,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* version 2 of the kexec param format adds the phys cpuid of
* booted proc.
*/
- if (be32_to_cpu(initial_boot_params->version) >= 2) {
+ if (fdt_version(initial_boot_params) >= 2) {
if (be32_to_cpu(intserv[i]) ==
- be32_to_cpu(initial_boot_params->boot_cpuid_phys)) {
+ fdt_boot_cpuid_phys(initial_boot_params)) {
found = boot_cpu_count;
found_thread = i;
}
@@ -346,52 +347,52 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
#endif
}
- if (found >= 0) {
- DBG("boot cpu: logical %d physical %d\n", found,
- be32_to_cpu(intserv[found_thread]));
- boot_cpuid = found;
- set_hard_smp_processor_id(found,
- be32_to_cpu(intserv[found_thread]));
+ /* Not the boot CPU */
+ if (found < 0)
+ return 0;
- /*
- * PAPR defines "logical" PVR values for cpus that
- * meet various levels of the architecture:
- * 0x0f000001 Architecture version 2.04
- * 0x0f000002 Architecture version 2.05
- * If the cpu-version property in the cpu node contains
- * such a value, we call identify_cpu again with the
- * logical PVR value in order to use the cpu feature
- * bits appropriate for the architecture level.
- *
- * A POWER6 partition in "POWER6 architected" mode
- * uses the 0x0f000002 PVR value; in POWER5+ mode
- * it uses 0x0f000001.
- */
- prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
- if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
- identify_cpu(0, be32_to_cpup(prop));
+ DBG("boot cpu: logical %d physical %d\n", found,
+ be32_to_cpu(intserv[found_thread]));
+ boot_cpuid = found;
+ set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
- identical_pvr_fixup(node);
- }
+ /*
+ * PAPR defines "logical" PVR values for cpus that
+ * meet various levels of the architecture:
+ * 0x0f000001 Architecture version 2.04
+ * 0x0f000002 Architecture version 2.05
+ * If the cpu-version property in the cpu node contains
+ * such a value, we call identify_cpu again with the
+ * logical PVR value in order to use the cpu feature
+ * bits appropriate for the architecture level.
+ *
+ * A POWER6 partition in "POWER6 architected" mode
+ * uses the 0x0f000002 PVR value; in POWER5+ mode
+ * it uses 0x0f000001.
+ */
+ prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
+ if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
+ identify_cpu(0, be32_to_cpup(prop));
+
+ identical_pvr_fixup(node);
check_cpu_feature_properties(node);
check_cpu_pa_features(node);
check_cpu_slb_size(node);
-#ifdef CONFIG_PPC_PSERIES
+#ifdef CONFIG_PPC64
if (nthreads > 1)
cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
else
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
#endif
-
return 0;
}
int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
int depth, void *data)
{
- unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
+ const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
/* Use common scan routine to determine if this is the chosen node */
if (early_init_dt_scan_chosen(node, uname, depth, data) == 0)
@@ -442,8 +443,9 @@ int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
*/
static int __init early_init_dt_scan_drconf_memory(unsigned long node)
{
- __be32 *dm, *ls, *usm;
- unsigned long l, n, flags;
+ const __be32 *dm, *ls, *usm;
+ int l;
+ unsigned long n, flags;
u64 base, size, memblock_size;
unsigned int is_kexec_kdump = 0, rngs;
@@ -523,6 +525,20 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node,
return early_init_dt_scan_memory(node, uname, depth, data);
}
+/*
+ * For a relocatable kernel, we need to get the memstart_addr first,
+ * then use it to calculate the virtual kernel start address. This has
+ * to happen at a very early stage (before machine_init). In this case,
+ * we just want to get the memstart_address and would not like to mess the
+ * memblock at this stage. So introduce a variable to skip the memblock_add()
+ * for this reason.
+ */
+#ifdef CONFIG_RELOCATABLE
+static int add_mem_to_memblock = 1;
+#else
+#define add_mem_to_memblock 1
+#endif
+
void __init early_init_dt_add_memory_arch(u64 base, u64 size)
{
#ifdef CONFIG_PPC64
@@ -543,14 +559,18 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
}
/* Add the chunk to the MEMBLOCK list */
- memblock_add(base, size);
+ if (add_mem_to_memblock)
+ memblock_add(base, size);
}
static void __init early_reserve_mem_dt(void)
{
- unsigned long i, len, dt_root;
+ unsigned long i, dt_root;
+ int len;
const __be32 *prop;
+ early_init_fdt_scan_reserved_mem();
+
dt_root = of_get_flat_dt_root();
prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len);
@@ -577,18 +597,10 @@ static void __init early_reserve_mem_dt(void)
static void __init early_reserve_mem(void)
{
- u64 base, size;
__be64 *reserve_map;
- unsigned long self_base;
- unsigned long self_size;
reserve_map = (__be64 *)(((unsigned long)initial_boot_params) +
- be32_to_cpu(initial_boot_params->off_mem_rsvmap));
-
- /* before we do anything, lets reserve the dt blob */
- self_base = __pa((unsigned long)initial_boot_params);
- self_size = be32_to_cpu(initial_boot_params->totalsize);
- memblock_reserve(self_base, self_size);
+ fdt_off_mem_rsvmap(initial_boot_params));
/* Look for the new "reserved-regions" property in the DT */
early_reserve_mem_dt();
@@ -618,26 +630,12 @@ static void __init early_reserve_mem(void)
size_32 = be32_to_cpup(reserve_map_32++);
if (size_32 == 0)
break;
- /* skip if the reservation is for the blob */
- if (base_32 == self_base && size_32 == self_size)
- continue;
DBG("reserving: %x -> %x\n", base_32, size_32);
memblock_reserve(base_32, size_32);
}
return;
}
#endif
- DBG("Processing reserve map\n");
-
- /* Handle the reserve map in the fdt blob if it exists */
- while (1) {
- base = be64_to_cpup(reserve_map++);
- size = be64_to_cpup(reserve_map++);
- if (size == 0)
- break;
- DBG("reserving: %llx -> %llx\n", base, size);
- memblock_reserve(base, size);
- }
}
void __init early_init_devtree(void *params)
@@ -664,13 +662,6 @@ void __init early_init_devtree(void *params)
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
#endif
- /* Pre-initialize the cmd_line with the content of boot_commmand_line,
- * which will be empty except when the content of the variable has
- * been overriden by a bootloading mechanism. This happens typically
- * with HAL takeover
- */
- strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
-
/* Retrieve various informations from the /chosen node of the
* device-tree, including the platform type, initrd location and
* size, TCE reserve, and more ...
@@ -729,6 +720,10 @@ void __init early_init_devtree(void *params)
* (altivec support, boot CPU ID, ...)
*/
of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
+ if (boot_cpuid < 0) {
+ printk("Failed to indentify boot CPU !\n");
+ BUG();
+ }
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
/* We'll later wait for secondaries to check in; there are
@@ -737,9 +732,38 @@ void __init early_init_devtree(void *params)
spinning_secondaries = boot_cpu_count - 1;
#endif
+#ifdef CONFIG_PPC_POWERNV
+ /* Scan and build the list of machine check recoverable ranges */
+ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL);
+#endif
+
DBG(" <- early_init_devtree()\n");
}
+#ifdef CONFIG_RELOCATABLE
+/*
+ * This function run before early_init_devtree, so we have to init
+ * initial_boot_params.
+ */
+void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
+{
+ /* Setup flat device-tree pointer */
+ initial_boot_params = params;
+
+ /*
+ * Scan the memory nodes and set add_mem_to_memblock to 0 to avoid
+ * mess the memblock.
+ */
+ add_mem_to_memblock = 0;
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+ add_mem_to_memblock = 1;
+
+ if (size)
+ *size = first_memblock_size;
+}
+#endif
+
/*******
*
* New implementation of the OF "find" APIs, return a refcounted
@@ -871,23 +895,3 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
return (int)phys_id == get_hard_smp_processor_id(cpu);
}
-
-#if defined(CONFIG_DEBUG_FS) && defined(DEBUG)
-static struct debugfs_blob_wrapper flat_dt_blob;
-
-static int __init export_flat_device_tree(void)
-{
- struct dentry *d;
-
- flat_dt_blob.data = initial_boot_params;
- flat_dt_blob.size = be32_to_cpu(initial_boot_params->totalsize);
-
- d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR,
- powerpc_debugfs_root, &flat_dt_blob);
- if (!d)
- return 1;
-
- return 0;
-}
-__initcall(export_flat_device_tree);
-#endif
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 078145acf7fb..1a85d8f96739 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1268,201 +1268,6 @@ static u64 __initdata prom_opal_base;
static u64 __initdata prom_opal_entry;
#endif
-#ifdef __BIG_ENDIAN__
-/* XXX Don't change this structure without updating opal-takeover.S */
-static struct opal_secondary_data {
- s64 ack; /* 0 */
- u64 go; /* 8 */
- struct opal_takeover_args args; /* 16 */
-} opal_secondary_data;
-
-static u64 __initdata prom_opal_align;
-static u64 __initdata prom_opal_size;
-static int __initdata prom_rtas_start_cpu;
-static u64 __initdata prom_rtas_data;
-static u64 __initdata prom_rtas_entry;
-
-extern char opal_secondary_entry;
-
-static void __init prom_query_opal(void)
-{
- long rc;
-
- /* We must not query for OPAL presence on a machine that
- * supports TNK takeover (970 blades), as this uses the same
- * h-call with different arguments and will crash
- */
- if (PHANDLE_VALID(call_prom("finddevice", 1, 1,
- ADDR("/tnk-memory-map")))) {
- prom_printf("TNK takeover detected, skipping OPAL check\n");
- return;
- }
-
- prom_printf("Querying for OPAL presence... ");
-
- rc = opal_query_takeover(&prom_opal_size,
- &prom_opal_align);
- prom_debug("(rc = %ld) ", rc);
- if (rc != 0) {
- prom_printf("not there.\n");
- return;
- }
- of_platform = PLATFORM_OPAL;
- prom_printf(" there !\n");
- prom_debug(" opal_size = 0x%lx\n", prom_opal_size);
- prom_debug(" opal_align = 0x%lx\n", prom_opal_align);
- if (prom_opal_align < 0x10000)
- prom_opal_align = 0x10000;
-}
-
-static int __init prom_rtas_call(int token, int nargs, int nret,
- int *outputs, ...)
-{
- struct rtas_args rtas_args;
- va_list list;
- int i;
-
- rtas_args.token = token;
- rtas_args.nargs = nargs;
- rtas_args.nret = nret;
- rtas_args.rets = (rtas_arg_t *)&(rtas_args.args[nargs]);
- va_start(list, outputs);
- for (i = 0; i < nargs; ++i)
- rtas_args.args[i] = va_arg(list, rtas_arg_t);
- va_end(list);
-
- for (i = 0; i < nret; ++i)
- rtas_args.rets[i] = 0;
-
- opal_enter_rtas(&rtas_args, prom_rtas_data,
- prom_rtas_entry);
-
- if (nret > 1 && outputs != NULL)
- for (i = 0; i < nret-1; ++i)
- outputs[i] = rtas_args.rets[i+1];
- return (nret > 0)? rtas_args.rets[0]: 0;
-}
-
-static void __init prom_opal_hold_cpus(void)
-{
- int i, cnt, cpu, rc;
- long j;
- phandle node;
- char type[64];
- u32 servers[8];
- void *entry = (unsigned long *)&opal_secondary_entry;
- struct opal_secondary_data *data = &opal_secondary_data;
-
- prom_debug("prom_opal_hold_cpus: start...\n");
- prom_debug(" - entry = 0x%x\n", entry);
- prom_debug(" - data = 0x%x\n", data);
-
- data->ack = -1;
- data->go = 0;
-
- /* look for cpus */
- for (node = 0; prom_next_node(&node); ) {
- type[0] = 0;
- prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "cpu") != 0)
- continue;
-
- /* Skip non-configured cpus. */
- if (prom_getprop(node, "status", type, sizeof(type)) > 0)
- if (strcmp(type, "okay") != 0)
- continue;
-
- cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers,
- sizeof(servers));
- if (cnt == PROM_ERROR)
- break;
- cnt >>= 2;
- for (i = 0; i < cnt; i++) {
- cpu = servers[i];
- prom_debug("CPU %d ... ", cpu);
- if (cpu == prom.cpu) {
- prom_debug("booted !\n");
- continue;
- }
- prom_debug("starting ... ");
-
- /* Init the acknowledge var which will be reset by
- * the secondary cpu when it awakens from its OF
- * spinloop.
- */
- data->ack = -1;
- rc = prom_rtas_call(prom_rtas_start_cpu, 3, 1,
- NULL, cpu, entry, data);
- prom_debug("rtas rc=%d ...", rc);
-
- for (j = 0; j < 100000000 && data->ack == -1; j++) {
- HMT_low();
- mb();
- }
- HMT_medium();
- if (data->ack != -1)
- prom_debug("done, PIR=0x%x\n", data->ack);
- else
- prom_debug("timeout !\n");
- }
- }
- prom_debug("prom_opal_hold_cpus: end...\n");
-}
-
-static void __init prom_opal_takeover(void)
-{
- struct opal_secondary_data *data = &opal_secondary_data;
- struct opal_takeover_args *args = &data->args;
- u64 align = prom_opal_align;
- u64 top_addr, opal_addr;
-
- args->k_image = (u64)_stext;
- args->k_size = _end - _stext;
- args->k_entry = 0;
- args->k_entry2 = 0x60;
-
- top_addr = _ALIGN_UP(args->k_size, align);
-
- if (prom_initrd_start != 0) {
- args->rd_image = prom_initrd_start;
- args->rd_size = prom_initrd_end - args->rd_image;
- args->rd_loc = top_addr;
- top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align);
- }
-
- /* Pickup an address for the HAL. We want to go really high
- * up to avoid problem with future kexecs. On the other hand
- * we don't want to be all over the TCEs on P5IOC2 machines
- * which are going to be up there too. We assume the machine
- * has plenty of memory, and we ask for the HAL for now to
- * be just below the 1G point, or above the initrd
- */
- opal_addr = _ALIGN_DOWN(0x40000000 - prom_opal_size, align);
- if (opal_addr < top_addr)
- opal_addr = top_addr;
- args->hal_addr = opal_addr;
-
- /* Copy the command line to the kernel image */
- strlcpy(boot_command_line, prom_cmd_line,
- COMMAND_LINE_SIZE);
-
- prom_debug(" k_image = 0x%lx\n", args->k_image);
- prom_debug(" k_size = 0x%lx\n", args->k_size);
- prom_debug(" k_entry = 0x%lx\n", args->k_entry);
- prom_debug(" k_entry2 = 0x%lx\n", args->k_entry2);
- prom_debug(" hal_addr = 0x%lx\n", args->hal_addr);
- prom_debug(" rd_image = 0x%lx\n", args->rd_image);
- prom_debug(" rd_size = 0x%lx\n", args->rd_size);
- prom_debug(" rd_loc = 0x%lx\n", args->rd_loc);
- prom_printf("Performing OPAL takeover,this can take a few minutes..\n");
- prom_close_stdin();
- mb();
- data->go = 1;
- for (;;)
- opal_do_takeover(args);
-}
-#endif /* __BIG_ENDIAN__ */
-
/*
* Allocate room for and instantiate OPAL
*/
@@ -1597,12 +1402,6 @@ static void __init prom_instantiate_rtas(void)
&val, sizeof(val)) != PROM_ERROR)
rtas_has_query_cpu_stopped = true;
-#if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__)
- /* PowerVN takeover hack */
- prom_rtas_data = base;
- prom_rtas_entry = entry;
- prom_getprop(rtas_node, "start-cpu", &prom_rtas_start_cpu, 4);
-#endif
prom_debug("rtas base = 0x%x\n", base);
prom_debug("rtas entry = 0x%x\n", entry);
prom_debug("rtas size = 0x%x\n", (long)size);
@@ -3027,16 +2826,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
prom_instantiate_rtas();
#ifdef CONFIG_PPC_POWERNV
-#ifdef __BIG_ENDIAN__
- /* Detect HAL and try instanciating it & doing takeover */
- if (of_platform == PLATFORM_PSERIES_LPAR) {
- prom_query_opal();
- if (of_platform == PLATFORM_OPAL) {
- prom_opal_hold_cpus();
- prom_opal_takeover();
- }
- } else
-#endif /* __BIG_ENDIAN__ */
if (of_platform == PLATFORM_OPAL)
prom_instantiate_opal();
#endif /* CONFIG_PPC_POWERNV */
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index b0c263da219a..fe8e54b9ef7d 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -21,9 +21,7 @@ _end enter_prom memcpy memset reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
-opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry
-boot_command_line __prom_init_toc_start __prom_init_toc_end
-btext_setup_display"
+__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
NM="$1"
OBJ="$2"
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index b47a0e1ab001..d88736fbece6 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -69,8 +69,8 @@ _GLOBAL(relocate)
* R_PPC64_RELATIVE ones.
*/
mtctr r8
-5: lwz r0,12(9) /* ELF64_R_TYPE(reloc->r_info) */
- cmpwi r0,R_PPC64_RELATIVE
+5: ld r0,8(9) /* ELF64_R_TYPE(reloc->r_info) */
+ cmpdi r0,R_PPC64_RELATIVE
bne 6f
ld r6,0(r9) /* reloc->r_offset */
ld r0,16(r9) /* reloc->r_addend */
@@ -81,6 +81,7 @@ _GLOBAL(relocate)
6: blr
+.balign 8
p_dyn: .llong __dynamic_start - 0b
p_rela: .llong __rela_dyn_start - 0b
p_st: .llong _stext - 0b
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 4cf674d7d5ae..8b4c857c1421 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -993,32 +993,36 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
(struct rtas_ext_event_log_v6 *)log->buffer;
struct pseries_errorlog *sect;
unsigned char *p, *log_end;
+ uint32_t ext_log_length = rtas_error_extended_log_length(log);
+ uint8_t log_format = rtas_ext_event_log_format(ext_log);
+ uint32_t company_id = rtas_ext_event_company_id(ext_log);
/* Check that we understand the format */
- if (log->extended_log_length < sizeof(struct rtas_ext_event_log_v6) ||
- ext_log->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
- ext_log->company_id != RTAS_V6EXT_COMPANY_ID_IBM)
+ if (ext_log_length < sizeof(struct rtas_ext_event_log_v6) ||
+ log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
+ company_id != RTAS_V6EXT_COMPANY_ID_IBM)
return NULL;
- log_end = log->buffer + log->extended_log_length;
+ log_end = log->buffer + ext_log_length;
p = ext_log->vendor_log;
while (p < log_end) {
sect = (struct pseries_errorlog *)p;
- if (sect->id == section_id)
+ if (pseries_errorlog_id(sect) == section_id)
return sect;
- p += sect->length;
+ p += pseries_errorlog_length(sect);
}
return NULL;
}
+/* We assume to be passed big endian arguments */
asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
{
struct rtas_args args;
unsigned long flags;
char *buff_copy, *errbuf = NULL;
- int nargs;
+ int nargs, nret, token;
int rc;
if (!capable(CAP_SYS_ADMIN))
@@ -1027,10 +1031,13 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0)
return -EFAULT;
- nargs = args.nargs;
+ nargs = be32_to_cpu(args.nargs);
+ nret = be32_to_cpu(args.nret);
+ token = be32_to_cpu(args.token);
+
if (nargs > ARRAY_SIZE(args.args)
- || args.nret > ARRAY_SIZE(args.args)
- || nargs + args.nret > ARRAY_SIZE(args.args))
+ || nret > ARRAY_SIZE(args.args)
+ || nargs + nret > ARRAY_SIZE(args.args))
return -EINVAL;
/* Copy in args. */
@@ -1038,14 +1045,14 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
nargs * sizeof(rtas_arg_t)) != 0)
return -EFAULT;
- if (args.token == RTAS_UNKNOWN_SERVICE)
+ if (token == RTAS_UNKNOWN_SERVICE)
return -EINVAL;
args.rets = &args.args[nargs];
- memset(args.rets, 0, args.nret * sizeof(rtas_arg_t));
+ memset(args.rets, 0, nret * sizeof(rtas_arg_t));
/* Need to handle ibm,suspend_me call specially */
- if (args.token == ibm_suspend_me_token) {
+ if (token == ibm_suspend_me_token) {
rc = rtas_ibm_suspend_me(&args);
if (rc)
return rc;
@@ -1062,7 +1069,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
/* A -1 return code indicates that the last command couldn't
be completed due to a hardware error. */
- if (args.rets[0] == -1)
+ if (be32_to_cpu(args.rets[0]) == -1)
errbuf = __fetch_rtas_last_error(buff_copy);
unlock_rtas(flags);
@@ -1077,7 +1084,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
/* Copy out args. */
if (copy_to_user(uargs->args + nargs,
args.args + nargs,
- args.nret * sizeof(rtas_arg_t)) != 0)
+ nret * sizeof(rtas_arg_t)) != 0)
return -EFAULT;
return 0;
@@ -1135,7 +1142,7 @@ void __init rtas_initialize(void)
int __init early_init_dt_scan_rtas(unsigned long node,
const char *uname, int depth, void *data)
{
- u32 *basep, *entryp, *sizep;
+ const u32 *basep, *entryp, *sizep;
if (depth != 1 || strcmp(uname, "rtas") != 0)
return 0;
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 2f3cdb01506d..658e89d2025b 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -705,7 +705,7 @@ static int __init rtas_flash_init(void)
if (rtas_token("ibm,update-flash-64-and-reboot") ==
RTAS_UNKNOWN_SERVICE) {
pr_info("rtas_flash: no firmware flash support\n");
- return 1;
+ return -EINVAL;
}
rtas_validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL);
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 7d4c7172f38e..c168337aef9d 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -80,10 +80,6 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
if (ret)
return PCIBIOS_DEVICE_NOT_FOUND;
- if (returnval == EEH_IO_ERROR_VALUE(size) &&
- eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node)))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
return PCIBIOS_SUCCESSFUL;
}
@@ -92,18 +88,39 @@ static int rtas_pci_read_config(struct pci_bus *bus,
int where, int size, u32 *val)
{
struct device_node *busdn, *dn;
-
- busdn = pci_bus_to_OF_node(bus);
+ struct pci_dn *pdn;
+ bool found = false;
+#ifdef CONFIG_EEH
+ struct eeh_dev *edev;
+#endif
+ int ret;
/* Search only direct children of the bus */
+ *val = 0xFFFFFFFF;
+ busdn = pci_bus_to_OF_node(bus);
for (dn = busdn->child; dn; dn = dn->sibling) {
- struct pci_dn *pdn = PCI_DN(dn);
+ pdn = PCI_DN(dn);
if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn))
- return rtas_read_config(pdn, where, size, val);
+ && of_device_is_available(dn)) {
+ found = true;
+ break;
+ }
}
- return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!found)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+#ifdef CONFIG_EEH
+ edev = of_node_to_eeh_dev(dn);
+ if (edev && edev->pe && edev->pe->state & EEH_PE_RESET)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+#endif
+
+ ret = rtas_read_config(pdn, where, size, val);
+ if (*val == EEH_IO_ERROR_VALUE(size) &&
+ eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return ret;
}
int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
@@ -136,17 +153,34 @@ static int rtas_pci_write_config(struct pci_bus *bus,
int where, int size, u32 val)
{
struct device_node *busdn, *dn;
-
- busdn = pci_bus_to_OF_node(bus);
+ struct pci_dn *pdn;
+ bool found = false;
+#ifdef CONFIG_EEH
+ struct eeh_dev *edev;
+#endif
+ int ret;
/* Search only direct children of the bus */
+ busdn = pci_bus_to_OF_node(bus);
for (dn = busdn->child; dn; dn = dn->sibling) {
- struct pci_dn *pdn = PCI_DN(dn);
+ pdn = PCI_DN(dn);
if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn))
- return rtas_write_config(pdn, where, size, val);
+ && of_device_is_available(dn)) {
+ found = true;
+ break;
+ }
}
- return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (!found)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+#ifdef CONFIG_EEH
+ edev = of_node_to_eeh_dev(dn);
+ if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+#endif
+ ret = rtas_write_config(pdn, where, size, val);
+
+ return ret;
}
static struct pci_ops rtas_pci_ops = {
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 1130c53ad652..e736387fee6a 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -150,8 +150,8 @@ static void printk_log_rtas(char *buf, int len)
struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n",
- error_log_cnt, rtas_event_type(errlog->type),
- errlog->severity);
+ error_log_cnt, rtas_event_type(rtas_error_type(errlog)),
+ rtas_error_severity(errlog));
}
}
@@ -159,14 +159,16 @@ static int log_rtas_len(char * buf)
{
int len;
struct rtas_error_log *err;
+ uint32_t extended_log_length;
/* rtas fixed header */
len = 8;
err = (struct rtas_error_log *)buf;
- if (err->extended && err->extended_log_length) {
+ extended_log_length = rtas_error_extended_log_length(err);
+ if (rtas_error_extended(err) && extended_log_length) {
/* extended header */
- len += err->extended_log_length;
+ len += extended_log_length;
}
if (rtas_error_log_max == 0)
@@ -293,15 +295,13 @@ void prrn_schedule_update(u32 scope)
static void handle_rtas_event(const struct rtas_error_log *log)
{
- if (log->type == RTAS_TYPE_PRRN) {
- /* For PRRN Events the extended log length is used to denote
- * the scope for calling rtas update-nodes.
- */
- if (prrn_is_enabled())
- prrn_schedule_update(log->extended_log_length);
- }
+ if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled())
+ return;
- return;
+ /* For PRRN Events the extended log length is used to denote
+ * the scope for calling rtas update-nodes.
+ */
+ prrn_schedule_update(rtas_error_extended_log_length(log));
}
#else
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index bc76cc6b419c..e5b022c55ccd 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -76,6 +76,9 @@ EXPORT_SYMBOL(ppc_md);
struct machdep_calls *machine_id;
EXPORT_SYMBOL(machine_id);
+int boot_cpuid = -1;
+EXPORT_SYMBOL_GPL(boot_cpuid);
+
unsigned long klimit = (unsigned long) _end;
char cmd_line[COMMAND_LINE_SIZE];
@@ -209,6 +212,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
{
unsigned long cpu_id = (unsigned long)v - 1;
unsigned int pvr;
+ unsigned long proc_freq;
unsigned short maj;
unsigned short min;
@@ -260,12 +264,19 @@ static int show_cpuinfo(struct seq_file *m, void *v)
#endif /* CONFIG_TAU */
/*
- * Assume here that all clock rates are the same in a
- * smp system. -- Cort
+ * Platforms that have variable clock rates, should implement
+ * the method ppc_md.get_proc_freq() that reports the clock
+ * rate of a given cpu. The rest can use ppc_proc_freq to
+ * report the clock rate that is same across all cpus.
*/
- if (ppc_proc_freq)
+ if (ppc_md.get_proc_freq)
+ proc_freq = ppc_md.get_proc_freq(cpu_id);
+ else
+ proc_freq = ppc_proc_freq;
+
+ if (proc_freq)
seq_printf(m, "clock\t\t: %lu.%06luMHz\n",
- ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
+ proc_freq / 1000000, proc_freq % 1000000);
if (ppc_md.show_percpuinfo != NULL)
ppc_md.show_percpuinfo(m, cpu_id);
@@ -379,9 +390,10 @@ void __init check_for_initrd(void)
#ifdef CONFIG_SMP
-int threads_per_core, threads_shift;
+int threads_per_core, threads_per_subcore, threads_shift;
cpumask_t threads_core_mask;
EXPORT_SYMBOL_GPL(threads_per_core);
+EXPORT_SYMBOL_GPL(threads_per_subcore);
EXPORT_SYMBOL_GPL(threads_shift);
EXPORT_SYMBOL_GPL(threads_core_mask);
@@ -390,6 +402,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
int i;
threads_per_core = tpc;
+ threads_per_subcore = tpc;
cpumask_clear(&threads_core_mask);
/* This implementation only supports power of 2 number of threads
@@ -456,9 +469,17 @@ void __init smp_setup_cpu_maps(void)
}
for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
+ bool avail;
+
DBG(" thread %d -> cpu %d (hard id %d)\n",
j, cpu, be32_to_cpu(intserv[j]));
- set_cpu_present(cpu, true);
+
+ avail = of_device_is_available(dn);
+ if (!avail)
+ avail = !of_property_match_string(dn,
+ "enable-method", "spin-table");
+
+ set_cpu_present(cpu, avail);
set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
set_cpu_possible(cpu, true);
cpu++;
@@ -715,33 +736,6 @@ static int powerpc_debugfs_init(void)
arch_initcall(powerpc_debugfs_init);
#endif
-#ifdef CONFIG_BOOKE_WDT
-extern u32 booke_wdt_enabled;
-extern u32 booke_wdt_period;
-
-/* Checks wdt=x and wdt_period=xx command-line option */
-notrace int __init early_parse_wdt(char *p)
-{
- if (p && strncmp(p, "0", 1) != 0)
- booke_wdt_enabled = 1;
-
- return 0;
-}
-early_param("wdt", early_parse_wdt);
-
-int __init early_parse_wdt_period(char *p)
-{
- unsigned long ret;
- if (p) {
- if (!kstrtol(p, 0, &ret))
- booke_wdt_period = ret;
- }
-
- return 0;
-}
-early_param("wdt_period", early_parse_wdt_period);
-#endif /* CONFIG_BOOKE_WDT */
-
void ppc_printk_progress(char *s, unsigned short hex)
{
pr_info("%s\n", s);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index b903dc5cf944..ea4fda60e57b 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -44,8 +44,6 @@
extern void bootx_init(unsigned long r4, unsigned long phys);
-int boot_cpuid = -1;
-EXPORT_SYMBOL_GPL(boot_cpuid);
int boot_cpuid_phys;
EXPORT_SYMBOL_GPL(boot_cpuid_phys);
@@ -247,7 +245,12 @@ static void __init exc_lvl_early_init(void)
/* interrupt stacks must be in lowmem, we get that for free on ppc32
* as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */
for_each_possible_cpu(i) {
+#ifdef CONFIG_SMP
hw_cpu = get_hard_smp_processor_id(i);
+#else
+ hw_cpu = 0;
+#endif
+
critirq_ctx[hw_cpu] = (struct thread_info *)
__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
#ifdef CONFIG_BOOKE
@@ -296,9 +299,6 @@ void __init setup_arch(char **cmdline_p)
if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
ucache_bsize = icache_bsize = dcache_bsize;
- /* reboot on panic */
- panic_timeout = 180;
-
if (ppc_md.panic)
setup_panic();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4085aaa9478f..ee082d771178 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -36,6 +36,7 @@
#include <linux/lockdep.h>
#include <linux/memblock.h>
#include <linux/hugetlb.h>
+#include <linux/memory.h>
#include <asm/io.h>
#include <asm/kdump.h>
@@ -74,7 +75,6 @@
#define DBG(fmt...)
#endif
-int boot_cpuid = 0;
int spinning_secondaries;
u64 ppc64_pft_size;
@@ -97,6 +97,38 @@ int dcache_bsize;
int icache_bsize;
int ucache_bsize;
+#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
+static void setup_tlb_core_data(void)
+{
+ int cpu;
+
+ BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0);
+
+ for_each_possible_cpu(cpu) {
+ int first = cpu_first_thread_sibling(cpu);
+
+ paca[cpu].tcd_ptr = &paca[first].tcd;
+
+ /*
+ * If we have threads, we need either tlbsrx.
+ * or e6500 tablewalk mode, or else TLB handlers
+ * will be racy and could produce duplicate entries.
+ */
+ if (smt_enabled_at_boot >= 2 &&
+ !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
+ book3e_htw_mode != PPC_HTW_E6500) {
+ /* Should we panic instead? */
+ WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n",
+ __func__);
+ }
+ }
+}
+#else
+static void setup_tlb_core_data(void)
+{
+}
+#endif
+
#ifdef CONFIG_SMP
static char *smt_enabled_cmdline;
@@ -164,6 +196,19 @@ static void fixup_boot_paca(void)
get_paca()->data_offset = 0;
}
+static void cpu_ready_for_interrupts(void)
+{
+ /* Set IR and DR in PACA MSR */
+ get_paca()->kernel_msr = MSR_KERNEL;
+
+ /* Enable AIL if supported */
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ unsigned long lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+ }
+}
+
/*
* Early initialization entry point. This is called by head.S
* with MMU translation disabled. We rely on the "feature" of
@@ -230,6 +275,14 @@ void __init early_setup(unsigned long dt_ptr)
/* Initialize the hash table or TLB handling */
early_init_mmu();
+ /*
+ * At this point, we can let interrupts switch to virtual mode
+ * (the MMU has been setup), so adjust the MSR in the PACA to
+ * have IR and DR set and enable AIL if it exists
+ */
+ cpu_ready_for_interrupts();
+
+ /* Reserve large chunks of memory for use by CMA for KVM */
kvm_cma_reserve();
/*
@@ -262,6 +315,13 @@ void early_setup_secondary(void)
/* Initialize the hash table or TLB handling */
early_init_mmu_secondary();
+
+ /*
+ * At this point, we can let interrupts switch to virtual mode
+ * (the MMU has been setup), so adjust the MSR in the PACA to
+ * have IR and DR set.
+ */
+ cpu_ready_for_interrupts();
}
#endif /* CONFIG_SMP */
@@ -282,7 +342,7 @@ void smp_release_cpus(void)
ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
- PHYSICAL_START);
- *ptr = __pa(generic_secondary_smp_init);
+ *ptr = ppc_function_entry(generic_secondary_smp_init);
/* And wait a bit for them to catch up */
for (i = 0; i < 100000; i++) {
@@ -445,6 +505,7 @@ void __init setup_system(void)
smp_setup_cpu_maps();
check_smt_enabled();
+ setup_tlb_core_data();
#ifdef CONFIG_SMP
/* Release secondary cpus out of their spinloops at 0x60 now that
@@ -520,23 +581,25 @@ static void __init irqstack_early_init(void)
#ifdef CONFIG_PPC_BOOK3E
static void __init exc_lvl_early_init(void)
{
- extern unsigned int interrupt_base_book3e;
- extern unsigned int exc_debug_debug_book3e;
-
unsigned int i;
+ unsigned long sp;
for_each_possible_cpu(i) {
- critirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
- dbgirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
- mcheckirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+ critirq_ctx[i] = (struct thread_info *)__va(sp);
+ paca[i].crit_kstack = __va(sp + THREAD_SIZE);
+
+ sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+ dbgirq_ctx[i] = (struct thread_info *)__va(sp);
+ paca[i].dbg_kstack = __va(sp + THREAD_SIZE);
+
+ sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+ mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
+ paca[i].mc_kstack = __va(sp + THREAD_SIZE);
}
if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
- patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1,
- (unsigned long)&exc_debug_debug_book3e, 0);
+ patch_exception(0x040, exc_debug_debug_book3e);
}
#else
#define exc_lvl_early_init()
@@ -544,7 +607,8 @@ static void __init exc_lvl_early_init(void)
/*
* Stack space used when we detect a bad kernel stack pointer, and
- * early in SMP boots before relocation is enabled.
+ * early in SMP boots before relocation is enabled. Exclusive emergency
+ * stack for machine checks.
*/
static void __init emergency_stack_init(void)
{
@@ -567,6 +631,13 @@ static void __init emergency_stack_init(void)
sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
sp += THREAD_SIZE;
paca[i].emergency_sp = __va(sp);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* emergency stack for machine check exception handling. */
+ sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+ sp += THREAD_SIZE;
+ paca[i].mc_emergency_sp = __va(sp);
+#endif
}
}
@@ -588,9 +659,6 @@ void __init setup_arch(char **cmdline_p)
dcache_bsize = ppc64_caches.dline_size;
icache_bsize = ppc64_caches.iline_size;
- /* reboot on panic */
- panic_timeout = 180;
-
if (ppc_md.panic)
setup_panic();
@@ -713,6 +781,15 @@ void __init setup_per_cpu_areas(void)
}
#endif
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+unsigned long memory_block_size_bytes(void)
+{
+ if (ppc_md.memory_block_size)
+ return ppc_md.memory_block_size();
+
+ return MIN_MEMORY_BLOCK_SIZE;
+}
+#endif
#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
struct ppc_pci_io ppc_pci_io;
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 457e97aa2945..1c794cef2883 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -134,7 +134,7 @@ static int do_signal(struct pt_regs *regs)
*/
if (current->thread.hw_brk.address &&
current->thread.hw_brk.type)
- set_breakpoint(&current->thread.hw_brk);
+ __set_breakpoint(&current->thread.hw_brk);
#endif
/* Re-enable the breakpoints for the signal stack */
thread_change_pc(current, regs);
@@ -203,8 +203,7 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (MSR_TM_ACTIVE(regs->msr)) {
- tm_enable();
- tm_reclaim(&current->thread, regs->msr, TM_CAUSE_SIGNAL);
+ tm_reclaim_current(TM_CAUSE_SIGNAL);
if (MSR_TM_TRANSACTIONAL(regs->msr))
return current->thread.ckpt_regs.gpr[1];
}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 68027bfa5f8e..1bc5a1755ed4 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -54,7 +54,6 @@
#include "signal.h"
-#undef DEBUG_SIG
#ifdef CONFIG_PPC64
#define sys_rt_sigreturn compat_sys_rt_sigreturn
@@ -519,6 +518,13 @@ static int save_tm_user_regs(struct pt_regs *regs,
{
unsigned long msr = regs->msr;
+ /* Remove TM bits from thread's MSR. The MSR in the sigcontext
+ * just indicates to userland that we were doing a transaction, but we
+ * don't want to return in transactional state. This also ensures
+ * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
+ */
+ regs->msr &= ~MSR_TS_MASK;
+
/* Make sure floating point registers are stored in regs */
flush_fp_to_thread(current);
@@ -874,6 +880,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
* transactional versions should be loaded.
*/
tm_enable();
+ /* Make sure the transaction is marked as failed */
+ current->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&current->thread, msr);
/* Get the top half of the MSR */
@@ -1015,29 +1023,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_frame = &rt_sf->uc_transact.uc_mcontext;
if (MSR_TM_ACTIVE(regs->msr)) {
+ if (__put_user((unsigned long)&rt_sf->uc_transact,
+ &rt_sf->uc.uc_link) ||
+ __put_user((unsigned long)tm_frame,
+ &rt_sf->uc_transact.uc_regs))
+ goto badframe;
if (save_tm_user_regs(regs, frame, tm_frame, sigret))
goto badframe;
}
else
#endif
{
+ if (__put_user(0, &rt_sf->uc.uc_link))
+ goto badframe;
if (save_user_regs(regs, frame, tm_frame, sigret, 1))
goto badframe;
}
regs->link = tramp;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(regs->msr)) {
- if (__put_user((unsigned long)&rt_sf->uc_transact,
- &rt_sf->uc.uc_link)
- || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs))
- goto badframe;
- }
- else
-#endif
- if (__put_user(0, &rt_sf->uc.uc_link))
- goto badframe;
-
current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
/* create a stack frame for the caller of the handler */
@@ -1056,20 +1059,9 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
/* enter the signal handler in native-endian mode */
regs->msr &= ~MSR_LE;
regs->msr |= (MSR_KERNEL & MSR_LE);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* Remove TM bits from thread's MSR. The MSR in the sigcontext
- * just indicates to userland that we were doing a transaction, but we
- * don't want to return in transactional state:
- */
- regs->msr &= ~MSR_TS_MASK;
-#endif
return 1;
badframe:
-#ifdef DEBUG_SIG
- printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n",
- regs, frame, newsp);
-#endif
if (show_unhandled_signals)
printk_ratelimited(KERN_INFO
"%s[%d]: bad frame in handle_rt_signal32: "
@@ -1484,20 +1476,9 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
regs->nip = (unsigned long) ka->sa.sa_handler;
/* enter the signal handler in big-endian mode */
regs->msr &= ~MSR_LE;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* Remove TM bits from thread's MSR. The MSR in the sigcontext
- * just indicates to userland that we were doing a transaction, but we
- * don't want to return in transactional state:
- */
- regs->msr &= ~MSR_TS_MASK;
-#endif
return 1;
badframe:
-#ifdef DEBUG_SIG
- printk("badframe in handle_signal, regs=%p frame=%p newsp=%lx\n",
- regs, frame, newsp);
-#endif
if (show_unhandled_signals)
printk_ratelimited(KERN_INFO
"%s[%d]: bad frame in handle_signal32: "
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 42991045349f..97c1e4b683fc 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -38,7 +38,6 @@
#include "signal.h"
-#define DEBUG_SIG 0
#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
#define FP_REGS_SIZE sizeof(elf_fpregset_t)
@@ -65,8 +64,8 @@ struct rt_sigframe {
struct siginfo __user *pinfo;
void __user *puc;
struct siginfo info;
- /* 64 bit ABI allows for 288 bytes below sp before decrementing it. */
- char abigap[288];
+ /* New 64 bit little-endian ABI allows redzone of 512 bytes below sp */
+ char abigap[USER_REDZONE_SIZE];
} __attribute__ ((aligned (16)));
static const char fmt32[] = KERN_INFO \
@@ -192,6 +191,13 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
BUG_ON(!MSR_TM_ACTIVE(regs->msr));
+ /* Remove TM bits from thread's MSR. The MSR in the sigcontext
+ * just indicates to userland that we were doing a transaction, but we
+ * don't want to return in transactional state. This also ensures
+ * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
+ */
+ regs->msr &= ~MSR_TS_MASK;
+
flush_fp_to_thread(current);
#ifdef CONFIG_ALTIVEC
@@ -520,6 +526,8 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
}
#endif
tm_enable();
+ /* Make sure the transaction is marked as failed */
+ current->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&current->thread, msr);
@@ -691,10 +699,6 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
return 0;
badframe:
-#if DEBUG_SIG
- printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n",
- regs, uc, &uc->uc_mcontext);
-#endif
if (show_unhandled_signals)
printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
current->comm, current->pid, "rt_sigreturn",
@@ -749,13 +753,6 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
/* Make sure signal handler doesn't get spurious FP exceptions */
current->thread.fp_state.fpscr = 0;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* Remove TM bits from thread's MSR. The MSR in the sigcontext
- * just indicates to userland that we were doing a transaction, but we
- * don't want to return in transactional state:
- */
- regs->msr &= ~MSR_TS_MASK;
-#endif
/* Set up to return from userspace. */
if (vdso64_rt_sigtramp && current->mm->context.vdso_base) {
@@ -807,10 +804,6 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
return 1;
badframe:
-#if DEBUG_SIG
- printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n",
- regs, frame, newsp);
-#endif
if (show_unhandled_signals)
printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
current->comm, current->pid, "setup_rt_frame",
diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c
index e68fd1ae727a..7a37ecd3afa3 100644
--- a/arch/powerpc/kernel/smp-tbsync.c
+++ b/arch/powerpc/kernel/smp-tbsync.c
@@ -9,7 +9,6 @@
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/unistd.h>
-#include <linux/init.h>
#include <linux/slab.h>
#include <linux/atomic.h>
#include <asm/smp.h>
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c1cf4a1522d9..51a3ff78838a 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -35,6 +35,8 @@
#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include <asm/kvm_ppc.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
@@ -145,9 +147,9 @@ static irqreturn_t reschedule_action(int irq, void *data)
return IRQ_HANDLED;
}
-static irqreturn_t call_function_single_action(int irq, void *data)
+static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
{
- generic_smp_call_function_single_interrupt();
+ tick_broadcast_ipi_handler();
return IRQ_HANDLED;
}
@@ -168,14 +170,14 @@ static irqreturn_t debug_ipi_action(int irq, void *data)
static irq_handler_t smp_ipi_action[] = {
[PPC_MSG_CALL_FUNCTION] = call_function_action,
[PPC_MSG_RESCHEDULE] = reschedule_action,
- [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
+ [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
[PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
};
const char *smp_ipi_name[] = {
[PPC_MSG_CALL_FUNCTION] = "ipi call function",
[PPC_MSG_RESCHEDULE] = "ipi reschedule",
- [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
+ [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
[PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
};
@@ -251,8 +253,8 @@ irqreturn_t smp_ipi_demux(void)
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
scheduler_ipi();
- if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNC_SINGLE))
- generic_smp_call_function_single_interrupt();
+ if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
+ tick_broadcast_ipi_handler();
if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))
debug_ipi_action(0, NULL);
} while (info->messages);
@@ -280,7 +282,7 @@ EXPORT_SYMBOL_GPL(smp_send_reschedule);
void arch_send_call_function_single_ipi(int cpu)
{
- do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
+ do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -291,6 +293,16 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+void tick_broadcast(const struct cpumask *mask)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask)
+ do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
+}
+#endif
+
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
void smp_send_debugger_break(void)
{
@@ -369,13 +381,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
- if (smp_ops)
- if (smp_ops->probe)
- max_cpus = smp_ops->probe();
- else
- max_cpus = NR_CPUS;
- else
- max_cpus = 1;
+ if (smp_ops && smp_ops->probe)
+ smp_ops->probe();
}
void smp_prepare_boot_cpu(void)
@@ -384,6 +391,7 @@ void smp_prepare_boot_cpu(void)
#ifdef CONFIG_PPC64
paca[boot_cpuid].__current = current;
#endif
+ set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
current_set[boot_cpuid] = task_thread_info(current);
}
@@ -451,38 +459,9 @@ int generic_check_cpu_restart(unsigned int cpu)
return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
}
-static atomic_t secondary_inhibit_count;
-
-/*
- * Don't allow secondary CPU threads to come online
- */
-void inhibit_secondary_onlining(void)
-{
- /*
- * This makes secondary_inhibit_count stable during cpu
- * online/offline operations.
- */
- get_online_cpus();
-
- atomic_inc(&secondary_inhibit_count);
- put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(inhibit_secondary_onlining);
-
-/*
- * Allow secondary CPU threads to come online again
- */
-void uninhibit_secondary_onlining(void)
-{
- get_online_cpus();
- atomic_dec(&secondary_inhibit_count);
- put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(uninhibit_secondary_onlining);
-
-static int secondaries_inhibited(void)
+static bool secondaries_inhibited(void)
{
- return atomic_read(&secondary_inhibit_count);
+ return kvm_hv_mode_active();
}
#else /* HOTPLUG_CPU */
@@ -511,7 +490,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
* Don't allow secondary threads to come online if inhibited
*/
if (threads_per_core > 1 && secondaries_inhibited() &&
- cpu % threads_per_core != 0)
+ cpu_thread_in_subcore(cpu))
return -EBUSY;
if (smp_ops == NULL ||
@@ -744,6 +723,12 @@ void start_secondary(void *unused)
}
traverse_core_siblings(cpu, true);
+ /*
+ * numa_node_id() works after this.
+ */
+ set_numa_node(numa_cpu_lookup_table[cpu]);
+ set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
smp_wmb();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
@@ -760,6 +745,28 @@ int setup_profiling_timer(unsigned int multiplier)
return 0;
}
+#ifdef CONFIG_SCHED_SMT
+/* cpumask of CPUs with asymetric SMT dependancy */
+static const int powerpc_smt_flags(void)
+{
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+ printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+ flags |= SD_ASYM_PACKING;
+ }
+ return flags;
+}
+#endif
+
+static struct sched_domain_topology_level powerpc_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
void __init smp_cpus_done(unsigned int max_cpus)
{
cpumask_var_t old_mask;
@@ -784,15 +791,8 @@ void __init smp_cpus_done(unsigned int max_cpus)
dump_numa_cpu_topology();
-}
+ set_sched_topology(powerpc_topology);
-int arch_sd_sibling_asym_packing(void)
-{
- if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
- printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
- return SD_ASYM_PACKING;
- }
- return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S
index 0f204053e5b5..553c1405ee05 100644
--- a/arch/powerpc/kernel/swsusp_booke.S
+++ b/arch/powerpc/kernel/swsusp_booke.S
@@ -74,21 +74,21 @@ _GLOBAL(swsusp_arch_suspend)
bne 1b
/* Save SPRGs */
- mfsprg r4,0
+ mfspr r4,SPRN_SPRG0
stw r4,SL_SPRG0(r11)
- mfsprg r4,1
+ mfspr r4,SPRN_SPRG1
stw r4,SL_SPRG1(r11)
- mfsprg r4,2
+ mfspr r4,SPRN_SPRG2
stw r4,SL_SPRG2(r11)
- mfsprg r4,3
+ mfspr r4,SPRN_SPRG3
stw r4,SL_SPRG3(r11)
- mfsprg r4,4
+ mfspr r4,SPRN_SPRG4
stw r4,SL_SPRG4(r11)
- mfsprg r4,5
+ mfspr r4,SPRN_SPRG5
stw r4,SL_SPRG5(r11)
- mfsprg r4,6
+ mfspr r4,SPRN_SPRG6
stw r4,SL_SPRG6(r11)
- mfsprg r4,7
+ mfspr r4,SPRN_SPRG7
stw r4,SL_SPRG7(r11)
/* Call the low level suspend stuff (we should probably have made
@@ -150,21 +150,21 @@ _GLOBAL(swsusp_arch_resume)
bl _tlbil_all
lwz r4,SL_SPRG0(r11)
- mtsprg 0,r4
+ mtspr SPRN_SPRG0,r4
lwz r4,SL_SPRG1(r11)
- mtsprg 1,r4
+ mtspr SPRN_SPRG1,r4
lwz r4,SL_SPRG2(r11)
- mtsprg 2,r4
+ mtspr SPRN_SPRG2,r4
lwz r4,SL_SPRG3(r11)
- mtsprg 3,r4
+ mtspr SPRN_SPRG3,r4
lwz r4,SL_SPRG4(r11)
- mtsprg 4,r4
+ mtspr SPRN_SPRG4,r4
lwz r4,SL_SPRG5(r11)
- mtsprg 5,r4
+ mtspr SPRN_SPRG5,r4
lwz r4,SL_SPRG6(r11)
- mtsprg 6,r4
+ mtspr SPRN_SPRG6,r4
lwz r4,SL_SPRG7(r11)
- mtsprg 7,r4
+ mtspr SPRN_SPRG7,r4
/* restore the MSR */
lwz r3,SL_MSR(r11)
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 4e3cc47f26b9..cd9be9aa016d 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -34,7 +34,6 @@
#include <linux/ipc.h>
#include <linux/utsname.h>
#include <linux/file.h>
-#include <linux/init.h>
#include <linux/personality.h>
#include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index b4e667663d9b..67fd2fd2620a 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -51,8 +51,6 @@ static ssize_t store_smt_snooze_delay(struct device *dev,
return -EINVAL;
per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
- update_smt_snooze_delay(cpu->dev.id, snooze);
-
return count;
}
@@ -86,6 +84,304 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay);
#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+#define MAX_BIT 63
+
+static u64 pw20_wt;
+static u64 altivec_idle_wt;
+
+static unsigned int get_idle_ticks_bit(u64 ns)
+{
+ u64 cycle;
+
+ if (ns >= 10000)
+ cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec;
+ else
+ cycle = div_u64(ns * tb_ticks_per_usec, 1000);
+
+ if (!cycle)
+ return 0;
+
+ return ilog2(cycle);
+}
+
+static void do_show_pwrmgtcr0(void *val)
+{
+ u32 *value = val;
+
+ *value = mfspr(SPRN_PWRMGTCR0);
+}
+
+static ssize_t show_pw20_state(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u32 value;
+ unsigned int cpu = dev->id;
+
+ smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+
+ value &= PWRMGTCR0_PW20_WAIT;
+
+ return sprintf(buf, "%u\n", value ? 1 : 0);
+}
+
+static void do_store_pw20_state(void *val)
+{
+ u32 *value = val;
+ u32 pw20_state;
+
+ pw20_state = mfspr(SPRN_PWRMGTCR0);
+
+ if (*value)
+ pw20_state |= PWRMGTCR0_PW20_WAIT;
+ else
+ pw20_state &= ~PWRMGTCR0_PW20_WAIT;
+
+ mtspr(SPRN_PWRMGTCR0, pw20_state);
+}
+
+static ssize_t store_pw20_state(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u32 value;
+ unsigned int cpu = dev->id;
+
+ if (kstrtou32(buf, 0, &value))
+ return -EINVAL;
+
+ if (value > 1)
+ return -EINVAL;
+
+ smp_call_function_single(cpu, do_store_pw20_state, &value, 1);
+
+ return count;
+}
+
+static ssize_t show_pw20_wait_time(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u32 value;
+ u64 tb_cycle = 1;
+ u64 time;
+
+ unsigned int cpu = dev->id;
+
+ if (!pw20_wt) {
+ smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+ value = (value & PWRMGTCR0_PW20_ENT) >>
+ PWRMGTCR0_PW20_ENT_SHIFT;
+
+ tb_cycle = (tb_cycle << (MAX_BIT - value + 1));
+ /* convert ms to ns */
+ if (tb_ticks_per_usec > 1000) {
+ time = div_u64(tb_cycle, tb_ticks_per_usec / 1000);
+ } else {
+ u32 rem_us;
+
+ time = div_u64_rem(tb_cycle, tb_ticks_per_usec,
+ &rem_us);
+ time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec;
+ }
+ } else {
+ time = pw20_wt;
+ }
+
+ return sprintf(buf, "%llu\n", time > 0 ? time : 0);
+}
+
+static void set_pw20_wait_entry_bit(void *val)
+{
+ u32 *value = val;
+ u32 pw20_idle;
+
+ pw20_idle = mfspr(SPRN_PWRMGTCR0);
+
+ /* Set Automatic PW20 Core Idle Count */
+ /* clear count */
+ pw20_idle &= ~PWRMGTCR0_PW20_ENT;
+
+ /* set count */
+ pw20_idle |= ((MAX_BIT - *value) << PWRMGTCR0_PW20_ENT_SHIFT);
+
+ mtspr(SPRN_PWRMGTCR0, pw20_idle);
+}
+
+static ssize_t store_pw20_wait_time(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u32 entry_bit;
+ u64 value;
+
+ unsigned int cpu = dev->id;
+
+ if (kstrtou64(buf, 0, &value))
+ return -EINVAL;
+
+ if (!value)
+ return -EINVAL;
+
+ entry_bit = get_idle_ticks_bit(value);
+ if (entry_bit > MAX_BIT)
+ return -EINVAL;
+
+ pw20_wt = value;
+
+ smp_call_function_single(cpu, set_pw20_wait_entry_bit,
+ &entry_bit, 1);
+
+ return count;
+}
+
+static ssize_t show_altivec_idle(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u32 value;
+ unsigned int cpu = dev->id;
+
+ smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+
+ value &= PWRMGTCR0_AV_IDLE_PD_EN;
+
+ return sprintf(buf, "%u\n", value ? 1 : 0);
+}
+
+static void do_store_altivec_idle(void *val)
+{
+ u32 *value = val;
+ u32 altivec_idle;
+
+ altivec_idle = mfspr(SPRN_PWRMGTCR0);
+
+ if (*value)
+ altivec_idle |= PWRMGTCR0_AV_IDLE_PD_EN;
+ else
+ altivec_idle &= ~PWRMGTCR0_AV_IDLE_PD_EN;
+
+ mtspr(SPRN_PWRMGTCR0, altivec_idle);
+}
+
+static ssize_t store_altivec_idle(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u32 value;
+ unsigned int cpu = dev->id;
+
+ if (kstrtou32(buf, 0, &value))
+ return -EINVAL;
+
+ if (value > 1)
+ return -EINVAL;
+
+ smp_call_function_single(cpu, do_store_altivec_idle, &value, 1);
+
+ return count;
+}
+
+static ssize_t show_altivec_idle_wait_time(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u32 value;
+ u64 tb_cycle = 1;
+ u64 time;
+
+ unsigned int cpu = dev->id;
+
+ if (!altivec_idle_wt) {
+ smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+ value = (value & PWRMGTCR0_AV_IDLE_CNT) >>
+ PWRMGTCR0_AV_IDLE_CNT_SHIFT;
+
+ tb_cycle = (tb_cycle << (MAX_BIT - value + 1));
+ /* convert ms to ns */
+ if (tb_ticks_per_usec > 1000) {
+ time = div_u64(tb_cycle, tb_ticks_per_usec / 1000);
+ } else {
+ u32 rem_us;
+
+ time = div_u64_rem(tb_cycle, tb_ticks_per_usec,
+ &rem_us);
+ time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec;
+ }
+ } else {
+ time = altivec_idle_wt;
+ }
+
+ return sprintf(buf, "%llu\n", time > 0 ? time : 0);
+}
+
+static void set_altivec_idle_wait_entry_bit(void *val)
+{
+ u32 *value = val;
+ u32 altivec_idle;
+
+ altivec_idle = mfspr(SPRN_PWRMGTCR0);
+
+ /* Set Automatic AltiVec Idle Count */
+ /* clear count */
+ altivec_idle &= ~PWRMGTCR0_AV_IDLE_CNT;
+
+ /* set count */
+ altivec_idle |= ((MAX_BIT - *value) << PWRMGTCR0_AV_IDLE_CNT_SHIFT);
+
+ mtspr(SPRN_PWRMGTCR0, altivec_idle);
+}
+
+static ssize_t store_altivec_idle_wait_time(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u32 entry_bit;
+ u64 value;
+
+ unsigned int cpu = dev->id;
+
+ if (kstrtou64(buf, 0, &value))
+ return -EINVAL;
+
+ if (!value)
+ return -EINVAL;
+
+ entry_bit = get_idle_ticks_bit(value);
+ if (entry_bit > MAX_BIT)
+ return -EINVAL;
+
+ altivec_idle_wt = value;
+
+ smp_call_function_single(cpu, set_altivec_idle_wait_entry_bit,
+ &entry_bit, 1);
+
+ return count;
+}
+
+/*
+ * Enable/Disable interface:
+ * 0, disable. 1, enable.
+ */
+static DEVICE_ATTR(pw20_state, 0600, show_pw20_state, store_pw20_state);
+static DEVICE_ATTR(altivec_idle, 0600, show_altivec_idle, store_altivec_idle);
+
+/*
+ * Set wait time interface:(Nanosecond)
+ * Example: Base on TBfreq is 41MHZ.
+ * 1~48(ns): TB[63]
+ * 49~97(ns): TB[62]
+ * 98~195(ns): TB[61]
+ * 196~390(ns): TB[60]
+ * 391~780(ns): TB[59]
+ * 781~1560(ns): TB[58]
+ * ...
+ */
+static DEVICE_ATTR(pw20_wait_time, 0600,
+ show_pw20_wait_time,
+ store_pw20_wait_time);
+static DEVICE_ATTR(altivec_idle_wait_time, 0600,
+ show_altivec_idle_wait_time,
+ store_altivec_idle_wait_time);
+#endif
+
/*
* Enabling PMCs will slow partition context switch times so we only do
* it the first time we write to the PMCs.
@@ -108,16 +404,18 @@ void ppc_enable_pmcs(void)
}
EXPORT_SYMBOL(ppc_enable_pmcs);
-#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
static void read_##NAME(void *val) \
{ \
*(unsigned long *)val = mfspr(ADDRESS); \
} \
static void write_##NAME(void *val) \
{ \
- ppc_enable_pmcs(); \
+ EXTRA; \
mtspr(ADDRESS, *(unsigned long *)val); \
-} \
+}
+
+#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
static ssize_t show_##NAME(struct device *dev, \
struct device_attribute *attr, \
char *buf) \
@@ -140,6 +438,15 @@ static ssize_t __used \
return count; \
}
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+#define SYSFS_SPRSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
/* Let's define all possible registers, we'll only hook up the ones
* that are implemented on the current processor
@@ -175,10 +482,9 @@ SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
-SYSFS_PMCSETUP(purr, SPRN_PURR);
-SYSFS_PMCSETUP(spurr, SPRN_SPURR);
-SYSFS_PMCSETUP(dscr, SPRN_DSCR);
-SYSFS_PMCSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(purr, SPRN_PURR);
+SYSFS_SPRSETUP(spurr, SPRN_SPURR);
+SYSFS_SPRSETUP(pir, SPRN_PIR);
/*
Lets only enable read for phyp resources and
@@ -187,12 +493,27 @@ SYSFS_PMCSETUP(pir, SPRN_PIR);
*/
static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
-static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
static DEVICE_ATTR(pir, 0400, show_pir, NULL);
-unsigned long dscr_default = 0;
-EXPORT_SYMBOL(dscr_default);
+static unsigned long dscr_default;
+
+static void read_dscr(void *val)
+{
+ *(unsigned long *)val = get_paca()->dscr_default;
+}
+
+static void write_dscr(void *val)
+{
+ get_paca()->dscr_default = *(unsigned long *)val;
+ if (!current->thread.dscr_inherit) {
+ current->thread.dscr = *(unsigned long *)val;
+ mtspr(SPRN_DSCR, *(unsigned long *)val);
+ }
+}
+
+SYSFS_SPRSETUP_SHOW_STORE(dscr);
+static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
static void add_write_permission_dev_attr(struct device_attribute *attr)
{
@@ -205,14 +526,6 @@ static ssize_t show_dscr_default(struct device *dev,
return sprintf(buf, "%lx\n", dscr_default);
}
-static void update_dscr(void *dummy)
-{
- if (!current->thread.dscr_inherit) {
- current->thread.dscr = dscr_default;
- mtspr(SPRN_DSCR, dscr_default);
- }
-}
-
static ssize_t __used store_dscr_default(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
@@ -225,7 +538,7 @@ static ssize_t __used store_dscr_default(struct device *dev,
return -EINVAL;
dscr_default = val;
- on_each_cpu(update_dscr, NULL, 1);
+ on_each_cpu(write_dscr, &val, 1);
return count;
}
@@ -249,34 +562,34 @@ SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
#ifdef CONFIG_DEBUG_KERNEL
-SYSFS_PMCSETUP(hid0, SPRN_HID0);
-SYSFS_PMCSETUP(hid1, SPRN_HID1);
-SYSFS_PMCSETUP(hid4, SPRN_HID4);
-SYSFS_PMCSETUP(hid5, SPRN_HID5);
-SYSFS_PMCSETUP(ima0, SPRN_PA6T_IMA0);
-SYSFS_PMCSETUP(ima1, SPRN_PA6T_IMA1);
-SYSFS_PMCSETUP(ima2, SPRN_PA6T_IMA2);
-SYSFS_PMCSETUP(ima3, SPRN_PA6T_IMA3);
-SYSFS_PMCSETUP(ima4, SPRN_PA6T_IMA4);
-SYSFS_PMCSETUP(ima5, SPRN_PA6T_IMA5);
-SYSFS_PMCSETUP(ima6, SPRN_PA6T_IMA6);
-SYSFS_PMCSETUP(ima7, SPRN_PA6T_IMA7);
-SYSFS_PMCSETUP(ima8, SPRN_PA6T_IMA8);
-SYSFS_PMCSETUP(ima9, SPRN_PA6T_IMA9);
-SYSFS_PMCSETUP(imaat, SPRN_PA6T_IMAAT);
-SYSFS_PMCSETUP(btcr, SPRN_PA6T_BTCR);
-SYSFS_PMCSETUP(pccr, SPRN_PA6T_PCCR);
-SYSFS_PMCSETUP(rpccr, SPRN_PA6T_RPCCR);
-SYSFS_PMCSETUP(der, SPRN_PA6T_DER);
-SYSFS_PMCSETUP(mer, SPRN_PA6T_MER);
-SYSFS_PMCSETUP(ber, SPRN_PA6T_BER);
-SYSFS_PMCSETUP(ier, SPRN_PA6T_IER);
-SYSFS_PMCSETUP(sier, SPRN_PA6T_SIER);
-SYSFS_PMCSETUP(siar, SPRN_PA6T_SIAR);
-SYSFS_PMCSETUP(tsr0, SPRN_PA6T_TSR0);
-SYSFS_PMCSETUP(tsr1, SPRN_PA6T_TSR1);
-SYSFS_PMCSETUP(tsr2, SPRN_PA6T_TSR2);
-SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3);
+SYSFS_SPRSETUP(hid0, SPRN_HID0);
+SYSFS_SPRSETUP(hid1, SPRN_HID1);
+SYSFS_SPRSETUP(hid4, SPRN_HID4);
+SYSFS_SPRSETUP(hid5, SPRN_HID5);
+SYSFS_SPRSETUP(ima0, SPRN_PA6T_IMA0);
+SYSFS_SPRSETUP(ima1, SPRN_PA6T_IMA1);
+SYSFS_SPRSETUP(ima2, SPRN_PA6T_IMA2);
+SYSFS_SPRSETUP(ima3, SPRN_PA6T_IMA3);
+SYSFS_SPRSETUP(ima4, SPRN_PA6T_IMA4);
+SYSFS_SPRSETUP(ima5, SPRN_PA6T_IMA5);
+SYSFS_SPRSETUP(ima6, SPRN_PA6T_IMA6);
+SYSFS_SPRSETUP(ima7, SPRN_PA6T_IMA7);
+SYSFS_SPRSETUP(ima8, SPRN_PA6T_IMA8);
+SYSFS_SPRSETUP(ima9, SPRN_PA6T_IMA9);
+SYSFS_SPRSETUP(imaat, SPRN_PA6T_IMAAT);
+SYSFS_SPRSETUP(btcr, SPRN_PA6T_BTCR);
+SYSFS_SPRSETUP(pccr, SPRN_PA6T_PCCR);
+SYSFS_SPRSETUP(rpccr, SPRN_PA6T_RPCCR);
+SYSFS_SPRSETUP(der, SPRN_PA6T_DER);
+SYSFS_SPRSETUP(mer, SPRN_PA6T_MER);
+SYSFS_SPRSETUP(ber, SPRN_PA6T_BER);
+SYSFS_SPRSETUP(ier, SPRN_PA6T_IER);
+SYSFS_SPRSETUP(sier, SPRN_PA6T_SIER);
+SYSFS_SPRSETUP(siar, SPRN_PA6T_SIAR);
+SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0);
+SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1);
+SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2);
+SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);
#endif /* CONFIG_DEBUG_KERNEL */
#endif /* HAS_PPC_PMC_PA6T */
@@ -421,6 +734,15 @@ static void register_cpu_online(unsigned int cpu)
device_create_file(s, &dev_attr_pir);
#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+ if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
+ device_create_file(s, &dev_attr_pw20_state);
+ device_create_file(s, &dev_attr_pw20_wait_time);
+
+ device_create_file(s, &dev_attr_altivec_idle);
+ device_create_file(s, &dev_attr_altivec_idle_wait_time);
+ }
+#endif
cacheinfo_cpu_online(cpu);
}
@@ -493,6 +815,15 @@ static void unregister_cpu_online(unsigned int cpu)
device_remove_file(s, &dev_attr_pir);
#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+ if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
+ device_remove_file(s, &dev_attr_pw20_state);
+ device_remove_file(s, &dev_attr_pw20_wait_time);
+
+ device_remove_file(s, &dev_attr_altivec_idle);
+ device_remove_file(s, &dev_attr_altivec_idle_wait_time);
+ }
+#endif
cacheinfo_cpu_offline(cpu);
}
@@ -657,7 +988,8 @@ static int __init topology_init(void)
int cpu;
register_nodes();
- register_cpu_notifier(&sysfs_cpu_nb);
+
+ cpu_notifier_register_begin();
for_each_possible_cpu(cpu) {
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -681,6 +1013,11 @@ static int __init topology_init(void)
if (cpu_online(cpu))
register_cpu_online(cpu);
}
+
+ __register_cpu_notifier(&sysfs_cpu_nb);
+
+ cpu_notifier_register_done();
+
#ifdef CONFIG_PPC64
sysfs_create_dscr_default();
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 93219c34af32..895c50ca943c 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -17,12 +17,12 @@
#include <asm/ppc_asm.h>
#ifdef CONFIG_PPC64
-#define SYSCALL(func) .llong .sys_##func,.sys_##func
-#define COMPAT_SYS(func) .llong .sys_##func,.compat_sys_##func
-#define PPC_SYS(func) .llong .ppc_##func,.ppc_##func
-#define OLDSYS(func) .llong .sys_ni_syscall,.sys_ni_syscall
-#define SYS32ONLY(func) .llong .sys_ni_syscall,.compat_sys_##func
-#define SYSX(f, f3264, f32) .llong .f,.f3264
+#define SYSCALL(func) .llong DOTSYM(sys_##func),DOTSYM(sys_##func)
+#define COMPAT_SYS(func) .llong DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
+#define PPC_SYS(func) .llong DOTSYM(ppc_##func),DOTSYM(ppc_##func)
+#define OLDSYS(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
+#define SYS32ONLY(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
+#define SYSX(f, f3264, f32) .llong DOTSYM(f),DOTSYM(f3264)
#else
#define SYSCALL(func) .long sys_##func
#define COMPAT_SYS(func) .long sys_##func
@@ -36,6 +36,8 @@
#define PPC_SYS_SPU(func) PPC_SYS(func)
#define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32)
+.section .rodata,"a"
+
#ifdef CONFIG_PPC64
#define sys_sigpending sys_ni_syscall
#define sys_old_getrlimit sys_ni_syscall
@@ -43,5 +45,7 @@
.p2align 3
#endif
-_GLOBAL(sys_call_table)
+.globl sys_call_table
+sys_call_table:
+
#include <asm/systbl.h>
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b3b144121cc9..9fff9cdcc519 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -42,6 +42,7 @@
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
+#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
@@ -106,7 +107,7 @@ struct clock_event_device decrementer_clockevent = {
.irq = 0,
.set_next_event = decrementer_set_next_event,
.set_mode = decrementer_set_mode,
- .features = CLOCK_EVT_FEAT_ONESHOT,
+ .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP,
};
EXPORT_SYMBOL(decrementer_clockevent);
@@ -478,6 +479,47 @@ void arch_irq_work_raise(void)
#endif /* CONFIG_IRQ_WORK */
+void __timer_interrupt(void)
+{
+ struct pt_regs *regs = get_irq_regs();
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+ struct clock_event_device *evt = &__get_cpu_var(decrementers);
+ u64 now;
+
+ trace_timer_interrupt_entry(regs);
+
+ if (test_irq_work_pending()) {
+ clear_irq_work_pending();
+ irq_work_run();
+ }
+
+ now = get_tb_or_rtc();
+ if (now >= *next_tb) {
+ *next_tb = ~(u64)0;
+ if (evt->event_handler)
+ evt->event_handler(evt);
+ __get_cpu_var(irq_stat).timer_irqs_event++;
+ } else {
+ now = *next_tb - now;
+ if (now <= DECREMENTER_MAX)
+ set_dec((int)now);
+ /* We may have raced with new irq work */
+ if (test_irq_work_pending())
+ set_dec(1);
+ __get_cpu_var(irq_stat).timer_irqs_others++;
+ }
+
+#ifdef CONFIG_PPC64
+ /* collect purr register values often, for accurate calculations */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
+ cu->current_tb = mfspr(SPRN_PURR);
+ }
+#endif
+
+ trace_timer_interrupt_exit(regs);
+}
+
/*
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
@@ -486,8 +528,6 @@ void timer_interrupt(struct pt_regs * regs)
{
struct pt_regs *old_regs;
u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
- struct clock_event_device *evt = &__get_cpu_var(decrementers);
- u64 now;
/* Ensure a positive value is written to the decrementer, or else
* some CPUs will continue to take decrementer exceptions.
@@ -510,9 +550,8 @@ void timer_interrupt(struct pt_regs * regs)
*/
may_hard_irq_enable();
- __get_cpu_var(irq_stat).timer_irqs++;
-#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
+#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
if (atomic_read(&ppc_n_lost_interrupts) != 0)
do_IRQ(regs);
#endif
@@ -520,34 +559,7 @@ void timer_interrupt(struct pt_regs * regs)
old_regs = set_irq_regs(regs);
irq_enter();
- trace_timer_interrupt_entry(regs);
-
- if (test_irq_work_pending()) {
- clear_irq_work_pending();
- irq_work_run();
- }
-
- now = get_tb_or_rtc();
- if (now >= *next_tb) {
- *next_tb = ~(u64)0;
- if (evt->event_handler)
- evt->event_handler(evt);
- } else {
- now = *next_tb - now;
- if (now <= DECREMENTER_MAX)
- set_dec((int)now);
- }
-
-#ifdef CONFIG_PPC64
- /* collect purr register values often, for accurate calculations */
- if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
- struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
- cu->current_tb = mfspr(SPRN_PURR);
- }
-#endif
-
- trace_timer_interrupt_exit(regs);
-
+ __timer_interrupt();
irq_exit();
set_irq_regs(old_regs);
}
@@ -803,6 +815,11 @@ static int decrementer_set_next_event(unsigned long evt,
{
__get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;
set_dec(evt);
+
+ /* We may have raced with new irq work */
+ if (test_irq_work_pending())
+ set_dec(1);
+
return 0;
}
@@ -813,6 +830,15 @@ static void decrementer_set_mode(enum clock_event_mode mode,
decrementer_set_next_event(DECREMENTER_MAX, dev);
}
+/* Interrupt handler for the timer broadcast IPI */
+void tick_broadcast_ipi_handler(void)
+{
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+ *next_tb = get_tb_or_rtc();
+ __timer_interrupt();
+}
+
static void register_decrementer_clockevent(int cpu)
{
struct clock_event_device *dec = &per_cpu(decrementers, cpu);
@@ -916,6 +942,7 @@ void __init time_init(void)
clocksource_init();
init_decrementer_clockevent();
+ tick_setup_hrtimer_broadcast();
}
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index ef47bcbd4352..2a324f4cb1b9 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -10,6 +10,7 @@
#include <asm/ppc-opcode.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
+#include <asm/bug.h>
#ifdef CONFIG_VSX
/* See fpu.S, this is borrowed from there */
@@ -41,7 +42,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
/* Stack frame offsets for local variables. */
#define TM_FRAME_L0 TM_FRAME_SIZE-16
#define TM_FRAME_L1 TM_FRAME_SIZE-8
-#define STACK_PARAM(x) (48+((x)*8))
/* In order to access the TM SPRs, TM must be enabled. So, do so: */
@@ -78,12 +78,6 @@ _GLOBAL(tm_abort)
TABORT(R3)
blr
- .section ".toc","aw"
-DSCR_DEFAULT:
- .tc dscr_default[TC],dscr_default
-
- .section ".text"
-
/* void tm_reclaim(struct thread_struct *thread,
* unsigned long orig_msr,
* uint8_t cause)
@@ -108,12 +102,12 @@ _GLOBAL(tm_reclaim)
mflr r0
stw r6, 8(r1)
std r0, 16(r1)
- std r2, 40(r1)
+ std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
- std r3, STACK_PARAM(0)(r1)
+ std r3, STK_PARAM(R3)(r1)
SAVE_NVGPRS(r1)
/* We need to setup MSR for VSX register save instructions. Here we
@@ -175,6 +169,13 @@ dont_backup_vec:
stfd fr0,FPSTATE_FPSCR(r7)
dont_backup_fp:
+ /* Do sanity check on MSR to make sure we are suspended */
+ li r7, (MSR_TS_S)@higher
+ srdi r6, r14, 32
+ and r6, r6, r7
+1: tdeqi r6, 0
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
/* The moment we treclaim, ALL of our GPRs will switch
* to user register state. (FPRs, CCR etc. also!)
* Use an sprg and a tm_scratch in the PACA to shuffle.
@@ -202,7 +203,7 @@ dont_backup_fp:
/* Now get some more GPRS free */
std r7, GPR7(r1) /* Temporary stash */
std r12, GPR12(r1) /* '' '' '' */
- ld r12, STACK_PARAM(0)(r1) /* Param 0, thread_struct * */
+ ld r12, STK_PARAM(R3)(r1) /* Param 0, thread_struct * */
std r11, THREAD_TM_PPR(r12) /* Store PPR and free r11 */
@@ -289,11 +290,10 @@ dont_backup_fp:
ld r0, 16(r1)
mtcr r4
mtlr r0
- ld r2, 40(r1)
+ ld r2, STK_GOT(r1)
- /* Load system default DSCR */
- ld r4, DSCR_DEFAULT@toc(r2)
- ld r0, 0(r4)
+ /* Load CPU's default DSCR */
+ ld r0, PACA_DSCR(r13)
mtspr SPRN_DSCR, r0
blr
@@ -307,12 +307,12 @@ dont_backup_fp:
* Call with IRQs off, stacks get all out of sync for
* some periods in here!
*/
-_GLOBAL(tm_recheckpoint)
+_GLOBAL(__tm_recheckpoint)
mfcr r5
mflr r0
stw r5, 8(r1)
std r0, 16(r1)
- std r2, 40(r1)
+ std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
@@ -320,8 +320,6 @@ _GLOBAL(tm_recheckpoint)
*/
SAVE_NVGPRS(r1)
- std r1, PACAR1(r13)
-
/* Load complete register state from ts_ckpt* registers */
addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */
@@ -385,12 +383,10 @@ restore_gprs:
/* ******************** CR,LR,CCR,MSR ********** */
ld r4, _CTR(r7)
ld r5, _LINK(r7)
- ld r6, _CCR(r7)
ld r8, _XER(r7)
mtctr r4
mtlr r5
- mtcr r6
mtxer r8
/* ******************** TAR ******************** */
@@ -406,7 +402,8 @@ restore_gprs:
li r4, 0
mtmsrd r4, 1
- REST_4GPRS(0, r7) /* GPR0-3 */
+ REST_GPR(0, r7) /* GPR0 */
+ REST_2GPRS(2, r7) /* GPR2-3 */
REST_GPR(4, r7) /* GPR4 */
REST_4GPRS(8, r7) /* GPR8-11 */
REST_2GPRS(12, r7) /* GPR12-13 */
@@ -418,6 +415,31 @@ restore_gprs:
mtspr SPRN_DSCR, r5
mtspr SPRN_PPR, r6
+ /* Do final sanity check on TEXASR to make sure FS is set. Do this
+ * here before we load up the userspace r1 so any bugs we hit will get
+ * a call chain */
+ mfspr r5, SPRN_TEXASR
+ srdi r5, r5, 16
+ li r6, (TEXASR_FS)@h
+ and r6, r6, r5
+1: tdeqi r6, 0
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
+ /* Do final sanity check on MSR to make sure we are not transactional
+ * or suspended
+ */
+ mfmsr r6
+ li r5, (MSR_TS_MASK)@higher
+ srdi r6, r6, 32
+ and r6, r6, r5
+1: tdnei r6, 0
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
+ /* Restore CR */
+ ld r6, _CCR(r7)
+ mtcr r6
+
+ REST_GPR(1, r7) /* GPR1 */
REST_GPR(5, r7) /* GPR5-7 */
REST_GPR(6, r7)
ld r7, GPR7(r7)
@@ -448,11 +470,10 @@ restore_gprs:
ld r0, 16(r1)
mtcr r4
mtlr r0
- ld r2, 40(r1)
+ ld r2, STK_GOT(r1)
- /* Load system default DSCR */
- ld r4, DSCR_DEFAULT@toc(r2)
- ld r0, 0(r4)
+ /* Load CPU's default DSCR */
+ ld r0, PACA_DSCR(r13)
mtspr SPRN_DSCR, r0
blr
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 907a472f9a9e..239f1cde3fff 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -285,6 +285,23 @@ void system_reset_exception(struct pt_regs *regs)
/* What should we do here? We could issue a shutdown or hard reset. */
}
+
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contains srr0 and ssr1.
+ */
+long machine_check_early(struct pt_regs *regs)
+{
+ long handled = 0;
+
+ __get_cpu_var(irq_stat).mce_exceptions++;
+
+ if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+ handled = cur_cpu_spec->machine_check_early(regs);
+ return handled;
+}
+
#endif
/*
@@ -1364,8 +1381,9 @@ void facility_unavailable_exception(struct pt_regs *regs)
if (!arch_irq_disabled_regs(regs))
local_irq_enable();
- pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n",
- hv ? "Hypervisor " : "", facility, regs->nip, regs->msr);
+ pr_err_ratelimited(
+ "%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n",
+ hv ? "Hypervisor " : "", facility, regs->nip, regs->msr);
if (user_mode(regs)) {
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
@@ -1384,7 +1402,6 @@ void fp_unavailable_tm(struct pt_regs *regs)
TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n",
regs->nip, regs->msr);
- tm_enable();
/* We can only have got here if the task started using FP after
* beginning the transaction. So, the transactional regs are just a
@@ -1393,8 +1410,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
* transaction, and probably retry but now with FP enabled. So the
* checkpointed FP registers need to be loaded.
*/
- tm_reclaim(&current->thread, current->thread.regs->msr,
- TM_CAUSE_FAC_UNAV);
+ tm_reclaim_current(TM_CAUSE_FAC_UNAV);
/* Reclaim didn't save out any FPRs to transact_fprs. */
/* Enable FP for the task: */
@@ -1403,11 +1419,19 @@ void fp_unavailable_tm(struct pt_regs *regs)
/* This loads and recheckpoints the FP registers from
* thread.fpr[]. They will remain in registers after the
* checkpoint so we don't need to reload them after.
+ * If VMX is in use, the VRs now hold checkpointed values,
+ * so we don't want to load the VRs from the thread_struct.
*/
- tm_recheckpoint(&current->thread, regs->msr);
+ tm_recheckpoint(&current->thread, MSR_FP);
+
+ /* If VMX is in use, get the transactional values back */
+ if (regs->msr & MSR_VEC) {
+ do_load_up_transact_altivec(&current->thread);
+ /* At this point all the VSX state is loaded, so enable it */
+ regs->msr |= MSR_VSX;
+ }
}
-#ifdef CONFIG_ALTIVEC
void altivec_unavailable_tm(struct pt_regs *regs)
{
/* See the comments in fp_unavailable_tm(). This function operates
@@ -1417,18 +1441,21 @@ void altivec_unavailable_tm(struct pt_regs *regs)
TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx,"
"MSR=%lx\n",
regs->nip, regs->msr);
- tm_enable();
- tm_reclaim(&current->thread, current->thread.regs->msr,
- TM_CAUSE_FAC_UNAV);
+ tm_reclaim_current(TM_CAUSE_FAC_UNAV);
regs->msr |= MSR_VEC;
- tm_recheckpoint(&current->thread, regs->msr);
+ tm_recheckpoint(&current->thread, MSR_VEC);
current->thread.used_vr = 1;
+
+ if (regs->msr & MSR_FP) {
+ do_load_up_transact_fpu(&current->thread);
+ regs->msr |= MSR_VSX;
+ }
}
-#endif
-#ifdef CONFIG_VSX
void vsx_unavailable_tm(struct pt_regs *regs)
{
+ unsigned long orig_msr = regs->msr;
+
/* See the comments in fp_unavailable_tm(). This works similarly,
* though we're loading both FP and VEC registers in here.
*
@@ -1440,18 +1467,30 @@ void vsx_unavailable_tm(struct pt_regs *regs)
"MSR=%lx\n",
regs->nip, regs->msr);
- tm_enable();
+ current->thread.used_vsr = 1;
+
+ /* If FP and VMX are already loaded, we have all the state we need */
+ if ((orig_msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) {
+ regs->msr |= MSR_VSX;
+ return;
+ }
+
/* This reclaims FP and/or VR regs if they're already enabled */
- tm_reclaim(&current->thread, current->thread.regs->msr,
- TM_CAUSE_FAC_UNAV);
+ tm_reclaim_current(TM_CAUSE_FAC_UNAV);
regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode |
MSR_VSX;
- /* This loads & recheckpoints FP and VRs. */
- tm_recheckpoint(&current->thread, regs->msr);
- current->thread.used_vsr = 1;
+
+ /* This loads & recheckpoints FP and VRs; but we have
+ * to be sure not to overwrite previously-valid state.
+ */
+ tm_recheckpoint(&current->thread, regs->msr & ~orig_msr);
+
+ if (orig_msr & MSR_FP)
+ do_load_up_transact_fpu(&current->thread);
+ if (orig_msr & MSR_VEC)
+ do_load_up_transact_altivec(&current->thread);
}
-#endif
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
void performance_monitor_exception(struct pt_regs *regs)
@@ -1831,6 +1870,7 @@ struct ppc_emulated ppc_emulated = {
#ifdef CONFIG_PPC64
WARN_EMULATED_SETUP(mfdscr),
WARN_EMULATED_SETUP(mtdscr),
+ WARN_EMULATED_SETUP(lq_stq),
#endif
};
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index a15837519dca..b7aa07279a63 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -62,8 +62,6 @@ void __init udbg_early_init(void)
udbg_init_cpm();
#elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)
udbg_init_usbgecko();
-#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP)
- udbg_init_wsp();
#elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS)
/* In memory console */
udbg_init_memcons();
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 75702e207b29..6e7c4923b5ea 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -296,14 +296,3 @@ void __init udbg_init_40x_realmode(void)
}
#endif /* CONFIG_PPC_EARLY_DEBUG_40x */
-
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
-
-void __init udbg_init_wsp(void)
-{
- udbg_uart_init_mmio((void *)WSP_UART_VIRT, 1);
- udbg_uart_setup(57600, 50000000);
-}
-
-#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 59f419b935f2..003b20964ea0 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -186,7 +186,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
* emulate_step() returns 1 if the insn was successfully emulated.
* For all other cases, we need to single-step in hardware.
*/
- ret = emulate_step(regs, auprobe->ainsn);
+ ret = emulate_step(regs, auprobe->insn);
if (ret > 0)
return true;
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 094e45c16a17..ce74c335a6a4 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -715,8 +715,8 @@ int vdso_getcpu_init(void)
unsigned long cpu, node, val;
/*
- * SPRG3 contains the CPU in the bottom 16 bits and the NUMA node in
- * the next 16 bits. The VDSO uses this to implement getcpu().
+ * SPRG_VDSO contains the CPU in the bottom 16 bits and the NUMA node
+ * in the next 16 bits. The VDSO uses this to implement getcpu().
*/
cpu = get_cpu();
WARN_ON_ONCE(cpu > 0xffff);
@@ -725,8 +725,8 @@ int vdso_getcpu_init(void)
WARN_ON_ONCE(node > 0xffff);
val = (cpu & 0xfff) | ((node & 0xffff) << 16);
- mtspr(SPRN_SPRG3, val);
- get_paca()->sprg3 = val;
+ mtspr(SPRN_SPRG_VDSO_WRITE, val);
+ get_paca()->sprg_vdso = val;
put_cpu();
diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
index 47afd08c90f7..23eb9a9441bd 100644
--- a/arch/powerpc/kernel/vdso32/getcpu.S
+++ b/arch/powerpc/kernel/vdso32/getcpu.S
@@ -29,7 +29,7 @@
*/
V_FUNCTION_BEGIN(__kernel_getcpu)
.cfi_startproc
- mfspr r5,SPRN_USPRG3
+ mfspr r5,SPRN_SPRG_VDSO_READ
cmpdi cr0,r3,0
cmpdi cr1,r4,0
clrlwi r6,r5,16
diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
index 6e8f507ed32b..6ac107ac402a 100644
--- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
+++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
@@ -1,4 +1,3 @@
-#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/page.h>
@@ -7,7 +6,7 @@
.globl vdso32_start, vdso32_end
.balign PAGE_SIZE
vdso32_start:
- .incbin "arch/powerpc/kernel/vdso32/vdso32.so"
+ .incbin "arch/powerpc/kernel/vdso32/vdso32.so.dbg"
.balign PAGE_SIZE
vdso32_end:
diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S
index 47afd08c90f7..23eb9a9441bd 100644
--- a/arch/powerpc/kernel/vdso64/getcpu.S
+++ b/arch/powerpc/kernel/vdso64/getcpu.S
@@ -29,7 +29,7 @@
*/
V_FUNCTION_BEGIN(__kernel_getcpu)
.cfi_startproc
- mfspr r5,SPRN_USPRG3
+ mfspr r5,SPRN_SPRG_VDSO_READ
cmpdi cr0,r3,0
cmpdi cr1,r4,0
clrlwi r6,r5,16
diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
index b8553d62b792..df60fca6a13d 100644
--- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
+++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
@@ -1,4 +1,3 @@
-#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/page.h>
@@ -7,7 +6,7 @@
.globl vdso64_start, vdso64_end
.balign PAGE_SIZE
vdso64_start:
- .incbin "arch/powerpc/kernel/vdso64/vdso64.so"
+ .incbin "arch/powerpc/kernel/vdso64/vdso64.so.dbg"
.balign PAGE_SIZE
vdso64_end:
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 0458a9aaba9d..74f8050518d6 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -37,6 +37,16 @@ _GLOBAL(do_load_up_transact_altivec)
#endif
/*
+ * Enable use of VMX/Altivec for the caller.
+ */
+_GLOBAL(vec_enable)
+ mfmsr r3
+ oris r3,r3,MSR_VEC@h
+ MTMSRD(r3)
+ isync
+ blr
+
+/*
* Load state from memory into VMX registers including VSCR.
* Assumes the caller has enabled VMX in the MSR.
*/
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 76a64821f4a2..904c66128fae 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -518,16 +518,18 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
+ struct iommu_table *tbl;
dma_addr_t ret = DMA_ERROR_CODE;
- if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
+ tbl = get_iommu_table_base(dev);
+ if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) {
atomic_inc(&viodev->cmo.allocs_failed);
return ret;
}
ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs);
if (unlikely(dma_mapping_error(dev, ret))) {
- vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+ vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
atomic_inc(&viodev->cmo.allocs_failed);
}
@@ -540,10 +542,12 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
+ struct iommu_table *tbl;
+ tbl = get_iommu_table_base(dev);
dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs);
- vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+ vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
}
static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -551,12 +555,14 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
+ struct iommu_table *tbl;
struct scatterlist *sgl;
int ret, count = 0;
size_t alloc_size = 0;
+ tbl = get_iommu_table_base(dev);
for (sgl = sglist; count < nelems; count++, sgl++)
- alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
+ alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
if (vio_cmo_alloc(viodev, alloc_size)) {
atomic_inc(&viodev->cmo.allocs_failed);
@@ -572,7 +578,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
}
for (sgl = sglist, count = 0; count < ret; count++, sgl++)
- alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+ alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
if (alloc_size)
vio_cmo_dealloc(viodev, alloc_size);
@@ -585,12 +591,14 @@ static void vio_dma_iommu_unmap_sg(struct device *dev,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
+ struct iommu_table *tbl;
struct scatterlist *sgl;
size_t alloc_size = 0;
int count = 0;
+ tbl = get_iommu_table_base(dev);
for (sgl = sglist; count < nelems; count++, sgl++)
- alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+ alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
@@ -706,11 +714,14 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev)
{
struct vio_cmo_dev_entry *dev_ent;
struct device *dev = &viodev->dev;
+ struct iommu_table *tbl;
struct vio_driver *viodrv = to_vio_driver(dev->driver);
unsigned long flags;
size_t size;
bool dma_capable = false;
+ tbl = get_iommu_table_base(dev);
+
/* A device requires entitlement if it has a DMA window property */
switch (viodev->family) {
case VDEVICE:
@@ -736,7 +747,8 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev)
return -EINVAL;
}
- viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
+ viodev->cmo.desired =
+ IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl);
if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
viodev->cmo.desired = VIO_CMO_MIN_ENT;
size = VIO_CMO_MIN_ENT;
@@ -1176,9 +1188,10 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
&tbl->it_index, &offset, &size);
/* TCE table size - measured in tce entries */
- tbl->it_size = size >> IOMMU_PAGE_SHIFT;
+ tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
+ tbl->it_size = size >> tbl->it_page_shift;
/* offset for VIO should always be 0 */
- tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
+ tbl->it_offset = offset >> tbl->it_page_shift;
tbl->it_busno = 0;
tbl->it_type = TCE_VB;
tbl->it_blocksize = 16;
@@ -1419,7 +1432,8 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
/* needed to ensure proper operation of coherent allocations
* later, in case driver doesn't set it explicitly */
- dma_coerce_mask_and_coherent(&viodev->dev, DMA_BIT_MASK(64));
+ viodev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
+ viodev->dev.dma_mask = &viodev->dev.coherent_dma_mask;
}
/* register with generic device framework */