aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile4
-rw-r--r--arch/powerpc/kernel/align.c774
-rw-r--r--arch/powerpc/kernel/asm-offsets.c8
-rw-r--r--arch/powerpc/kernel/btext.c2
-rw-r--r--arch/powerpc/kernel/cacheinfo.c34
-rw-r--r--arch/powerpc/kernel/cputable.c8
-rw-r--r--arch/powerpc/kernel/eeh.c20
-rw-r--r--arch/powerpc/kernel/eeh_dev.c7
-rw-r--r--arch/powerpc/kernel/eeh_driver.c2
-rw-r--r--arch/powerpc/kernel/eeh_pe.c90
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c3
-rw-r--r--arch/powerpc/kernel/entry_32.S22
-rw-r--r--arch/powerpc/kernel/entry_64.S69
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S47
-rw-r--r--arch/powerpc/kernel/fadump.c31
-rw-r--r--arch/powerpc/kernel/head_32.S6
-rw-r--r--arch/powerpc/kernel/head_64.S8
-rw-r--r--arch/powerpc/kernel/head_8xx.S109
-rw-r--r--arch/powerpc/kernel/idle_book3s.S138
-rw-r--r--arch/powerpc/kernel/io-workarounds.c9
-rw-r--r--arch/powerpc/kernel/iommu.c7
-rw-r--r--arch/powerpc/kernel/irq.c78
-rw-r--r--arch/powerpc/kernel/isa-bridge.c32
-rw-r--r--arch/powerpc/kernel/kgdb.c4
-rw-r--r--arch/powerpc/kernel/kvm.c7
-rw-r--r--arch/powerpc/kernel/l2cr_6xx.S4
-rw-r--r--arch/powerpc/kernel/legacy_serial.c12
-rw-r--r--arch/powerpc/kernel/mce.c33
-rw-r--r--arch/powerpc/kernel/of_platform.c2
-rw-r--r--arch/powerpc/kernel/optprobes_head.S8
-rw-r--r--arch/powerpc/kernel/paca.c13
-rw-r--r--arch/powerpc/kernel/pci-common.c15
-rw-r--r--arch/powerpc/kernel/pci_32.c4
-rw-r--r--arch/powerpc/kernel/pci_64.c4
-rw-r--r--arch/powerpc/kernel/pci_dn.c22
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c24
-rw-r--r--arch/powerpc/kernel/process.c104
-rw-r--r--arch/powerpc/kernel/prom_init.c34
-rw-r--r--arch/powerpc/kernel/ptrace.c55
-rw-r--r--arch/powerpc/kernel/reloc_64.S6
-rw-r--r--arch/powerpc/kernel/rtas_pci.c33
-rw-r--r--arch/powerpc/kernel/setup-common.c36
-rw-r--r--arch/powerpc/kernel/setup_32.c7
-rw-r--r--arch/powerpc/kernel/setup_64.c39
-rw-r--r--arch/powerpc/kernel/smp.c240
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S2
-rw-r--r--arch/powerpc/kernel/systbl.S14
-rw-r--r--arch/powerpc/kernel/traps.c305
-rw-r--r--arch/powerpc/kernel/uprobes.c9
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S12
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/kernel/watchdog.c52
52 files changed, 1047 insertions, 1563 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 4aa7c147e447..91960f83039c 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
obj-$(CONFIG_VDSO32) += vdso32/
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog.o
+obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
@@ -83,7 +83,7 @@ extra-y := head_$(BITS).o
extra-$(CONFIG_40x) := head_40x.o
extra-$(CONFIG_44x) := head_44x.o
extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
-extra-$(CONFIG_8xx) := head_8xx.o
+extra-$(CONFIG_PPC_8xx) := head_8xx.o
extra-y += vmlinux.lds
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index ec7a8b099dd9..26b9994d27ee 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -27,6 +27,7 @@
#include <asm/switch_to.h>
#include <asm/disassemble.h>
#include <asm/cpu_has_feature.h>
+#include <asm/sstep.h>
struct aligninfo {
unsigned char len;
@@ -40,364 +41,9 @@ struct aligninfo {
#define LD 0 /* load */
#define ST 1 /* store */
#define SE 2 /* sign-extend value, or FP ld/st as word */
-#define F 4 /* to/from fp regs */
-#define U 8 /* update index register */
-#define M 0x10 /* multiple load/store */
#define SW 0x20 /* byte swap */
-#define S 0x40 /* single-precision fp or... */
-#define SX 0x40 /* ... byte count in XER */
-#define HARD 0x80 /* string, stwcx. */
#define E4 0x40 /* SPE endianness is word */
#define E8 0x80 /* SPE endianness is double word */
-#define SPLT 0x80 /* VSX SPLAT load */
-
-/* DSISR bits reported for a DCBZ instruction: */
-#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */
-
-/*
- * The PowerPC stores certain bits of the instruction that caused the
- * alignment exception in the DSISR register. This array maps those
- * bits to information about the operand length and what the
- * instruction would do.
- */
-static struct aligninfo aligninfo[128] = {
- { 4, LD }, /* 00 0 0000: lwz / lwarx */
- INVALID, /* 00 0 0001 */
- { 4, ST }, /* 00 0 0010: stw */
- INVALID, /* 00 0 0011 */
- { 2, LD }, /* 00 0 0100: lhz */
- { 2, LD+SE }, /* 00 0 0101: lha */
- { 2, ST }, /* 00 0 0110: sth */
- { 4, LD+M }, /* 00 0 0111: lmw */
- { 4, LD+F+S }, /* 00 0 1000: lfs */
- { 8, LD+F }, /* 00 0 1001: lfd */
- { 4, ST+F+S }, /* 00 0 1010: stfs */
- { 8, ST+F }, /* 00 0 1011: stfd */
- { 16, LD }, /* 00 0 1100: lq */
- { 8, LD }, /* 00 0 1101: ld/ldu/lwa */
- INVALID, /* 00 0 1110 */
- { 8, ST }, /* 00 0 1111: std/stdu */
- { 4, LD+U }, /* 00 1 0000: lwzu */
- INVALID, /* 00 1 0001 */
- { 4, ST+U }, /* 00 1 0010: stwu */
- INVALID, /* 00 1 0011 */
- { 2, LD+U }, /* 00 1 0100: lhzu */
- { 2, LD+SE+U }, /* 00 1 0101: lhau */
- { 2, ST+U }, /* 00 1 0110: sthu */
- { 4, ST+M }, /* 00 1 0111: stmw */
- { 4, LD+F+S+U }, /* 00 1 1000: lfsu */
- { 8, LD+F+U }, /* 00 1 1001: lfdu */
- { 4, ST+F+S+U }, /* 00 1 1010: stfsu */
- { 8, ST+F+U }, /* 00 1 1011: stfdu */
- { 16, LD+F }, /* 00 1 1100: lfdp */
- INVALID, /* 00 1 1101 */
- { 16, ST+F }, /* 00 1 1110: stfdp */
- INVALID, /* 00 1 1111 */
- { 8, LD }, /* 01 0 0000: ldx */
- INVALID, /* 01 0 0001 */
- { 8, ST }, /* 01 0 0010: stdx */
- INVALID, /* 01 0 0011 */
- INVALID, /* 01 0 0100 */
- { 4, LD+SE }, /* 01 0 0101: lwax */
- INVALID, /* 01 0 0110 */
- INVALID, /* 01 0 0111 */
- { 4, LD+M+HARD+SX }, /* 01 0 1000: lswx */
- { 4, LD+M+HARD }, /* 01 0 1001: lswi */
- { 4, ST+M+HARD+SX }, /* 01 0 1010: stswx */
- { 4, ST+M+HARD }, /* 01 0 1011: stswi */
- INVALID, /* 01 0 1100 */
- { 8, LD+U }, /* 01 0 1101: ldu */
- INVALID, /* 01 0 1110 */
- { 8, ST+U }, /* 01 0 1111: stdu */
- { 8, LD+U }, /* 01 1 0000: ldux */
- INVALID, /* 01 1 0001 */
- { 8, ST+U }, /* 01 1 0010: stdux */
- INVALID, /* 01 1 0011 */
- INVALID, /* 01 1 0100 */
- { 4, LD+SE+U }, /* 01 1 0101: lwaux */
- INVALID, /* 01 1 0110 */
- INVALID, /* 01 1 0111 */
- INVALID, /* 01 1 1000 */
- INVALID, /* 01 1 1001 */
- INVALID, /* 01 1 1010 */
- INVALID, /* 01 1 1011 */
- INVALID, /* 01 1 1100 */
- INVALID, /* 01 1 1101 */
- INVALID, /* 01 1 1110 */
- INVALID, /* 01 1 1111 */
- INVALID, /* 10 0 0000 */
- INVALID, /* 10 0 0001 */
- INVALID, /* 10 0 0010: stwcx. */
- INVALID, /* 10 0 0011 */
- INVALID, /* 10 0 0100 */
- INVALID, /* 10 0 0101 */
- INVALID, /* 10 0 0110 */
- INVALID, /* 10 0 0111 */
- { 4, LD+SW }, /* 10 0 1000: lwbrx */
- INVALID, /* 10 0 1001 */
- { 4, ST+SW }, /* 10 0 1010: stwbrx */
- INVALID, /* 10 0 1011 */
- { 2, LD+SW }, /* 10 0 1100: lhbrx */
- { 4, LD+SE }, /* 10 0 1101 lwa */
- { 2, ST+SW }, /* 10 0 1110: sthbrx */
- { 16, ST }, /* 10 0 1111: stq */
- INVALID, /* 10 1 0000 */
- INVALID, /* 10 1 0001 */
- INVALID, /* 10 1 0010 */
- INVALID, /* 10 1 0011 */
- INVALID, /* 10 1 0100 */
- INVALID, /* 10 1 0101 */
- INVALID, /* 10 1 0110 */
- INVALID, /* 10 1 0111 */
- INVALID, /* 10 1 1000 */
- INVALID, /* 10 1 1001 */
- INVALID, /* 10 1 1010 */
- INVALID, /* 10 1 1011 */
- INVALID, /* 10 1 1100 */
- INVALID, /* 10 1 1101 */
- INVALID, /* 10 1 1110 */
- { 0, ST+HARD }, /* 10 1 1111: dcbz */
- { 4, LD }, /* 11 0 0000: lwzx */
- INVALID, /* 11 0 0001 */
- { 4, ST }, /* 11 0 0010: stwx */
- INVALID, /* 11 0 0011 */
- { 2, LD }, /* 11 0 0100: lhzx */
- { 2, LD+SE }, /* 11 0 0101: lhax */
- { 2, ST }, /* 11 0 0110: sthx */
- INVALID, /* 11 0 0111 */
- { 4, LD+F+S }, /* 11 0 1000: lfsx */
- { 8, LD+F }, /* 11 0 1001: lfdx */
- { 4, ST+F+S }, /* 11 0 1010: stfsx */
- { 8, ST+F }, /* 11 0 1011: stfdx */
- { 16, LD+F }, /* 11 0 1100: lfdpx */
- { 4, LD+F+SE }, /* 11 0 1101: lfiwax */
- { 16, ST+F }, /* 11 0 1110: stfdpx */
- { 4, ST+F }, /* 11 0 1111: stfiwx */
- { 4, LD+U }, /* 11 1 0000: lwzux */
- INVALID, /* 11 1 0001 */
- { 4, ST+U }, /* 11 1 0010: stwux */
- INVALID, /* 11 1 0011 */
- { 2, LD+U }, /* 11 1 0100: lhzux */
- { 2, LD+SE+U }, /* 11 1 0101: lhaux */
- { 2, ST+U }, /* 11 1 0110: sthux */
- INVALID, /* 11 1 0111 */
- { 4, LD+F+S+U }, /* 11 1 1000: lfsux */
- { 8, LD+F+U }, /* 11 1 1001: lfdux */
- { 4, ST+F+S+U }, /* 11 1 1010: stfsux */
- { 8, ST+F+U }, /* 11 1 1011: stfdux */
- INVALID, /* 11 1 1100 */
- { 4, LD+F }, /* 11 1 1101: lfiwzx */
- INVALID, /* 11 1 1110 */
- INVALID, /* 11 1 1111 */
-};
-
-/*
- * The dcbz (data cache block zero) instruction
- * gives an alignment fault if used on non-cacheable
- * memory. We handle the fault mainly for the
- * case when we are running with the cache disabled
- * for debugging.
- */
-static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
-{
- long __user *p;
- int i, size;
-
-#ifdef __powerpc64__
- size = ppc64_caches.l1d.block_size;
-#else
- size = L1_CACHE_BYTES;
-#endif
- p = (long __user *) (regs->dar & -size);
- if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
- return -EFAULT;
- for (i = 0; i < size / sizeof(long); ++i)
- if (__put_user_inatomic(0, p+i))
- return -EFAULT;
- return 1;
-}
-
-/*
- * Emulate load & store multiple instructions
- * On 64-bit machines, these instructions only affect/use the
- * bottom 4 bytes of each register, and the loads clear the
- * top 4 bytes of the affected register.
- */
-#ifdef __BIG_ENDIAN__
-#ifdef CONFIG_PPC64
-#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
-#else
-#define REG_BYTE(rp, i) *((u8 *)(rp) + (i))
-#endif
-#else
-#define REG_BYTE(rp, i) (*(((u8 *)((rp) + ((i)>>2)) + ((i)&3))))
-#endif
-
-#define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz))
-
-static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
- unsigned int reg, unsigned int nb,
- unsigned int flags, unsigned int instr,
- unsigned long swiz)
-{
- unsigned long *rptr;
- unsigned int nb0, i, bswiz;
- unsigned long p;
-
- /*
- * We do not try to emulate 8 bytes multiple as they aren't really
- * available in our operating environments and we don't try to
- * emulate multiples operations in kernel land as they should never
- * be used/generated there at least not on unaligned boundaries
- */
- if (unlikely((nb > 4) || !user_mode(regs)))
- return 0;
-
- /* lmw, stmw, lswi/x, stswi/x */
- nb0 = 0;
- if (flags & HARD) {
- if (flags & SX) {
- nb = regs->xer & 127;
- if (nb == 0)
- return 1;
- } else {
- unsigned long pc = regs->nip ^ (swiz & 4);
-
- if (__get_user_inatomic(instr,
- (unsigned int __user *)pc))
- return -EFAULT;
- if (swiz == 0 && (flags & SW))
- instr = cpu_to_le32(instr);
- nb = (instr >> 11) & 0x1f;
- if (nb == 0)
- nb = 32;
- }
- if (nb + reg * 4 > 128) {
- nb0 = nb + reg * 4 - 128;
- nb = 128 - reg * 4;
- }
-#ifdef __LITTLE_ENDIAN__
- /*
- * String instructions are endian neutral but the code
- * below is not. Force byte swapping on so that the
- * effects of swizzling are undone in the load/store
- * loops below.
- */
- flags ^= SW;
-#endif
- } else {
- /* lwm, stmw */
- nb = (32 - reg) * 4;
- }
-
- if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0))
- return -EFAULT; /* bad address */
-
- rptr = &regs->gpr[reg];
- p = (unsigned long) addr;
- bswiz = (flags & SW)? 3: 0;
-
- if (!(flags & ST)) {
- /*
- * This zeroes the top 4 bytes of the affected registers
- * in 64-bit mode, and also zeroes out any remaining
- * bytes of the last register for lsw*.
- */
- memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long));
- if (nb0 > 0)
- memset(&regs->gpr[0], 0,
- ((nb0 + 3) / 4) * sizeof(unsigned long));
-
- for (i = 0; i < nb; ++i, ++p)
- if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- if (nb0 > 0) {
- rptr = &regs->gpr[0];
- addr += nb;
- for (i = 0; i < nb0; ++i, ++p)
- if (__get_user_inatomic(REG_BYTE(rptr,
- i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- }
-
- } else {
- for (i = 0; i < nb; ++i, ++p)
- if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- if (nb0 > 0) {
- rptr = &regs->gpr[0];
- addr += nb;
- for (i = 0; i < nb0; ++i, ++p)
- if (__put_user_inatomic(REG_BYTE(rptr,
- i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- }
- }
- return 1;
-}
-
-/*
- * Emulate floating-point pair loads and stores.
- * Only POWER6 has these instructions, and it does true little-endian,
- * so we don't need the address swizzling.
- */
-static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
- unsigned int flags)
-{
- char *ptr0 = (char *) &current->thread.TS_FPR(reg);
- char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
- int i, ret, sw = 0;
-
- if (reg & 1)
- return 0; /* invalid form: FRS/FRT must be even */
- if (flags & SW)
- sw = 7;
- ret = 0;
- for (i = 0; i < 8; ++i) {
- if (!(flags & ST)) {
- ret |= __get_user(ptr0[i^sw], addr + i);
- ret |= __get_user(ptr1[i^sw], addr + i + 8);
- } else {
- ret |= __put_user(ptr0[i^sw], addr + i);
- ret |= __put_user(ptr1[i^sw], addr + i + 8);
- }
- }
- if (ret)
- return -EFAULT;
- return 1; /* exception handled and fixed up */
-}
-
-#ifdef CONFIG_PPC64
-static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr,
- unsigned int reg, unsigned int flags)
-{
- char *ptr0 = (char *)&regs->gpr[reg];
- char *ptr1 = (char *)&regs->gpr[reg+1];
- int i, ret, sw = 0;
-
- if (reg & 1)
- return 0; /* invalid form: GPR must be even */
- if (flags & SW)
- sw = 7;
- ret = 0;
- for (i = 0; i < 8; ++i) {
- if (!(flags & ST)) {
- ret |= __get_user(ptr0[i^sw], addr + i);
- ret |= __get_user(ptr1[i^sw], addr + i + 8);
- } else {
- ret |= __put_user(ptr0[i^sw], addr + i);
- ret |= __put_user(ptr1[i^sw], addr + i + 8);
- }
- }
- if (ret)
- return -EFAULT;
- return 1; /* exception handled and fixed up */
-}
-#endif /* CONFIG_PPC64 */
#ifdef CONFIG_SPE
@@ -636,133 +282,21 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
}
#endif /* CONFIG_SPE */
-#ifdef CONFIG_VSX
-/*
- * Emulate VSX instructions...
- */
-static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
- unsigned int areg, struct pt_regs *regs,
- unsigned int flags, unsigned int length,
- unsigned int elsize)
-{
- char *ptr;
- unsigned long *lptr;
- int ret = 0;
- int sw = 0;
- int i, j;
-
- /* userland only */
- if (unlikely(!user_mode(regs)))
- return 0;
-
- flush_vsx_to_thread(current);
-
- if (reg < 32)
- ptr = (char *) &current->thread.fp_state.fpr[reg][0];
- else
- ptr = (char *) &current->thread.vr_state.vr[reg - 32];
-
- lptr = (unsigned long *) ptr;
-
-#ifdef __LITTLE_ENDIAN__
- if (flags & SW) {
- elsize = length;
- sw = length-1;
- } else {
- /*
- * The elements are BE ordered, even in LE mode, so process
- * them in reverse order.
- */
- addr += length - elsize;
-
- /* 8 byte memory accesses go in the top 8 bytes of the VR */
- if (length == 8)
- ptr += 8;
- }
-#else
- if (flags & SW)
- sw = elsize-1;
-#endif
-
- for (j = 0; j < length; j += elsize) {
- for (i = 0; i < elsize; ++i) {
- if (flags & ST)
- ret |= __put_user(ptr[i^sw], addr + i);
- else
- ret |= __get_user(ptr[i^sw], addr + i);
- }
- ptr += elsize;
-#ifdef __LITTLE_ENDIAN__
- addr -= elsize;
-#else
- addr += elsize;
-#endif
- }
-
-#ifdef __BIG_ENDIAN__
-#define VSX_HI 0
-#define VSX_LO 1
-#else
-#define VSX_HI 1
-#define VSX_LO 0
-#endif
-
- if (!ret) {
- if (flags & U)
- regs->gpr[areg] = regs->dar;
-
- /* Splat load copies the same data to top and bottom 8 bytes */
- if (flags & SPLT)
- lptr[VSX_LO] = lptr[VSX_HI];
- /* For 8 byte loads, zero the low 8 bytes */
- else if (!(flags & ST) && (8 == length))
- lptr[VSX_LO] = 0;
- } else
- return -EFAULT;
-
- return 1;
-}
-#endif
-
/*
* Called on alignment exception. Attempts to fixup
*
* Return 1 on success
* Return 0 if unable to handle the interrupt
* Return -EFAULT if data address is bad
+ * Other negative return values indicate that the instruction can't
+ * be emulated, and the process should be given a SIGBUS.
*/
int fix_alignment(struct pt_regs *regs)
{
- unsigned int instr, nb, flags, instruction = 0;
- unsigned int reg, areg;
- unsigned int dsisr;
- unsigned char __user *addr;
- unsigned long p, swiz;
- int ret, i;
- union data {
- u64 ll;
- double dd;
- unsigned char v[8];
- struct {
-#ifdef __LITTLE_ENDIAN__
- int low32;
- unsigned hi32;
-#else
- unsigned hi32;
- int low32;
-#endif
- } x32;
- struct {
-#ifdef __LITTLE_ENDIAN__
- short low16;
- unsigned char hi48[6];
-#else
- unsigned char hi48[6];
- short low16;
-#endif
- } x16;
- } data;
+ unsigned int instr;
+ struct instruction_op op;
+ int r, type;
/*
* We require a complete register set, if not, then our assembly
@@ -770,121 +304,23 @@ int fix_alignment(struct pt_regs *regs)
*/
CHECK_FULL_REGS(regs);
- dsisr = regs->dsisr;
-
- /* Some processors don't provide us with a DSISR we can use here,
- * let's make one up from the instruction
- */
- if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
- unsigned long pc = regs->nip;
-
- if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE))
- pc ^= 4;
- if (unlikely(__get_user_inatomic(instr,
- (unsigned int __user *)pc)))
- return -EFAULT;
- if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
- instr = cpu_to_le32(instr);
- dsisr = make_dsisr(instr);
- instruction = instr;
+ if (unlikely(__get_user(instr, (unsigned int __user *)regs->nip)))
+ return -EFAULT;
+ if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
+ /* We don't handle PPC little-endian any more... */
+ if (cpu_has_feature(CPU_FTR_PPC_LE))
+ return -EIO;
+ instr = swab32(instr);
}
- /* extract the operation and registers from the dsisr */
- reg = (dsisr >> 5) & 0x1f; /* source/dest register */
- areg = dsisr & 0x1f; /* register to update */
-
#ifdef CONFIG_SPE
if ((instr >> 26) == 0x4) {
+ int reg = (instr >> 21) & 0x1f;
PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
}
#endif
- instr = (dsisr >> 10) & 0x7f;
- instr |= (dsisr >> 13) & 0x60;
-
- /* Lookup the operation in our table */
- nb = aligninfo[instr].len;
- flags = aligninfo[instr].flags;
-
- /*
- * Handle some cases which give overlaps in the DSISR values.
- */
- if (IS_XFORM(instruction)) {
- switch (get_xop(instruction)) {
- case 532: /* ldbrx */
- nb = 8;
- flags = LD+SW;
- break;
- case 660: /* stdbrx */
- nb = 8;
- flags = ST+SW;
- break;
- case 20: /* lwarx */
- case 84: /* ldarx */
- case 116: /* lharx */
- case 276: /* lqarx */
- return 0; /* not emulated ever */
- }
- }
-
- /* Byteswap little endian loads and stores */
- swiz = 0;
- if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
- flags ^= SW;
-#ifdef __BIG_ENDIAN__
- /*
- * So-called "PowerPC little endian" mode works by
- * swizzling addresses rather than by actually doing
- * any byte-swapping. To emulate this, we XOR each
- * byte address with 7. We also byte-swap, because
- * the processor's address swizzling depends on the
- * operand size (it xors the address with 7 for bytes,
- * 6 for halfwords, 4 for words, 0 for doublewords) but
- * we will xor with 7 and load/store each byte separately.
- */
- if (cpu_has_feature(CPU_FTR_PPC_LE))
- swiz = 7;
-#endif
- }
-
- /* DAR has the operand effective address */
- addr = (unsigned char __user *)regs->dar;
-
-#ifdef CONFIG_VSX
- if ((instruction & 0xfc00003e) == 0x7c000018) {
- unsigned int elsize;
-
- /* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */
- reg |= (instruction & 0x1) << 5;
- /* Simple inline decoder instead of a table */
- /* VSX has only 8 and 16 byte memory accesses */
- nb = 8;
- if (instruction & 0x200)
- nb = 16;
-
- /* Vector stores in little-endian mode swap individual
- elements, so process them separately */
- elsize = 4;
- if (instruction & 0x80)
- elsize = 8;
-
- flags = 0;
- if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE))
- flags |= SW;
- if (instruction & 0x100)
- flags |= ST;
- if (instruction & 0x040)
- flags |= U;
- /* splat load needs a special decoder */
- if ((instruction & 0x400) == 0){
- flags |= SPLT;
- nb = 8;
- }
- PPC_WARN_ALIGNMENT(vsx, regs);
- return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize);
- }
-#endif
/*
* ISA 3.0 (such as P9) copy, copy_first, paste and paste_last alignment
@@ -896,173 +332,27 @@ int fix_alignment(struct pt_regs *regs)
* when pasting to a co-processor. Furthermore, paste_last is the
* synchronisation point for preceding copy/paste sequences.
*/
- if ((instruction & 0xfc0006fe) == PPC_INST_COPY)
+ if ((instr & 0xfc0006fe) == PPC_INST_COPY)
return -EIO;
- /* A size of 0 indicates an instruction we don't support, with
- * the exception of DCBZ which is handled as a special case here
- */
- if (instr == DCBZ) {
- PPC_WARN_ALIGNMENT(dcbz, regs);
- return emulate_dcbz(regs, addr);
- }
- if (unlikely(nb == 0))
- return 0;
-
- /* Load/Store Multiple instructions are handled in their own
- * function
- */
- if (flags & M) {
- PPC_WARN_ALIGNMENT(multiple, regs);
- return emulate_multiple(regs, addr, reg, nb,
- flags, instr, swiz);
- }
-
- /* Verify the address of the operand */
- if (unlikely(user_mode(regs) &&
- !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
- addr, nb)))
- return -EFAULT;
-
- /* Force the fprs into the save area so we can reference them */
- if (flags & F) {
- /* userland only */
- if (unlikely(!user_mode(regs)))
- return 0;
- flush_fp_to_thread(current);
- }
+ r = analyse_instr(&op, regs, instr);
+ if (r < 0)
+ return -EINVAL;
- if (nb == 16) {
- if (flags & F) {
- /* Special case for 16-byte FP loads and stores */
- PPC_WARN_ALIGNMENT(fp_pair, regs);
- return emulate_fp_pair(addr, reg, flags);
- } else {
-#ifdef CONFIG_PPC64
- /* Special case for 16-byte loads and stores */
- PPC_WARN_ALIGNMENT(lq_stq, regs);
- return emulate_lq_stq(regs, addr, reg, flags);
-#else
- return 0;
-#endif
- }
- }
-
- PPC_WARN_ALIGNMENT(unaligned, regs);
-
- /* If we are loading, get the data from user space, else
- * get it from register values
- */
- if (!(flags & ST)) {
- unsigned int start = 0;
-
- switch (nb) {
- case 4:
- start = offsetof(union data, x32.low32);
- break;
- case 2:
- start = offsetof(union data, x16.low16);
- break;
- }
-
- data.ll = 0;
- ret = 0;
- p = (unsigned long)addr;
-
- for (i = 0; i < nb; i++)
- ret |= __get_user_inatomic(data.v[start + i],
- SWIZ_PTR(p++));
-
- if (unlikely(ret))
- return -EFAULT;
-
- } else if (flags & F) {
- data.ll = current->thread.TS_FPR(reg);
- if (flags & S) {
- /* Single-precision FP store requires conversion... */
-#ifdef CONFIG_PPC_FPU
- preempt_disable();
- enable_kernel_fp();
- cvt_df(&data.dd, (float *)&data.x32.low32);
- disable_kernel_fp();
- preempt_enable();
-#else
- return 0;
-#endif
- }
- } else
- data.ll = regs->gpr[reg];
-
- if (flags & SW) {
- switch (nb) {
- case 8:
- data.ll = swab64(data.ll);
- break;
- case 4:
- data.x32.low32 = swab32(data.x32.low32);
- break;
- case 2:
- data.x16.low16 = swab16(data.x16.low16);
- break;
- }
- }
-
- /* Perform other misc operations like sign extension
- * or floating point single precision conversion
- */
- switch (flags & ~(U|SW)) {
- case LD+SE: /* sign extending integer loads */
- case LD+F+SE: /* sign extend for lfiwax */
- if ( nb == 2 )
- data.ll = data.x16.low16;
- else /* nb must be 4 */
- data.ll = data.x32.low32;
- break;
-
- /* Single-precision FP load requires conversion... */
- case LD+F+S:
-#ifdef CONFIG_PPC_FPU
- preempt_disable();
- enable_kernel_fp();
- cvt_fd((float *)&data.x32.low32, &data.dd);
- disable_kernel_fp();
- preempt_enable();
-#else
- return 0;
-#endif
- break;
+ type = op.type & INSTR_TYPE_MASK;
+ if (!OP_IS_LOAD_STORE(type)) {
+ if (type != CACHEOP + DCBZ)
+ return -EINVAL;
+ PPC_WARN_ALIGNMENT(dcbz, regs);
+ r = emulate_dcbz(op.ea, regs);
+ } else {
+ if (type == LARX || type == STCX)
+ return -EIO;
+ PPC_WARN_ALIGNMENT(unaligned, regs);
+ r = emulate_loadstore(regs, &op);
}
- /* Store result to memory or update registers */
- if (flags & ST) {
- unsigned int start = 0;
-
- switch (nb) {
- case 4:
- start = offsetof(union data, x32.low32);
- break;
- case 2:
- start = offsetof(union data, x16.low16);
- break;
- }
-
- ret = 0;
- p = (unsigned long)addr;
-
- for (i = 0; i < nb; i++)
- ret |= __put_user_inatomic(data.v[start + i],
- SWIZ_PTR(p++));
-
- if (unlikely(ret))
- return -EFAULT;
- } else if (flags & F)
- current->thread.TS_FPR(reg) = data.ll;
- else
- regs->gpr[reg] = data.ll;
-
- /* Update RA as needed */
- if (flags & U)
- regs->gpr[areg] = regs->dar;
-
- return 1;
+ if (!r)
+ return 1;
+ return r;
}
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6e95c2c19a7e..8cfb20e38cfe 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -746,6 +746,14 @@ int main(void)
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
+#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
+ STOP_SPR(STOP_PID, pid);
+ STOP_SPR(STOP_LDBAR, ldbar);
+ STOP_SPR(STOP_FSCR, fscr);
+ STOP_SPR(STOP_HFSCR, hfscr);
+ STOP_SPR(STOP_MMCR1, mmcr1);
+ STOP_SPR(STOP_MMCR2, mmcr2);
+ STOP_SPR(STOP_MMCRA, mmcra);
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 8275858a434d..3f46ca1c59f9 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -253,7 +253,7 @@ int __init btext_find_display(int allow_nonstdout)
for_each_node_by_type(np, "display") {
if (of_get_property(np, "linux,opened", NULL)) {
- printk("trying %s ...\n", np->full_name);
+ printk("trying %pOF ...\n", np);
rc = btext_initialize(np);
printk("result: %d\n", rc);
}
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index c641983bbdd6..a8f20e5928e1 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -167,10 +167,10 @@ static void release_cache_debugcheck(struct cache *cache)
list_for_each_entry(iter, &cache_list, list)
WARN_ONCE(iter->next_local == cache,
- "cache for %s(%s) refers to cache for %s(%s)\n",
- iter->ofnode->full_name,
+ "cache for %pOF(%s) refers to cache for %pOF(%s)\n",
+ iter->ofnode,
cache_type_string(iter),
- cache->ofnode->full_name,
+ cache->ofnode,
cache_type_string(cache));
}
@@ -179,8 +179,8 @@ static void release_cache(struct cache *cache)
if (!cache)
return;
- pr_debug("freeing L%d %s cache for %s\n", cache->level,
- cache_type_string(cache), cache->ofnode->full_name);
+ pr_debug("freeing L%d %s cache for %pOF\n", cache->level,
+ cache_type_string(cache), cache->ofnode);
release_cache_debugcheck(cache);
list_del(&cache->list);
@@ -194,8 +194,8 @@ static void cache_cpu_set(struct cache *cache, int cpu)
while (next) {
WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
- "CPU %i already accounted in %s(%s)\n",
- cpu, next->ofnode->full_name,
+ "CPU %i already accounted in %pOF(%s)\n",
+ cpu, next->ofnode,
cache_type_string(next));
cpumask_set_cpu(cpu, &next->shared_cpu_map);
next = next->next_local;
@@ -355,7 +355,7 @@ static int cache_is_unified_d(const struct device_node *np)
*/
static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
{
- pr_debug("creating L%d ucache for %s\n", level, node->full_name);
+ pr_debug("creating L%d ucache for %pOF\n", level, node);
return new_cache(cache_is_unified_d(node), level, node);
}
@@ -365,8 +365,8 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node,
{
struct cache *dcache, *icache;
- pr_debug("creating L%d dcache and icache for %s\n", level,
- node->full_name);
+ pr_debug("creating L%d dcache and icache for %pOF\n", level,
+ node);
dcache = new_cache(CACHE_TYPE_DATA, level, node);
icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
@@ -679,7 +679,6 @@ static struct kobj_type cache_index_type = {
static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
{
- const char *cache_name;
const char *cache_type;
struct cache *cache;
char *buf;
@@ -690,7 +689,6 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
return;
cache = dir->cache;
- cache_name = cache->ofnode->full_name;
cache_type = cache_type_string(cache);
/* We don't want to create an attribute that can't provide a
@@ -707,14 +705,14 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
rc = attr->show(&dir->kobj, attr, buf);
if (rc <= 0) {
pr_debug("not creating %s attribute for "
- "%s(%s) (rc = %zd)\n",
- attr->attr.name, cache_name,
+ "%pOF(%s) (rc = %zd)\n",
+ attr->attr.name, cache->ofnode,
cache_type, rc);
continue;
}
if (sysfs_create_file(&dir->kobj, &attr->attr))
- pr_debug("could not create %s attribute for %s(%s)\n",
- attr->attr.name, cache_name, cache_type);
+ pr_debug("could not create %s attribute for %pOF(%s)\n",
+ attr->attr.name, cache->ofnode, cache_type);
}
kfree(buf);
@@ -831,8 +829,8 @@ static void cache_cpu_clear(struct cache *cache, int cpu)
struct cache *next = cache->next_local;
WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
- "CPU %i not accounted in %s(%s)\n",
- cpu, cache->ofnode->full_name,
+ "CPU %i not accounted in %pOF(%s)\n",
+ cpu, cache->ofnode,
cache_type_string(cache));
cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 6f849832a669..760872916013 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1259,10 +1259,10 @@ static struct cpu_spec __initdata cpu_specs[] = {
.platform = "ppc603",
},
#endif /* CONFIG_PPC_BOOK3S_32 */
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
{ /* 8xx */
.pvr_mask = 0xffff0000,
- .pvr_value = 0x00500000,
+ .pvr_value = PVR_8xx,
.cpu_name = "8xx",
/* CPU_FTR_MAYBE_CAN_DOZE is possible,
* if the 8xx code is there.... */
@@ -1274,7 +1274,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_8xx,
.platform = "ppc823",
},
-#endif /* CONFIG_8xx */
+#endif /* CONFIG_PPC_8xx */
#ifdef CONFIG_40x
{ /* 403GC */
.pvr_mask = 0xffffff00,
@@ -1936,6 +1936,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_440A,
.platform = "ppc440",
},
+#ifdef CONFIG_PPC_47x
{ /* 476 DD2 core */
.pvr_mask = 0xffffffff,
.pvr_value = 0x11a52080,
@@ -1992,6 +1993,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_47x,
.platform = "ppc470",
},
+#endif /* CONFIG_PPC_47x */
{ /* default match */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 63992b2d8e15..9e816787c0d4 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -44,6 +44,7 @@
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
+#include <asm/pte-walk.h>
/** Overview:
@@ -169,10 +170,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
char buffer[128];
n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
- edev->phb->global_number, pdn->busno,
+ pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
- edev->phb->global_number, pdn->busno,
+ pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
@@ -352,8 +353,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
* worried about _PAGE_SPLITTING/collapse. Also we will not hit
* page table free, because of init_mm.
*/
- ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
- NULL, &hugepage_shift);
+ ptep = find_init_mm_pte(token, &hugepage_shift);
if (!ptep)
return token;
WARN_ON(hugepage_shift);
@@ -435,7 +435,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
int ret;
int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
unsigned long flags;
- struct pci_dn *pdn;
+ struct device_node *dn;
struct pci_dev *dev;
struct eeh_pe *pe, *parent_pe, *phb_pe;
int rc = 0;
@@ -493,9 +493,10 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
if (pe->state & EEH_PE_ISOLATED) {
pe->check_count++;
if (pe->check_count % EEH_MAX_FAILS == 0) {
- pdn = eeh_dev_to_pdn(edev);
- if (pdn->node)
- location = of_get_property(pdn->node, "ibm,loc-code", NULL);
+ dn = pci_device_to_OF_node(dev);
+ if (dn)
+ location = of_get_property(dn, "ibm,loc-code",
+ NULL);
printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
"location=%s driver=%s pci addr=%s\n",
pe->check_count,
@@ -1064,7 +1065,7 @@ core_initcall_sync(eeh_init);
*/
void eeh_add_device_early(struct pci_dn *pdn)
{
- struct pci_controller *phb;
+ struct pci_controller *phb = pdn ? pdn->phb : NULL;
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
if (!edev)
@@ -1074,7 +1075,6 @@ void eeh_add_device_early(struct pci_dn *pdn)
return;
/* USB Bus children of PCI devices will not have BUID's */
- phb = edev->phb;
if (NULL == phb ||
(eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
return;
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index d6b2ca70d14d..ad04ecd63c20 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -50,21 +50,16 @@
*/
struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
{
- struct pci_controller *phb = pdn->phb;
struct eeh_dev *edev;
/* Allocate EEH device */
edev = kzalloc(sizeof(*edev), GFP_KERNEL);
- if (!edev) {
- pr_warn("%s: out of memory\n",
- __func__);
+ if (!edev)
return NULL;
- }
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
- edev->phb = phb;
INIT_LIST_HEAD(&edev->list);
INIT_LIST_HEAD(&edev->rmv_list);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index c405c79e50cd..8b840191df59 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -428,7 +428,7 @@ static void *eeh_add_virt_device(void *data, void *userdata)
if (!(edev->physfn)) {
pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
- __func__, edev->phb->global_number, pdn->busno,
+ __func__, pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
return NULL;
}
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index cc4b206f77e4..2e8d1b2b5af4 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -230,10 +230,15 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
* Bus/Device/Function number. The extra data referred by flag
* indicates which type of address should be used.
*/
+struct eeh_pe_get_flag {
+ int pe_no;
+ int config_addr;
+};
+
static void *__eeh_pe_get(void *data, void *flag)
{
struct eeh_pe *pe = (struct eeh_pe *)data;
- struct eeh_dev *edev = (struct eeh_dev *)flag;
+ struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag;
/* Unexpected PHB PE */
if (pe->type & EEH_PE_PHB)
@@ -244,17 +249,17 @@ static void *__eeh_pe_get(void *data, void *flag)
* have non-zero PE address
*/
if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
- if (edev->pe_config_addr == pe->addr)
+ if (tmp->pe_no == pe->addr)
return pe;
} else {
- if (edev->pe_config_addr &&
- (edev->pe_config_addr == pe->addr))
+ if (tmp->pe_no &&
+ (tmp->pe_no == pe->addr))
return pe;
}
/* Try BDF address */
- if (edev->config_addr &&
- (edev->config_addr == pe->config_addr))
+ if (tmp->config_addr &&
+ (tmp->config_addr == pe->config_addr))
return pe;
return NULL;
@@ -262,7 +267,9 @@ static void *__eeh_pe_get(void *data, void *flag)
/**
* eeh_pe_get - Search PE based on the given address
- * @edev: EEH device
+ * @phb: PCI controller
+ * @pe_no: PE number
+ * @config_addr: Config address
*
* Search the corresponding PE based on the specified address which
* is included in the eeh device. The function is used to check if
@@ -271,12 +278,14 @@ static void *__eeh_pe_get(void *data, void *flag)
* which is composed of PCI bus/device/function number, or unified
* PE address.
*/
-struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
+ int pe_no, int config_addr)
{
- struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
+ struct eeh_pe *root = eeh_phb_pe_get(phb);
+ struct eeh_pe_get_flag tmp = { pe_no, config_addr };
struct eeh_pe *pe;
- pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
+ pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp);
return pe;
}
@@ -330,11 +339,13 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
int eeh_add_to_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent;
+ struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+ int config_addr = (pdn->busno << 8) | (pdn->devfn);
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
- __func__, edev->config_addr, edev->phb->global_number);
+ __func__, config_addr, pdn->phb->global_number);
return -EINVAL;
}
@@ -344,7 +355,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* PE should be composed of PCI bus and its subordinate
* components.
*/
- pe = eeh_pe_get(edev);
+ pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
if (pe && !(pe->type & EEH_PE_INVALID)) {
/* Mark the PE as type of PCI bus */
pe->type = EEH_PE_BUS;
@@ -353,11 +364,11 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Put the edev to PE */
list_add_tail(&edev->list, &pe->edevs);
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
- edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF),
- pe->addr);
+ pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn),
+ pe->addr);
return 0;
} else if (pe && (pe->type & EEH_PE_INVALID)) {
list_add_tail(&edev->list, &pe->edevs);
@@ -376,25 +387,25 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
"PE#%x, Parent PE#%x\n",
- edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF),
- pe->addr, pe->parent->addr);
+ pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn),
+ pe->addr, pe->parent->addr);
return 0;
}
/* Create a new EEH PE */
if (edev->physfn)
- pe = eeh_pe_alloc(edev->phb, EEH_PE_VF);
+ pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF);
else
- pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
+ pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE);
if (!pe) {
pr_err("%s: out of memory!\n", __func__);
return -ENOMEM;
}
pe->addr = edev->pe_config_addr;
- pe->config_addr = edev->config_addr;
+ pe->config_addr = config_addr;
/*
* Put the new EEH PE into hierarchy tree. If the parent
@@ -404,10 +415,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
*/
parent = eeh_pe_get_parent(edev);
if (!parent) {
- parent = eeh_phb_pe_get(edev->phb);
+ parent = eeh_phb_pe_get(pdn->phb);
if (!parent) {
pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
- __func__, edev->phb->global_number);
+ __func__, pdn->phb->global_number);
edev->pe = NULL;
kfree(pe);
return -EEXIST;
@@ -424,10 +435,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
edev->pe = pe;
pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
"Device PE#%x, Parent PE#%x\n",
- edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF),
+ pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn),
pe->addr, pe->parent->addr);
return 0;
@@ -446,13 +457,14 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
int cnt;
+ struct pci_dn *pdn = eeh_dev_to_pdn(edev);
if (!edev->pe) {
pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
- __func__, edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF));
+ __func__, pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn));
return -EEXIST;
}
@@ -712,10 +724,10 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
return;
pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
- __func__, edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF));
+ __func__, pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn));
/* Check slot status */
cap = edev->pcie_cap;
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 1ceecdda810b..797549289798 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -51,7 +51,6 @@ static ssize_t eeh_show_##_name(struct device *dev, \
static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
-EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
static ssize_t eeh_pe_state_show(struct device *dev,
@@ -103,7 +102,6 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
return;
rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
- rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
@@ -128,7 +126,6 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
}
device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
- device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 8587059ad848..e780e1fbf6c2 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -43,6 +43,13 @@
#define LOAD_MSR_KERNEL(r, x) li r,(x)
#endif
+/*
+ * Align to 4k in order to ensure that all functions modyfing srr0/srr1
+ * fit into one page in order to not encounter a TLB miss between the
+ * modification of srr0/srr1 and the associated rfi.
+ */
+ .align 12
+
#ifdef CONFIG_BOOKE
.globl mcheck_transfer_to_handler
mcheck_transfer_to_handler:
@@ -586,6 +593,10 @@ ppc_swapcontext:
handle_page_fault:
stw r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_6xx
+ andis. r0,r5,DSISR_DABRMATCH@h
+ bne- handle_dabr_fault
+#endif
bl do_page_fault
cmpwi r3,0
beq+ ret_from_except
@@ -599,6 +610,17 @@ handle_page_fault:
bl bad_page_fault
b ret_from_except_full
+#ifdef CONFIG_6xx
+ /* We have a data breakpoint exception - handle it */
+handle_dabr_fault:
+ SAVE_NVGPRS(r1)
+ lwz r0,_TRAP(r1)
+ clrrwi r0,r0,1
+ stw r0,_TRAP(r1)
+ bl do_break
+ b ret_from_except_full
+#endif
+
/*
* This routine switches between two different tasks. The process
* state of one is saved on its kernel stack. Then the state
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 49d8422767b4..4a0fd4f40245 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -223,17 +223,27 @@ system_call_exit:
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne- .Lsyscall_exit_work
- /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
- li r7,MSR_FP
+ andi. r0,r8,MSR_FP
+ beq 2f
#ifdef CONFIG_ALTIVEC
- oris r7,r7,MSR_VEC@h
+ andis. r0,r8,MSR_VEC@h
+ bne 3f
#endif
- and r0,r8,r7
- cmpd r0,r7
- bne .Lsyscall_restore_math
-.Lsyscall_restore_math_cont:
+2: addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_BOOK3S
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Restore RI */
+#endif
+ bl restore_math
+#ifdef CONFIG_PPC_BOOK3S
+ li r11,0
+ mtmsrd r11,1
+#endif
+ ld r8,_MSR(r1)
+ ld r3,RESULT(r1)
+ li r11,-MAX_ERRNO
- cmpld r3,r11
+3: cmpld r3,r11
ld r5,_CCR(r1)
bge- .Lsyscall_error
.Lsyscall_error_cont:
@@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r5,_CCR(r1)
b .Lsyscall_error_cont
-.Lsyscall_restore_math:
- /*
- * Some initial tests from restore_math to avoid the heavyweight
- * C code entry and MSR manipulations.
- */
- LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
- and. r0,r0,r8
- bne 1f
-
- ld r7,PACACURRENT(r13)
- lbz r0,THREAD+THREAD_LOAD_FP(r7)
-#ifdef CONFIG_ALTIVEC
- lbz r6,THREAD+THREAD_LOAD_VEC(r7)
- add r0,r0,r6
-#endif
- cmpdi r0,0
- beq .Lsyscall_restore_math_cont
-
-1: addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
- bl restore_math
-#ifdef CONFIG_PPC_BOOK3S
- li r11,0
- mtmsrd r11,1
-#endif
- /* Restore volatiles, reload MSR from updated one */
- ld r8,_MSR(r1)
- ld r3,RESULT(r1)
- li r11,-MAX_ERRNO
- b .Lsyscall_restore_math_cont
-
/* Traced system call support */
.Lsyscall_dotrace:
bl save_nvgprs
@@ -990,16 +966,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
#ifdef CONFIG_PPC_BOOK3E
cmpwi cr0,r3,0x280
#else
- BEGIN_FTR_SECTION
- cmpwi cr0,r3,0xe80
- FTR_SECTION_ELSE
- cmpwi cr0,r3,0xa00
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ cmpwi cr0,r3,0xa00
#endif /* CONFIG_PPC_BOOK3E */
bne 1f
addi r3,r1,STACK_FRAME_OVERHEAD;
bl doorbell_exception
- b ret_from_except
#endif /* CONFIG_PPC_DOORBELL */
1: b ret_from_except /* What else to do here ? */
@@ -1133,7 +1104,7 @@ _ASM_NOKPROBE_SYMBOL(__enter_rtas)
_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
.align 3
-1: .llong rtas_restore_regs
+1: .8byte rtas_restore_regs
rtas_restore_regs:
/* relocation is on at this point */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9029afd1fa2a..48da0f5d2f7f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -541,7 +541,7 @@ EXC_COMMON_BEGIN(instruction_access_common)
RECONCILE_IRQ_STATE(r10, r11)
ld r12,_MSR(r1)
ld r3,_NIP(r1)
- andis. r4,r12,0x5820
+ andis. r4,r12,DSISR_BAD_FAULT_64S@h
li r5,0x400
std r3,_DAR(r1)
std r4,_DSISR(r1)
@@ -1314,7 +1314,7 @@ EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
#endif
-#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH)
+#ifdef CONFIG_PPC_WATCHDOG
#define MASKED_DEC_HANDLER_LABEL 3f
@@ -1325,20 +1325,28 @@ EXC_VIRT_NONE(0x5800, 0x100)
std r10,PACA_EXGEN+EX_R13(r13); \
EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H)
+/*
+ * Branch to soft_nmi_interrupt using the emergency stack. The emergency
+ * stack is one that is usable by maskable interrupts so long as MSR_EE
+ * remains off. It is used for recovery when something has corrupted the
+ * normal kernel stack, for example. The "soft NMI" must not use the process
+ * stack because we want irq disabled sections to avoid touching the stack
+ * at all (other than PMU interrupts), so use the emergency stack for this,
+ * and run it entirely with interrupts hard disabled.
+ */
EXC_COMMON_BEGIN(soft_nmi_common)
mr r10,r1
ld r1,PACAEMERGSP(r13)
- ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
system_reset, soft_nmi_interrupt,
ADD_NVGPRS;ADD_RECONCILE)
b ret_from_except
-#else
+#else /* CONFIG_PPC_WATCHDOG */
#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
#define MASKED_DEC_HANDLER(_H)
-#endif
+#endif /* CONFIG_PPC_WATCHDOG */
/*
* An interrupt came in while soft-disabled. We set paca->irq_happened, then:
@@ -1362,19 +1370,16 @@ masked_##_H##interrupt: \
ori r10,r10,0xffff; \
mtspr SPRN_DEC,r10; \
b MASKED_DEC_HANDLER_LABEL; \
-1: cmpwi r10,PACA_IRQ_DBELL; \
- beq 2f; \
- cmpwi r10,PACA_IRQ_HMI; \
- beq 2f; \
+1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \
+ bne 2f; \
mfspr r10,SPRN_##_H##SRR1; \
- rldicl r10,r10,48,1; /* clear MSR_EE */ \
- rotldi r10,r10,16; \
+ xori r10,r10,MSR_EE; /* clear MSR_EE */ \
mtspr SPRN_##_H##SRR1,r10; \
2: mtcrf 0x80,r9; \
ld r9,PACA_EXGEN+EX_R9(r13); \
ld r10,PACA_EXGEN+EX_R10(r13); \
ld r11,PACA_EXGEN+EX_R11(r13); \
- GET_SCRATCH0(r13); \
+ /* returns to kernel where r13 must be set up, so don't restore it */ \
##_H##rfid; \
b .; \
MASKED_DEC_HANDLER(_H)
@@ -1477,8 +1482,10 @@ USE_TEXT_SECTION()
*/
.balign IFETCH_ALIGN_BYTES
do_hash_page:
-#ifdef CONFIG_PPC_STD_MMU_64
- andis. r0,r4,0xa450 /* weird error? */
+ #ifdef CONFIG_PPC_STD_MMU_64
+ lis r0,DSISR_BAD_FAULT_64S@h
+ ori r0,r0,DSISR_BAD_FAULT_64S@l
+ and. r0,r4,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
CURRENT_THREAD_INFO(r11, r1)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
@@ -1661,25 +1668,27 @@ _GLOBAL(__replay_interrupt)
* we don't give a damn about, so we don't bother storing them.
*/
mfmsr r12
- LOAD_REG_ADDR(r11, 1f)
+ LOAD_REG_ADDR(r11, replay_interrupt_return)
mfcr r9
ori r12,r12,MSR_EE
cmpwi r3,0x900
beq decrementer_common
cmpwi r3,0x500
+BEGIN_FTR_SECTION
+ beq h_virt_irq_common
+FTR_SECTION_ELSE
beq hardware_interrupt_common
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
- cmpwi r3,0xe80
+ cmpwi r3,0xa00
beq h_doorbell_common_msgclr
- cmpwi r3,0xea0
- beq h_virt_irq_common
cmpwi r3,0xe60
beq hmi_exception_common
FTR_SECTION_ELSE
cmpwi r3,0xa00
beq doorbell_super_common_msgclr
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-1:
+replay_interrupt_return:
blr
_ASM_NOKPROBE_SYMBOL(__replay_interrupt)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index dc0c49cfd90a..e1431800bfb9 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -125,6 +125,13 @@ int is_fadump_boot_memory_area(u64 addr, ulong size)
return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
}
+int should_fadump_crash(void)
+{
+ if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+ return 0;
+ return 1;
+}
+
int is_fadump_active(void)
{
return fw_dump.dump_active;
@@ -518,7 +525,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)
struct fadump_crash_info_header *fdh = NULL;
int old_cpu, this_cpu;
- if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+ if (!should_fadump_crash())
return;
/*
@@ -1446,6 +1453,25 @@ static void fadump_init_files(void)
return;
}
+static int fadump_panic_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ /*
+ * If firmware-assisted dump has been registered then trigger
+ * firmware-assisted dump and let firmware handle everything
+ * else. If this returns, then fadump was not registered, so
+ * go through the rest of the panic path.
+ */
+ crash_fadump(NULL, ptr);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block fadump_panic_block = {
+ .notifier_call = fadump_panic_event,
+ .priority = INT_MIN /* may not return; must be done last */
+};
+
/*
* Prepare for firmware-assisted dump.
*/
@@ -1478,6 +1504,9 @@ int __init setup_fadump(void)
init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
fadump_init_files();
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &fadump_panic_block);
+
return 1;
}
subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index e22734278458..8c54166491e7 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -388,7 +388,7 @@ DataAccess:
EXCEPTION_PROLOG
mfspr r10,SPRN_DSISR
stw r10,_DSISR(r11)
- andis. r0,r10,0xa470 /* weird error? */
+ andis. r0,r10,DSISR_BAD_FAULT_32S@h
bne 1f /* if not, try to put a PTE */
mfspr r4,SPRN_DAR /* into the hash table */
rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
@@ -403,13 +403,13 @@ DataAccess:
DO_KVM 0x400
InstructionAccess:
EXCEPTION_PROLOG
- andis. r0,r9,0x4000 /* no pte found? */
+ andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */
beq 1f /* if so, try to put a PTE */
li r3,0 /* into the hash table */
mr r4,r12 /* SRR0 is fault address */
bl hash_page
1: mr r4,r12
- mr r5,r9
+ andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
EXC_XFER_LITE(0x400, handle_page_fault)
/* External interrupt */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 0ddc602b33a4..ff8511d6d8ea 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -92,13 +92,13 @@ END_FTR_SECTION(0, 1)
.balign 8
.globl __secondary_hold_spinloop
__secondary_hold_spinloop:
- .llong 0x0
+ .8byte 0x0
/* Secondary processors write this value with their cpu # */
/* after they enter the spin loop immediately below. */
.globl __secondary_hold_acknowledge
__secondary_hold_acknowledge:
- .llong 0x0
+ .8byte 0x0
#ifdef CONFIG_RELOCATABLE
/* This flag is set to 1 by a loader if the kernel should run
@@ -650,7 +650,7 @@ __after_prom_start:
bctr
.balign 8
-p_end: .llong _end - copy_to_here
+p_end: .8byte _end - copy_to_here
4:
/*
@@ -892,7 +892,7 @@ _GLOBAL(relative_toc)
blr
.balign 8
-p_toc: .llong __toc_start + 0x8000 - 0b
+p_toc: .8byte __toc_start + 0x8000 - 0b
/*
* This is where the main kernel code starts.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index c032fe8c2d26..4fee00d414e8 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -50,18 +50,20 @@
mtspr spr, reg
#endif
-/* Macro to test if an address is a kernel address */
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
-#define IS_KERNEL(tmp, addr) \
- andis. tmp, addr, 0x8000 /* Address >= 0x80000000 */
-#define BRANCH_UNLESS_KERNEL(label) beq label
-#else
-#define IS_KERNEL(tmp, addr) \
- rlwinm tmp, addr, 16, 16, 31; \
- cmpli cr0, tmp, PAGE_OFFSET >> 16
-#define BRANCH_UNLESS_KERNEL(label) blt label
+/* By simply checking Address >= 0x80000000, we know if its a kernel address */
+#define SIMPLE_KERNEL_ADDRESS 1
#endif
+/*
+ * We need an ITLB miss handler for kernel addresses if:
+ * - Either we have modules
+ * - Or we have not pinned the first 8M
+ */
+#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \
+ defined(CONFIG_DEBUG_PAGEALLOC)
+#define ITLB_MISS_KERNEL 1
+#endif
/*
* Value for the bits that have fixed value in RPN entries.
@@ -123,7 +125,6 @@ turn_on_mmu:
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
- SYNC
rfi /* enables MMU */
/*
@@ -170,7 +171,7 @@ turn_on_mmu:
stw r1,0(r11); \
tovirt(r1,r11); /* set new kernel sp */ \
li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
- MTMSRD(r10); /* (except for mach check in rtas) */ \
+ mtmsr r10; \
stw r0,GPR0(r11); \
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
@@ -300,7 +301,7 @@ SystemCall:
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
*/
- EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD)
+ EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
. = 0x1100
/*
@@ -325,7 +326,7 @@ SystemCall:
#endif
InstructionTLBMiss:
-#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mtspr SPRN_SPRG_SCRATCH2, r3
#endif
EXCEPTION_PROLOG_0
@@ -343,15 +344,32 @@ InstructionTLBMiss:
INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
/* Only modules will cause ITLB Misses as we always
* pin the first 8MB of kernel memory */
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mfcr r3
#endif
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
- IS_KERNEL(r11, r10)
+#ifdef ITLB_MISS_KERNEL
+#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
+ andis. r11, r10, 0x8000 /* Address >= 0x80000000 */
+#else
+ rlwinm r11, r10, 16, 0xfff8
+ cmpli cr0, r11, PAGE_OFFSET@h
+#ifndef CONFIG_PIN_TLB_TEXT
+ /* It is assumed that kernel code fits into the first 8M page */
+_ENTRY(ITLBMiss_cmp)
+ cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+#endif
+#endif
#endif
mfspr r11, SPRN_M_TW /* Get level 1 table */
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
- BRANCH_UNLESS_KERNEL(3f)
+#ifdef ITLB_MISS_KERNEL
+#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
+ beq+ 3f
+#else
+ blt+ 3f
+#endif
+#ifndef CONFIG_PIN_TLB_TEXT
+ blt cr7, ITLBMissLinear
+#endif
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
3:
#endif
@@ -369,7 +387,7 @@ InstructionTLBMiss:
rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
4:
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mtcr r3
#endif
/* Insert the APG into the TWC from the Linux PTE. */
@@ -400,7 +418,7 @@ InstructionTLBMiss:
MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */
/* Restore registers */
-#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mfspr r3, SPRN_SPRG_SCRATCH2
#endif
EXCEPTION_EPILOG_0
@@ -447,23 +465,23 @@ DataStoreTLBMiss:
* kernel page tables.
*/
mfspr r10, SPRN_MD_EPN
- rlwinm r10, r10, 16, 0xfff8
- cmpli cr0, r10, PAGE_OFFSET@h
+ rlwinm r11, r10, 16, 0xfff8
+ cmpli cr0, r11, PAGE_OFFSET@h
mfspr r11, SPRN_M_TW /* Get level 1 table */
blt+ 3f
+ rlwinm r11, r10, 16, 0xfff8
#ifndef CONFIG_PIN_TLB_IMMR
- cmpli cr0, r10, VIRT_IMMR_BASE@h
+ cmpli cr0, r11, VIRT_IMMR_BASE@h
#endif
_ENTRY(DTLBMiss_cmp)
- cmpli cr7, r10, (PAGE_OFFSET + 0x1800000)@h
- lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
+ cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
#ifndef CONFIG_PIN_TLB_IMMR
_ENTRY(DTLBMiss_jmp)
beq- DTLBMissIMMR
#endif
blt cr7, DTLBMissLinear
+ lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
3:
- mfspr r10, SPRN_MD_EPN
/* Insert level 1 index */
rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
@@ -569,8 +587,8 @@ _ENTRY(DTLBMiss_jmp)
InstructionTLBError:
EXCEPTION_PROLOG
mr r4,r12
- mr r5,r9
- andis. r10,r5,0x4000
+ andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
+ andis. r10,r9,SRR1_ISI_NOPT@h
beq+ 1f
tlbie r4
itlbie:
@@ -595,7 +613,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
mfspr r4,SPRN_DAR
- andis. r10,r5,0x4000
+ andis. r10,r5,DSISR_NOHPTE@h
beq+ 1f
tlbie r4
dtlbie:
@@ -684,7 +702,7 @@ DTLBMissLinear:
/* Set 8M byte page and mark it valid */
li r11, MD_PS8MEG | MD_SVALID
MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
- rlwinm r10, r10, 16, 0x0f800000 /* 8xx supports max 256Mb RAM */
+ rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
_PAGE_PRESENT
MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */
@@ -695,6 +713,22 @@ DTLBMissLinear:
EXCEPTION_EPILOG_0
rfi
+#ifndef CONFIG_PIN_TLB_TEXT
+ITLBMissLinear:
+ mtcr r3
+ /* Set 8M byte page and mark it valid */
+ li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC
+ MTSPR_CPU6(SPRN_MI_TWC, r11, r3)
+ rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
+ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ _PAGE_PRESENT
+ MTSPR_CPU6(SPRN_MI_RPN, r10, r11) /* Update TLB entry */
+
+ mfspr r3, SPRN_SPRG_SCRATCH2
+ EXCEPTION_EPILOG_0
+ rfi
+#endif
+
/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
* by decoding the registers used by the dcbx instruction and adding them.
* DAR is set to the calculated address.
@@ -705,9 +739,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
mtspr SPRN_SPRG_SCRATCH2, r10
/* fetch instruction from memory. */
mfspr r10, SPRN_SRR0
- IS_KERNEL(r11, r10)
+ rlwinm r11, r10, 16, 0xfff8
+ cmpli cr0, r11, PAGE_OFFSET@h
mfspr r11, SPRN_M_TW /* Get level 1 table */
- BRANCH_UNLESS_KERNEL(3f)
+ blt+ 3f
rlwinm r11, r10, 16, 0xfff8
_ENTRY(FixupDAR_cmp)
cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
@@ -915,10 +950,8 @@ start_here:
rfi
/* Load up the kernel context */
2:
- SYNC /* Force all PTE updates to finish */
tlbia /* Clear all TLB entries */
sync /* wait for tlbia/tlbie to finish */
- TLBSYNC /* ... on all CPUs */
/* set up the PTE pointers for the Abatron bdiGDB.
*/
@@ -955,15 +988,14 @@ initial_mmu:
mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */
tlbia /* Invalidate all TLB entries */
-/* Always pin the first 8 MB ITLB to prevent ITLB
- misses while mucking around with SRR0/SRR1 in asm
-*/
+#ifdef CONFIG_PIN_TLB_TEXT
lis r8, MI_RSV4I@h
ori r8, r8, 0x1c00
mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
+#endif
-#ifdef CONFIG_PIN_TLB
+#ifdef CONFIG_PIN_TLB_DATA
oris r10, r10, MD_RSV4I@h
mtspr SPRN_MD_CTR, r10 /* Set data TLB control */
#endif
@@ -989,6 +1021,7 @@ initial_mmu:
* internal registers (among other things).
*/
#ifdef CONFIG_PIN_TLB_IMMR
+ oris r10, r10, MD_RSV4I@h
ori r10, r10, 0x1c00
mtspr SPRN_MD_CTR, r10
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 516ebef905c0..1125c9be9e06 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -85,7 +85,61 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
std r3,_WORT(r1)
mfspr r3,SPRN_WORC
std r3,_WORC(r1)
+/*
+ * On POWER9, there are idle states such as stop4, invoked via cpuidle,
+ * that lose hypervisor resources. In such cases, we need to save
+ * additional SPRs before entering those idle states so that they can
+ * be restored to their older values on wakeup from the idle state.
+ *
+ * On POWER8, the only such deep idle state is winkle which is used
+ * only in the context of CPU-Hotplug, where these additional SPRs are
+ * reinitiazed to a sane value. Hence there is no need to save/restore
+ * these SPRs.
+ */
+BEGIN_FTR_SECTION
+ blr
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
+power9_save_additional_sprs:
+ mfspr r3, SPRN_PID
+ mfspr r4, SPRN_LDBAR
+ std r3, STOP_PID(r13)
+ std r4, STOP_LDBAR(r13)
+ mfspr r3, SPRN_FSCR
+ mfspr r4, SPRN_HFSCR
+ std r3, STOP_FSCR(r13)
+ std r4, STOP_HFSCR(r13)
+
+ mfspr r3, SPRN_MMCRA
+ mfspr r4, SPRN_MMCR1
+ std r3, STOP_MMCRA(r13)
+ std r4, STOP_MMCR1(r13)
+
+ mfspr r3, SPRN_MMCR2
+ std r3, STOP_MMCR2(r13)
+ blr
+
+power9_restore_additional_sprs:
+ ld r3,_LPCR(r1)
+ ld r4, STOP_PID(r13)
+ mtspr SPRN_LPCR,r3
+ mtspr SPRN_PID, r4
+
+ ld r3, STOP_LDBAR(r13)
+ ld r4, STOP_FSCR(r13)
+ mtspr SPRN_LDBAR, r3
+ mtspr SPRN_FSCR, r4
+
+ ld r3, STOP_HFSCR(r13)
+ ld r4, STOP_MMCRA(r13)
+ mtspr SPRN_HFSCR, r3
+ mtspr SPRN_MMCRA, r4
+ /* We have already restored PACA_MMCR0 */
+ ld r3, STOP_MMCR1(r13)
+ ld r4, STOP_MMCR2(r13)
+ mtspr SPRN_MMCR1, r3
+ mtspr SPRN_MMCR2, r4
blr
/*
@@ -141,7 +195,16 @@ pnv_powersave_common:
std r5,_CCR(r1)
std r1,PACAR1(r13)
+BEGIN_FTR_SECTION
+ /*
+ * POWER9 does not require real mode to stop, and presently does not
+ * set hwthread_state for KVM (threads don't share MMU context), so
+ * we can remain in virtual mode for this.
+ */
+ bctr
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
/*
+ * POWER8
* Go to real mode to do the nap, as required by the architecture.
* Also, we need to be in real mode before setting hwthread_state,
* because as soon as we do that, another thread can switch
@@ -151,6 +214,20 @@ pnv_powersave_common:
mtmsrd r7,0
bctr
+/*
+ * This is the sequence required to execute idle instructions, as
+ * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ */
+#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
+ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+ std r0,0(r1); \
+ ptesync; \
+ ld r0,0(r1); \
+236: cmpd cr0,r0,r0; \
+ bne 236b; \
+ IDLE_INST;
+
+
.globl pnv_enter_arch207_idle_mode
pnv_enter_arch207_idle_mode:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -242,20 +319,27 @@ enter_winkle:
/*
* r3 - PSSCR value corresponding to the requested stop state.
*/
-power_enter_stop:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're entering idle */
+power_enter_stop_kvm_rm:
+ /*
+ * This is currently unused because POWER9 KVM does not have to
+ * gather secondary threads into sibling mode, but the code is
+ * here in case that function is required.
+ *
+ * Tell KVM we're entering idle.
+ */
li r4,KVM_HWTHREAD_IN_IDLE
/* DO THIS IN REAL MODE! See comment above. */
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
+power_enter_stop:
/*
* Check if we are executing the lite variant with ESL=EC=0
*/
andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
bne .Lhandle_esl_ec_set
- IDLE_STATE_ENTER_SEQ(PPC_STOP)
+ PPC_STOP
li r3,0 /* Since we didn't lose state, return 0 */
/*
@@ -288,7 +372,8 @@ power_enter_stop:
ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
cmpd r3,r4
bge .Lhandle_deep_stop
- IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
+ PPC_STOP /* Does not return (system reset interrupt) */
+
.Lhandle_deep_stop:
/*
* Entering deep idle state.
@@ -310,7 +395,7 @@ lwarx_loop_stop:
bl save_sprs_to_stack
- IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
+ PPC_STOP /* Does not return (system reset interrupt) */
/*
* Entered with MSR[EE]=0 and no soft-masked interrupts pending.
@@ -411,6 +496,18 @@ pnv_powersave_wakeup_mce:
b pnv_powersave_wakeup
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+kvm_start_guest_check:
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beqlr
+ b kvm_start_guest
+#endif
+
/*
* Called from reset vector for powersave wakeups.
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
@@ -435,15 +532,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mr r3,r12
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
- lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beq 1f
- b kvm_start_guest
-1:
+BEGIN_FTR_SECTION
+ bl kvm_start_guest_check
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#endif
/* Return SRR1 from power7_nap() */
@@ -460,11 +551,17 @@ pnv_restore_hyp_resource_arch300:
/*
* Workaround for POWER9, if we lost resources, the ERAT
* might have been mixed up and needs flushing. We also need
- * to reload MMCR0 (see comment above).
+ * to reload MMCR0 (see comment above). We also need to set
+ * then clear bit 60 in MMCRA to ensure the PMU starts running.
*/
blt cr3,1f
PPC_INVALIDATE_ERAT
ld r1,PACAR1(r13)
+ mfspr r4,SPRN_MMCRA
+ ori r4,r4,(1 << (63-60))
+ mtspr SPRN_MMCRA,r4
+ xori r4,r4,(1 << (63-60))
+ mtspr SPRN_MMCRA,r4
ld r4,_MMCR0(r1)
mtspr SPRN_MMCR0,r4
1:
@@ -803,9 +900,16 @@ no_segments:
mtctr r12
bctrl
+/*
+ * On POWER9, we can come here on wakeup from a cpuidle stop state.
+ * Hence restore the additional SPRs to the saved value.
+ *
+ * On POWER8, we come here only on winkle. Since winkle is used
+ * only in the case of CPU-Hotplug, we don't need to restore
+ * the additional SPRs.
+ */
BEGIN_FTR_SECTION
- ld r4,_LPCR(r1)
- mtspr SPRN_LPCR,r4
+ bl power9_restore_additional_sprs
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
hypervisor_state_restored:
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index a582e0d42525..aa9f1b8261db 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -19,6 +19,8 @@
#include <asm/pgtable.h>
#include <asm/ppc-pci.h>
#include <asm/io-workarounds.h>
+#include <asm/pte-walk.h>
+
#define IOWA_MAX_BUS 8
@@ -75,8 +77,7 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
* We won't find huge pages here (iomem). Also can't hit
* a page table free due to init_mm
*/
- ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
- NULL, &hugepage_shift);
+ ptep = find_init_mm_pte(vaddr, &hugepage_shift);
if (ptep == NULL)
paddr = 0;
else {
@@ -192,7 +193,7 @@ void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,
if (iowa_bus_count >= IOWA_MAX_BUS) {
pr_err("IOWA:Too many pci bridges, "
- "workarounds disabled for %s\n", np->full_name);
+ "workarounds disabled for %pOF\n", np);
return;
}
@@ -207,6 +208,6 @@ void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,
iowa_bus_count++;
- pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
+ pr_debug("IOWA:[%d]Add bus, %pOF.\n", iowa_bus_count-1, np);
}
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 233ca3fe4754..af7a20dc6e09 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -127,8 +127,7 @@ static ssize_t fail_iommu_store(struct device *dev,
return count;
}
-static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
- fail_iommu_store);
+static DEVICE_ATTR_RW(fail_iommu);
static int fail_iommu_bus_notify(struct notifier_block *nb,
unsigned long action, void *data)
@@ -190,7 +189,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
unsigned int pool_nr;
struct iommu_pool *pool;
- align_mask = 0xffffffffffffffffl >> (64 - align_order);
+ align_mask = (1ull << align_order) - 1;
/* This allocator was derived from x86_64's bit string search */
@@ -208,7 +207,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
* We don't need to disable preemption here because any CPU can
* safely use any IOMMU pool.
*/
- pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
+ pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
if (largealloc)
pool = &(tbl->large_pool);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 0bcec745a672..4e65bf82f5e0 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -24,7 +24,7 @@
* mask register (of which only 16 are defined), hence the weird shifting
* and complement of the cached_irq_mask. I want to be able to stuff
* this right into the SIU SMASK register.
- * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx
+ * Many of the prep/chrp functions are conditional compiled on CONFIG_PPC_8xx
* to reduce code space and undefined function references.
*/
@@ -143,8 +143,22 @@ notrace unsigned int __check_irq_replay(void)
*/
unsigned char happened = local_paca->irq_happened;
- /* Clear bit 0 which we wouldn't clear otherwise */
- local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ if (happened & PACA_IRQ_HARD_DIS) {
+ /* Clear bit 0 which we wouldn't clear otherwise */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+ /*
+ * We may have missed a decrementer interrupt if hard disabled.
+ * Check the decrementer register in case we had a rollover
+ * while hard disabled.
+ */
+ if (!(happened & PACA_IRQ_DEC)) {
+ if (decrementer_check_overflow()) {
+ local_paca->irq_happened |= PACA_IRQ_DEC;
+ happened |= PACA_IRQ_DEC;
+ }
+ }
+ }
/*
* Force the delivery of pending soft-disabled interrupts on PS3.
@@ -160,41 +174,39 @@ notrace unsigned int __check_irq_replay(void)
* This is a higher priority interrupt than the others, so
* replay it first.
*/
- local_paca->irq_happened &= ~PACA_IRQ_HMI;
- if (happened & PACA_IRQ_HMI)
+ if (happened & PACA_IRQ_HMI) {
+ local_paca->irq_happened &= ~PACA_IRQ_HMI;
return 0xe60;
+ }
- /*
- * We may have missed a decrementer interrupt. We check the
- * decrementer itself rather than the paca irq_happened field
- * in case we also had a rollover while hard disabled
- */
- local_paca->irq_happened &= ~PACA_IRQ_DEC;
- if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow())
+ if (happened & PACA_IRQ_DEC) {
+ local_paca->irq_happened &= ~PACA_IRQ_DEC;
return 0x900;
+ }
- /* Finally check if an external interrupt happened */
- local_paca->irq_happened &= ~PACA_IRQ_EE;
- if (happened & PACA_IRQ_EE)
+ if (happened & PACA_IRQ_EE) {
+ local_paca->irq_happened &= ~PACA_IRQ_EE;
return 0x500;
+ }
#ifdef CONFIG_PPC_BOOK3E
- /* Finally check if an EPR external interrupt happened
- * this bit is typically set if we need to handle another
- * "edge" interrupt from within the MPIC "EPR" handler
+ /*
+ * Check if an EPR external interrupt happened this bit is typically
+ * set if we need to handle another "edge" interrupt from within the
+ * MPIC "EPR" handler.
*/
- local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
- if (happened & PACA_IRQ_EE_EDGE)
+ if (happened & PACA_IRQ_EE_EDGE) {
+ local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
return 0x500;
+ }
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
- if (happened & PACA_IRQ_DBELL)
+ if (happened & PACA_IRQ_DBELL) {
+ local_paca->irq_happened &= ~PACA_IRQ_DBELL;
return 0x280;
+ }
#else
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
if (happened & PACA_IRQ_DBELL) {
- if (cpu_has_feature(CPU_FTR_HVMODE))
- return 0xe80;
+ local_paca->irq_happened &= ~PACA_IRQ_DBELL;
return 0xa00;
}
#endif /* CONFIG_PPC_BOOK3E */
@@ -470,6 +482,18 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, " Hypervisor Maintenance Interrupts\n");
}
+ seq_printf(p, "%*s: ", prec, "NMI");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs);
+ seq_printf(p, " System Reset interrupts\n");
+
+#ifdef CONFIG_PPC_WATCHDOG
+ seq_printf(p, "%*s: ", prec, "WDG");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs);
+ seq_printf(p, " Watchdog soft-NMI interrupts\n");
+#endif
+
#ifdef CONFIG_PPC_DOORBELL
if (cpu_has_feature(CPU_FTR_DBELL)) {
seq_printf(p, "%*s: ", prec, "DBL");
@@ -494,6 +518,10 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += per_cpu(irq_stat, cpu).spurious_irqs;
sum += per_cpu(irq_stat, cpu).timer_irqs_others;
sum += per_cpu(irq_stat, cpu).hmi_exceptions;
+ sum += per_cpu(irq_stat, cpu).sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+ sum += per_cpu(irq_stat, cpu).soft_nmi_irqs;
+#endif
#ifdef CONFIG_PPC_DOORBELL
sum += per_cpu(irq_stat, cpu).doorbell_irqs;
#endif
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index bb6f8993412e..1df6c74aa731 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -164,7 +164,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
- pr_debug("ISA bridge (early) is %s\n", np->full_name);
+ pr_debug("ISA bridge (early) is %pOF\n", np);
}
/**
@@ -187,15 +187,15 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
pna = of_n_addr_cells(np);
if (of_property_read_u32(np, "#address-cells", &na) ||
of_property_read_u32(np, "#size-cells", &ns)) {
- pr_warn("ISA: Non-PCI bridge %s is missing address format\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n",
+ np);
return;
}
/* Check it's a supported address format */
if (na != 2 || ns != 1) {
- pr_warn("ISA: Non-PCI bridge %s has unsupported address format\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n",
+ np);
return;
}
rs = na + ns + pna;
@@ -203,8 +203,8 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
/* Grab the ranges property */
ranges = of_get_property(np, "ranges", &rlen);
if (ranges == NULL || rlen < rs) {
- pr_warn("ISA: Non-PCI bridge %s has absent or invalid ranges\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n",
+ np);
return;
}
@@ -220,8 +220,8 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
/* Got something ? */
if (!size || !pbasep) {
- pr_warn("ISA: Non-PCI bridge %s has no usable IO range\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n",
+ np);
return;
}
@@ -233,15 +233,15 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
/* Map pbase */
pbase = of_translate_address(np, pbasep);
if (pbase == OF_BAD_ADDR) {
- pr_warn("ISA: Non-PCI bridge %s failed to translate IO base\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n",
+ np);
return;
}
/* We need page alignment */
if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) {
- pr_warn("ISA: Non-PCI bridge %s has non aligned IO range\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n",
+ np);
return;
}
@@ -255,7 +255,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
__ioremap_at(pbase, (void *)ISA_IO_BASE,
size, pgprot_val(pgprot_noncached(__pgprot(0))));
- pr_debug("ISA: Non-PCI bridge is %s\n", np->full_name);
+ pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
/**
@@ -277,8 +277,8 @@ static void isa_bridge_find_late(struct pci_dev *pdev,
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
- pr_debug("ISA bridge (late) is %s on %s\n",
- devnode->full_name, pci_name(pdev));
+ pr_debug("ISA bridge (late) is %pOF on %s\n",
+ devnode, pci_name(pdev));
}
/**
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index dbf098121ce6..35e240a0a408 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -67,9 +67,9 @@ static struct hard_trap_info
#endif
#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
{ 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
-#if defined(CONFIG_8xx)
+#if defined(CONFIG_PPC_8xx)
{ 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
-#else /* ! CONFIG_8xx */
+#else /* ! CONFIG_PPC_8xx */
{ 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */
{ 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */
{ 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 1086ea37c832..9ad37f827a97 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -25,7 +25,6 @@
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
-#include <linux/nmi.h> /* hardlockup_detector_disable() */
#include <asm/reg.h>
#include <asm/sections.h>
@@ -719,12 +718,6 @@ static __init void kvm_free_tmp(void)
static int __init kvm_guest_init(void)
{
- /*
- * The hardlockup detector is likely to get false positives in
- * KVM guests, so disable it by default.
- */
- hardlockup_detector_disable();
-
if (!kvm_para_available())
goto free_tmp;
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 97ec8557f974..6408f09dbbd9 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -181,7 +181,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
mtctr r4
li r4,0
1:
- lwzx r0,r0,r4
+ lwzx r0,0,r4
addi r4,r4,32 /* Go to start of next cache line */
bdnz 1b
isync
@@ -328,7 +328,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
mtctr r4
li r4,0
1:
- lwzx r0,r0,r4
+ lwzx r0,0,r4
dcbf 0,r4
addi r4,r4,32 /* Go to start of next cache line */
bdnz 1b
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 0694d20f85b6..5e5a64a8b4e4 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -147,8 +147,8 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
legacy_serial_ports[index].serial_out = tsi_serial_out;
}
- printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
- index, np->full_name);
+ printk(KERN_DEBUG "Found legacy serial port %d for %pOF\n",
+ index, np);
printk(KERN_DEBUG " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
(iotype == UPIO_PORT) ? "port" : "mem",
(unsigned long long)base, (unsigned long long)taddr, irq,
@@ -207,7 +207,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
int index = -1;
u64 taddr;
- DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
+ DBG(" -> add_legacy_isa_port(%pOF)\n", np);
/* Get the ISA port number */
reg = of_get_property(np, "reg", NULL);
@@ -256,7 +256,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
unsigned int flags;
int iotype, index = -1, lindex = 0;
- DBG(" -> add_legacy_pci_port(%s)\n", np->full_name);
+ DBG(" -> add_legacy_pci_port(%pOF)\n", np);
/* We only support ports that have a clock frequency properly
* encoded in the device-tree (that is have an fcode). Anything
@@ -374,7 +374,7 @@ void __init find_legacy_serial_ports(void)
if (path != NULL) {
stdout = of_find_node_by_path(path);
if (stdout)
- DBG("stdout is %s\n", stdout->full_name);
+ DBG("stdout is %pOF\n", stdout);
} else {
DBG(" no linux,stdout-path !\n");
}
@@ -603,7 +603,7 @@ static int __init check_legacy_serial_console(void)
DBG(" can't find stdout package %s !\n", name);
return -ENODEV;
}
- DBG("stdout is %s\n", prom_stdout->full_name);
+ DBG("stdout is %pOF\n", prom_stdout);
name = of_get_property(prom_stdout, "name", NULL);
if (!name) {
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index e0e131e662ed..9b2ea7e71c06 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -22,11 +22,14 @@
#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt
+#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
+
+#include <asm/machdep.h>
#include <asm/mce.h>
static DEFINE_PER_CPU(int, mce_nest_count);
@@ -446,3 +449,33 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt)
return 0;
}
EXPORT_SYMBOL(get_mce_fault_addr);
+
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contains srr0 and ssr1.
+ */
+long machine_check_early(struct pt_regs *regs)
+{
+ long handled = 0;
+
+ __this_cpu_inc(irq_stat.mce_exceptions);
+
+ if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+ handled = cur_cpu_spec->machine_check_early(regs);
+ return handled;
+}
+
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+ __this_cpu_inc(irq_stat.hmi_exceptions);
+
+ wait_for_subcore_guest_exit();
+
+ if (ppc_md.hmi_exception_early)
+ ppc_md.hmi_exception_early(regs);
+
+ wait_for_tb_resync();
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 34aeac54f120..becaec990140 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -45,7 +45,7 @@ static int of_pci_phb_probe(struct platform_device *dev)
if (ppc_md.pci_setup_phb == NULL)
return -ENODEV;
- pr_info("Setting up PCI bus %s\n", dev->dev.of_node->full_name);
+ pr_info("Setting up PCI bus %pOF\n", dev->dev.of_node);
/* Alloc and setup PHB data structure */
phb = pcibios_alloc_controller(dev->dev.of_node);
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 4937bef7652f..52fc864cdec4 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -60,10 +60,6 @@ optprobe_template_entry:
std r5,_CCR(r1)
lbz r5,PACASOFTIRQEN(r13)
std r5,SOFTE(r1)
- mfdar r5
- std r5,_DAR(r1)
- mfdsisr r5
- std r5,_DSISR(r1)
/*
* We may get here from a module, so load the kernel TOC in r2.
@@ -122,10 +118,6 @@ optprobe_template_call_emulate:
mtxer r5
ld r5,_CCR(r1)
mtcr r5
- ld r5,_DAR(r1)
- mtdar r5
- ld r5,_DSISR(r1)
- mtdsisr r5
REST_GPR(0,r1)
REST_10GPRS(2,r1)
REST_10GPRS(12,r1)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 8d63627e067f..70f073d6c3b2 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -99,18 +99,27 @@ static inline void free_lppacas(void) { }
* If you make the number of persistent SLB entries dynamic, please also
* update PR KVM to flush and restore them accordingly.
*/
-static struct slb_shadow *slb_shadow;
+static struct slb_shadow * __initdata slb_shadow;
static void __init allocate_slb_shadows(int nr_cpus, int limit)
{
int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
+
+ if (early_radix_enabled())
+ return;
+
slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
memset(slb_shadow, 0, size);
}
static struct slb_shadow * __init init_slb_shadow(int cpu)
{
- struct slb_shadow *s = &slb_shadow[cpu];
+ struct slb_shadow *s;
+
+ if (early_radix_enabled())
+ return NULL;
+
+ s = &slb_shadow[cpu];
/*
* When we come through here to initialise boot_paca, the slb_shadow
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 341a7469cab8..02831a396419 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -373,9 +373,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
if (virq)
irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
} else {
- pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
- oirq.args_count, oirq.args[0], oirq.args[1],
- of_node_full_name(oirq.np));
+ pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %pOF\n",
+ oirq.args_count, oirq.args[0], oirq.args[1], oirq.np);
virq = irq_create_of_mapping(&oirq);
}
@@ -741,8 +740,8 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
struct of_pci_range range;
struct of_pci_range_parser parser;
- printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
- dev->full_name, primary ? "(primary)" : "");
+ printk(KERN_INFO "PCI host bridge %pOF %s ranges:\n",
+ dev, primary ? "(primary)" : "");
/* Check for ranges property */
if (of_pci_range_parser_init(&parser, dev))
@@ -1556,8 +1555,8 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
if (!res->flags) {
pr_debug("PCI: I/O resource not set for host"
- " bridge %s (domain %d)\n",
- hose->dn->full_name, hose->global_number);
+ " bridge %pOF (domain %d)\n",
+ hose->dn, hose->global_number);
} else {
offset = pcibios_io_space_offset(hose);
@@ -1668,7 +1667,7 @@ void pcibios_scan_phb(struct pci_controller *hose)
struct device_node *node = hose->dn;
int mode;
- pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node));
+ pr_debug("PCI: Scanning PHB %pOF\n", node);
/* Get some IO space for the new PHB */
pcibios_setup_phb_io_space(hose);
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 41c86c6b6e4d..1d817f4d97d9 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -79,8 +79,8 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
return;
bus_range = of_get_property(node, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get bus-range for %s, "
- "assuming it starts at 0\n", node->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, "
+ "assuming it starts at 0\n", node);
pci_to_OF_bus_map[pci_bus] = 0;
} else
pci_to_OF_bus_map[pci_bus] = bus_range[0];
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index ed5e9ff61a68..932b9741aa8f 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -111,7 +111,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
if (hose->io_base_alloc == NULL)
return 0;
- pr_debug("IO unmapping for PHB %s\n", hose->dn->full_name);
+ pr_debug("IO unmapping for PHB %pOF\n", hose->dn);
pr_debug(" alloc=0x%p\n", hose->io_base_alloc);
/* This is a PHB, we fully unmap the IO area */
@@ -151,7 +151,7 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
hose->io_base_virt = (void __iomem *)(area->addr +
hose->io_base_phys - phys_page);
- pr_debug("IO mapping for PHB %s\n", hose->dn->full_name);
+ pr_debug("IO mapping for PHB %pOF\n", hose->dn);
pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
pr_debug(" size=0x%016llx (alloc=0x%016lx)\n",
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 592693437070..0e395afbf0f4 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -139,7 +139,6 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
#ifdef CONFIG_PCI_IOV
static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
- struct pci_dev *pdev,
int vf_index,
int busno, int devfn)
{
@@ -150,10 +149,8 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
return NULL;
pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
- if (!pdn) {
- dev_warn(&pdev->dev, "%s: Out of memory!\n", __func__);
+ if (!pdn)
return NULL;
- }
pdn->phb = parent->phb;
pdn->parent = parent;
@@ -167,13 +164,6 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
INIT_LIST_HEAD(&pdn->list);
list_add_tail(&pdn->list, &parent->child_list);
- /*
- * If we already have PCI device instance, lets
- * bind them.
- */
- if (pdev)
- pdev->dev.archdata.pci_data = pdn;
-
return pdn;
}
#endif
@@ -201,7 +191,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
struct eeh_dev *edev __maybe_unused;
- pdn = add_one_dev_pci_data(parent, NULL, i,
+ pdn = add_one_dev_pci_data(parent, i,
pci_iov_virtfn_bus(pdev, i),
pci_iov_virtfn_devfn(pdev, i));
if (!pdn) {
@@ -303,7 +293,6 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
if (pdn == NULL)
return NULL;
dn->data = pdn;
- pdn->node = dn;
pdn->phb = hose;
#ifdef CONFIG_PPC_POWERNV
pdn->pe_number = IODA_INVALID_PE;
@@ -352,6 +341,7 @@ EXPORT_SYMBOL_GPL(pci_add_device_node_info);
void pci_remove_device_node_info(struct device_node *dn)
{
struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
+ struct device_node *parent;
#ifdef CONFIG_EEH
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -364,8 +354,10 @@ void pci_remove_device_node_info(struct device_node *dn)
WARN_ON(!list_empty(&pdn->child_list));
list_del(&pdn->list);
- if (pdn->parent)
- of_node_put(pdn->parent->node);
+
+ parent = of_get_parent(dn);
+ if (parent)
+ of_node_put(parent);
dn->data = NULL;
kfree(pdn);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index ea3d98115b88..0d790f8432d2 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -211,19 +211,19 @@ void of_scan_pci_bridge(struct pci_dev *dev)
unsigned int flags;
u64 size;
- pr_debug("of_scan_pci_bridge(%s)\n", node->full_name);
+ pr_debug("of_scan_pci_bridge(%pOF)\n", node);
/* parse bus-range property */
busrange = of_get_property(node, "bus-range", &len);
if (busrange == NULL || len != 8) {
- printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
- node->full_name);
+ printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %pOF\n",
+ node);
return;
}
ranges = of_get_property(node, "ranges", &len);
if (ranges == NULL) {
- printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
- node->full_name);
+ printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %pOF\n",
+ node);
return;
}
@@ -233,8 +233,8 @@ void of_scan_pci_bridge(struct pci_dev *dev)
bus = pci_add_new_bus(dev->bus, dev,
of_read_number(busrange, 1));
if (!bus) {
- printk(KERN_ERR "Failed to create pci bus for %s\n",
- node->full_name);
+ printk(KERN_ERR "Failed to create pci bus for %pOF\n",
+ node);
return;
}
}
@@ -262,13 +262,13 @@ void of_scan_pci_bridge(struct pci_dev *dev)
res = bus->resource[0];
if (res->flags) {
printk(KERN_ERR "PCI: ignoring extra I/O range"
- " for bridge %s\n", node->full_name);
+ " for bridge %pOF\n", node);
continue;
}
} else {
if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
printk(KERN_ERR "PCI: too many memory ranges"
- " for bridge %s\n", node->full_name);
+ " for bridge %pOF\n", node);
continue;
}
res = bus->resource[i];
@@ -307,7 +307,7 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
struct eeh_dev *edev = pdn_to_eeh_dev(PCI_DN(dn));
#endif
- pr_debug(" * %s\n", dn->full_name);
+ pr_debug(" * %pOF\n", dn);
if (!of_device_is_available(dn))
return NULL;
@@ -350,8 +350,8 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
struct device_node *child;
struct pci_dev *dev;
- pr_debug("of_scan_bus(%s) bus no %d...\n",
- node->full_name, bus->number);
+ pr_debug("of_scan_bus(%pOF) bus no %d...\n",
+ node, bus->number);
/* Scan direct children */
for_each_child_of_node(node, child) {
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9f3e2c932dcc..a0c74bbf3454 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -230,7 +230,8 @@ void enable_kernel_fp(void)
}
EXPORT_SYMBOL(enable_kernel_fp);
-static int restore_fp(struct task_struct *tsk) {
+static int restore_fp(struct task_struct *tsk)
+{
if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) {
load_fp_state(&current->thread.fp_state);
current->thread.load_fp++;
@@ -330,11 +331,19 @@ static inline int restore_altivec(struct task_struct *tsk) { return 0; }
#ifdef CONFIG_VSX
static void __giveup_vsx(struct task_struct *tsk)
{
- if (tsk->thread.regs->msr & MSR_FP)
+ unsigned long msr = tsk->thread.regs->msr;
+
+ /*
+ * We should never be ssetting MSR_VSX without also setting
+ * MSR_FP and MSR_VEC
+ */
+ WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC)));
+
+ /* __giveup_fpu will clear MSR_VSX */
+ if (msr & MSR_FP)
__giveup_fpu(tsk);
- if (tsk->thread.regs->msr & MSR_VEC)
+ if (msr & MSR_VEC)
__giveup_altivec(tsk);
- tsk->thread.regs->msr &= ~MSR_VSX;
}
static void giveup_vsx(struct task_struct *tsk)
@@ -346,14 +355,6 @@ static void giveup_vsx(struct task_struct *tsk)
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
}
-static void save_vsx(struct task_struct *tsk)
-{
- if (tsk->thread.regs->msr & MSR_FP)
- save_fpu(tsk);
- if (tsk->thread.regs->msr & MSR_VEC)
- save_altivec(tsk);
-}
-
void enable_kernel_vsx(void)
{
unsigned long cpumsr;
@@ -362,7 +363,8 @@ void enable_kernel_vsx(void)
cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
- if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) {
+ if (current->thread.regs &&
+ (current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) {
check_if_tm_restore_required(current);
/*
* If a thread has already been reclaimed then the
@@ -373,10 +375,6 @@ void enable_kernel_vsx(void)
*/
if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
return;
- if (current->thread.regs->msr & MSR_FP)
- __giveup_fpu(current);
- if (current->thread.regs->msr & MSR_VEC)
- __giveup_altivec(current);
__giveup_vsx(current);
}
}
@@ -386,7 +384,7 @@ void flush_vsx_to_thread(struct task_struct *tsk)
{
if (tsk->thread.regs) {
preempt_disable();
- if (tsk->thread.regs->msr & MSR_VSX) {
+ if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) {
BUG_ON(tsk != current);
giveup_vsx(tsk);
}
@@ -406,7 +404,6 @@ static int restore_vsx(struct task_struct *tsk)
}
#else
static inline int restore_vsx(struct task_struct *tsk) { return 0; }
-static inline void save_vsx(struct task_struct *tsk) { }
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
@@ -486,6 +483,8 @@ void giveup_all(struct task_struct *tsk)
msr_check_and_set(msr_all_available);
check_if_tm_restore_required(tsk);
+ WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
+
#ifdef CONFIG_PPC_FPU
if (usermsr & MSR_FP)
__giveup_fpu(tsk);
@@ -494,10 +493,6 @@ void giveup_all(struct task_struct *tsk)
if (usermsr & MSR_VEC)
__giveup_altivec(tsk);
#endif
-#ifdef CONFIG_VSX
- if (usermsr & MSR_VSX)
- __giveup_vsx(tsk);
-#endif
#ifdef CONFIG_SPE
if (usermsr & MSR_SPE)
__giveup_spe(tsk);
@@ -511,10 +506,6 @@ void restore_math(struct pt_regs *regs)
{
unsigned long msr;
- /*
- * Syscall exit makes a similar initial check before branching
- * to restore_math. Keep them in synch.
- */
if (!msr_tm_active(regs->msr) &&
!current->thread.load_fp && !loadvec(current->thread))
return;
@@ -556,19 +547,13 @@ void save_all(struct task_struct *tsk)
msr_check_and_set(msr_all_available);
- /*
- * Saving the way the register space is in hardware, save_vsx boils
- * down to a save_fpu() and save_altivec()
- */
- if (usermsr & MSR_VSX) {
- save_vsx(tsk);
- } else {
- if (usermsr & MSR_FP)
- save_fpu(tsk);
+ WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
- if (usermsr & MSR_VEC)
- save_altivec(tsk);
- }
+ if (usermsr & MSR_FP)
+ save_fpu(tsk);
+
+ if (usermsr & MSR_VEC)
+ save_altivec(tsk);
if (usermsr & MSR_SPE)
__giveup_spe(tsk);
@@ -1395,13 +1380,13 @@ void show_regs(struct pt_regs * regs)
show_regs_print_info(KERN_DEFAULT);
- printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
+ printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
regs->nip, regs->link, regs->ctr);
printk("REGS: %p TRAP: %04lx %s (%s)\n",
regs, regs->trap, print_tainted(), init_utsname()->release);
- printk("MSR: "REG" ", regs->msr);
+ printk("MSR: "REG" ", regs->msr);
print_msr_bits(regs->msr);
- printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
+ pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
pr_cont("CFAR: "REG" ", regs->orig_gpr3);
@@ -1994,11 +1979,25 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
void notrace __ppc64_runlatch_on(void)
{
struct thread_info *ti = current_thread_info();
- unsigned long ctrl;
- ctrl = mfspr(SPRN_CTRLF);
- ctrl |= CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ /*
+ * Least significant bit (RUN) is the only writable bit of
+ * the CTRL register, so we can avoid mfspr. 2.06 is not the
+ * earliest ISA where this is the case, but it's convenient.
+ */
+ mtspr(SPRN_CTRLT, CTRL_RUNLATCH);
+ } else {
+ unsigned long ctrl;
+
+ /*
+ * Some architectures (e.g., Cell) have writable fields other
+ * than RUN, so do the read-modify-write.
+ */
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl |= CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
+ }
ti->local_flags |= _TLF_RUNLATCH;
}
@@ -2007,13 +2006,18 @@ void notrace __ppc64_runlatch_on(void)
void notrace __ppc64_runlatch_off(void)
{
struct thread_info *ti = current_thread_info();
- unsigned long ctrl;
ti->local_flags &= ~_TLF_RUNLATCH;
- ctrl = mfspr(SPRN_CTRLF);
- ctrl &= ~CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ mtspr(SPRN_CTRLT, 0);
+ } else {
+ unsigned long ctrl;
+
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl &= ~CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
+ }
}
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 613f79f03877..02190e90c7ae 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -177,6 +177,7 @@ struct platform_support {
bool hash_mmu;
bool radix_mmu;
bool radix_gtse;
+ bool xive;
};
/* Platforms codes are now obsolete in the kernel. Now only used within this
@@ -1041,6 +1042,27 @@ static void __init prom_parse_mmu_model(u8 val,
}
}
+static void __init prom_parse_xive_model(u8 val,
+ struct platform_support *support)
+{
+ switch (val) {
+ case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
+ prom_debug("XIVE - either mode supported\n");
+ support->xive = true;
+ break;
+ case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
+ prom_debug("XIVE - exploitation mode supported\n");
+ support->xive = true;
+ break;
+ case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
+ prom_debug("XIVE - legacy mode supported\n");
+ break;
+ default:
+ prom_debug("Unknown xive support option: 0x%x\n", val);
+ break;
+ }
+}
+
static void __init prom_parse_platform_support(u8 index, u8 val,
struct platform_support *support)
{
@@ -1054,6 +1076,10 @@ static void __init prom_parse_platform_support(u8 index, u8 val,
support->radix_gtse = true;
}
break;
+ case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */
+ prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT),
+ support);
+ break;
}
}
@@ -1062,7 +1088,8 @@ static void __init prom_check_platform_support(void)
struct platform_support supported = {
.hash_mmu = false,
.radix_mmu = false,
- .radix_gtse = false
+ .radix_gtse = false,
+ .xive = false
};
int prop_len = prom_getproplen(prom.chosen,
"ibm,arch-vec-5-platform-support");
@@ -1095,6 +1122,11 @@ static void __init prom_check_platform_support(void)
/* We're probably on a legacy hypervisor */
prom_debug("Assuming legacy hash support\n");
}
+
+ if (supported.xive) {
+ prom_debug("Asking for XIVE\n");
+ ibm_architecture_vec.vec5.intarch = OV5_FEAT(OV5_XIVE_EXPLOIT);
+ }
}
static void __init prom_send_capabilities(void)
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 925a4ef90559..07cd22e35405 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -127,12 +127,19 @@ static void flush_tmregs_to_thread(struct task_struct *tsk)
* If task is not current, it will have been flushed already to
* it's thread_struct during __switch_to().
*
- * A reclaim flushes ALL the state.
+ * A reclaim flushes ALL the state or if not in TM save TM SPRs
+ * in the appropriate thread structures from live.
*/
- if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
- tm_reclaim_current(TM_CAUSE_SIGNAL);
+ if (tsk != current)
+ return;
+ if (MSR_TM_SUSPENDED(mfmsr())) {
+ tm_reclaim_current(TM_CAUSE_SIGNAL);
+ } else {
+ tm_enable();
+ tm_save_sprs(&(tsk->thread));
+ }
}
#else
static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
@@ -1587,11 +1594,8 @@ static int ppr_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ppr, 0, sizeof(u64));
}
static int ppr_set(struct task_struct *target,
@@ -1599,11 +1603,8 @@ static int ppr_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ppr, 0, sizeof(u64));
}
static int dscr_get(struct task_struct *target,
@@ -1611,22 +1612,16 @@ static int dscr_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
}
static int dscr_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
}
#endif
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1635,22 +1630,16 @@ static int tar_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
- return ret;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
}
static int tar_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
}
static int ebb_active(struct task_struct *target,
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index d88736fbece6..e8cfc69f59ae 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -82,7 +82,7 @@ _GLOBAL(relocate)
6: blr
.balign 8
-p_dyn: .llong __dynamic_start - 0b
-p_rela: .llong __rela_dyn_start - 0b
-p_st: .llong _stext - 0b
+p_dyn: .8byte __dynamic_start - 0b
+p_rela: .8byte __rela_dyn_start - 0b
+p_st: .8byte _stext - 0b
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 73f1934582c2..c2b148b1634a 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -91,26 +91,14 @@ static int rtas_pci_read_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 *val)
{
- struct device_node *busdn, *dn;
struct pci_dn *pdn;
- bool found = false;
int ret;
- /* Search only direct children of the bus */
*val = 0xFFFFFFFF;
- busdn = pci_bus_to_OF_node(bus);
- for (dn = busdn->child; dn; dn = dn->sibling) {
- pdn = PCI_DN(dn);
- if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn)) {
- found = true;
- break;
- }
- }
- if (!found)
- return PCIBIOS_DEVICE_NOT_FOUND;
+ pdn = pci_get_pdn_by_devfn(bus, devfn);
+ /* Validity of pdn is checked in here */
ret = rtas_read_config(pdn, where, size, val);
if (*val == EEH_IO_ERROR_VALUE(size) &&
eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
@@ -153,24 +141,11 @@ static int rtas_pci_write_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 val)
{
- struct device_node *busdn, *dn;
struct pci_dn *pdn;
- bool found = false;
-
- /* Search only direct children of the bus */
- busdn = pci_bus_to_OF_node(bus);
- for (dn = busdn->child; dn; dn = dn->sibling) {
- pdn = PCI_DN(dn);
- if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn)) {
- found = true;
- break;
- }
- }
- if (!found)
- return PCIBIOS_DEVICE_NOT_FOUND;
+ pdn = pci_get_pdn_by_devfn(bus, devfn);
+ /* Validity of pdn is checked in here. */
return rtas_write_config(pdn, where, size, val);
}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 94a948207cd2..7de73589d8e2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -481,7 +481,7 @@ void __init smp_setup_cpu_maps(void)
__be32 cpu_be;
int j, len;
- DBG(" * %s...\n", dn->full_name);
+ DBG(" * %pOF...\n", dn);
intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
&len);
@@ -704,30 +704,6 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
-static int ppc_panic_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- /*
- * If firmware-assisted dump has been registered then trigger
- * firmware-assisted dump and let firmware handle everything else.
- */
- crash_fadump(NULL, ptr);
- ppc_md.panic(ptr); /* May not return */
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ppc_panic_block = {
- .notifier_call = ppc_panic_event,
- .priority = INT_MIN /* may not return; must be done last */
-};
-
-void __init setup_panic(void)
-{
- if (!ppc_md.panic)
- return;
- atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
-}
-
#ifdef CONFIG_CHECK_CACHE_COHERENCY
/*
* For platforms that have configurable cache-coherency. This function
@@ -872,9 +848,6 @@ void __init setup_arch(char **cmdline_p)
/* Probe the machine type, establish ppc_md. */
probe_machine();
- /* Setup panic notifier if requested by the platform. */
- setup_panic();
-
/*
* Configure ppc_md.power_save (ppc32 only, 64-bit machines do
* it from their respective probe() function.
@@ -916,13 +889,6 @@ void __init setup_arch(char **cmdline_p)
/* Reserve large chunks of memory for use by CMA for KVM. */
kvm_cma_reserve();
- /*
- * Reserve any gigantic pages requested on the command line.
- * memblock needs to have been initialized by the time this is
- * called since this will reserve memory.
- */
- reserve_hugetlb_gpages();
-
klp_init_thread_info(&init_thread_info);
init_mm.start_code = (unsigned long)_stext;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 2f88f6cf1a42..51ebc01fff52 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -98,6 +98,9 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
notrace void __init machine_init(u64 dt_ptr)
{
+ unsigned int *addr = &memset_nocache_branch;
+ unsigned long insn;
+
/* Configure static keys first, now that we're relocated. */
setup_feature_keys();
@@ -105,7 +108,9 @@ notrace void __init machine_init(u64 dt_ptr)
udbg_early_init();
patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP);
- patch_instruction(&memset_nocache_branch, PPC_INST_NOP);
+
+ insn = create_cond_branch(addr, branch_target(addr), 0x820000);
+ patch_instruction(addr, insn); /* replace b by bne cr0 */
/* Do some early initialization based on the flat device tree */
early_init_devtree(__va(dt_ptr));
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index af23d4b576ec..b89c6aac48c9 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -564,6 +564,9 @@ static __init u64 safe_stack_limit(void)
/* Other BookE, we assume the first GB is bolted */
return 1ul << 30;
#else
+ if (early_radix_enabled())
+ return ULONG_MAX;
+
/* BookS, the first segment is bolted */
if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
return 1UL << SID_SHIFT_1T;
@@ -578,7 +581,8 @@ void __init irqstack_early_init(void)
/*
* Interrupt stacks must be in the first segment since we
- * cannot afford to take SLB misses on them.
+ * cannot afford to take SLB misses on them. They are not
+ * accessed in realmode.
*/
for_each_possible_cpu(i) {
softirq_ctx[i] = (struct thread_info *)
@@ -649,8 +653,9 @@ void __init emergency_stack_init(void)
* aligned.
*
* Since we use these as temporary stacks during secondary CPU
- * bringup, we need to get at them in real mode. This means they
- * must also be within the RMO region.
+ * bringup, machine check, system reset, and HMI, we need to get
+ * at them in real mode. This means they must also be within the RMO
+ * region.
*
* The IRQ stacks allocated elsewhere in this file are zeroed and
* initialized in kernel/irq.c. These are initialized here in order
@@ -751,3 +756,31 @@ unsigned long memory_block_size_bytes(void)
struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
#endif
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+ return ppc_proc_freq * watchdog_thresh;
+}
+#endif
+
+/*
+ * The perf based hardlockup detector breaks PMU event based branches, so
+ * disable it by default. Book3S has a soft-nmi hardlockup detector based
+ * on the decrementer interrupt, so it does not suffer from this problem.
+ *
+ * It is likely to get false positives in VM guests, so disable it there
+ * by default too.
+ */
+static int __init disable_hardlockup_detector(void)
+{
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+ hardlockup_detector_disable();
+#else
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ hardlockup_detector_disable();
+#endif
+
+ return 0;
+}
+early_initcall(disable_hardlockup_detector);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 997c88d54acf..e0a4c1f82e25 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -75,9 +75,11 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
struct thread_info *secondary_ti;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
/* SMP operations for this machine */
@@ -351,7 +353,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
hard_irq_disable();
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
raw_local_irq_restore(*flags);
- cpu_relax();
+ spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
raw_local_irq_save(*flags);
hard_irq_disable();
}
@@ -360,7 +362,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
static void nmi_ipi_lock(void)
{
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
- cpu_relax();
+ spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
}
static void nmi_ipi_unlock(void)
@@ -475,7 +477,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
nmi_ipi_lock_start(&flags);
while (nmi_ipi_busy_count) {
nmi_ipi_unlock_end(&flags);
- cpu_relax();
+ spin_until_cond(nmi_ipi_busy_count == 0);
nmi_ipi_lock_start(&flags);
}
@@ -571,6 +573,26 @@ static void smp_store_cpu_info(int id)
#endif
}
+/*
+ * Relationships between CPUs are maintained in a set of per-cpu cpumasks so
+ * rather than just passing around the cpumask we pass around a function that
+ * returns the that cpumask for the given CPU.
+ */
+static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
+{
+ cpumask_set_cpu(i, get_cpumask(j));
+ cpumask_set_cpu(j, get_cpumask(i));
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void set_cpus_unrelated(int i, int j,
+ struct cpumask *(*get_cpumask)(int))
+{
+ cpumask_clear_cpu(i, get_cpumask(j));
+ cpumask_clear_cpu(j, get_cpumask(i));
+}
+#endif
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int cpu;
@@ -590,6 +612,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+ zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
/*
@@ -602,7 +626,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
}
+ /* Init the cpumasks so the boot CPU is related to itself */
cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
+ cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
if (smp_ops && smp_ops->probe)
@@ -828,33 +854,6 @@ int cpu_first_thread_of_core(int core)
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
-static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
-{
- const struct cpumask *mask;
- struct device_node *np;
- int i, plen;
- const __be32 *prop;
-
- mask = add ? cpu_online_mask : cpu_present_mask;
- for_each_cpu(i, mask) {
- np = of_get_cpu_node(i, NULL);
- if (!np)
- continue;
- prop = of_get_property(np, "ibm,chip-id", &plen);
- if (prop && plen == sizeof(int) &&
- of_read_number(prop, 1) == chipid) {
- if (add) {
- cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- } else {
- cpumask_clear_cpu(cpu, cpu_core_mask(i));
- cpumask_clear_cpu(i, cpu_core_mask(cpu));
- }
- }
- of_node_put(np);
- }
-}
-
/* Must be called when no change can occur to cpu_present_mask,
* i.e. during cpu online or offline.
*/
@@ -877,52 +876,93 @@ static struct device_node *cpu_to_l2cache(int cpu)
return cache;
}
-static void traverse_core_siblings(int cpu, bool add)
+static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
{
struct device_node *l2_cache, *np;
- const struct cpumask *mask;
- int i, chip, plen;
- const __be32 *prop;
-
- /* First see if we have ibm,chip-id properties in cpu nodes */
- np = of_get_cpu_node(cpu, NULL);
- if (np) {
- chip = -1;
- prop = of_get_property(np, "ibm,chip-id", &plen);
- if (prop && plen == sizeof(int))
- chip = of_read_number(prop, 1);
- of_node_put(np);
- if (chip >= 0) {
- traverse_siblings_chip_id(cpu, add, chip);
- return;
- }
- }
+ int i;
l2_cache = cpu_to_l2cache(cpu);
- mask = add ? cpu_online_mask : cpu_present_mask;
- for_each_cpu(i, mask) {
+ if (!l2_cache)
+ return false;
+
+ for_each_cpu(i, cpu_online_mask) {
+ /*
+ * when updating the marks the current CPU has not been marked
+ * online, but we need to update the cache masks
+ */
np = cpu_to_l2cache(i);
if (!np)
continue;
- if (np == l2_cache) {
- if (add) {
- cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- } else {
- cpumask_clear_cpu(cpu, cpu_core_mask(i));
- cpumask_clear_cpu(i, cpu_core_mask(cpu));
- }
- }
+
+ if (np == l2_cache)
+ set_cpus_related(cpu, i, mask_fn);
+
of_node_put(np);
}
of_node_put(l2_cache);
+
+ return true;
}
+#ifdef CONFIG_HOTPLUG_CPU
+static void remove_cpu_from_masks(int cpu)
+{
+ int i;
+
+ /* NB: cpu_core_mask is a superset of the others */
+ for_each_cpu(i, cpu_core_mask(cpu)) {
+ set_cpus_unrelated(cpu, i, cpu_core_mask);
+ set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
+ set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+ }
+}
+#endif
+
+static void add_cpu_to_masks(int cpu)
+{
+ int first_thread = cpu_first_thread_sibling(cpu);
+ int chipid = cpu_to_chip_id(cpu);
+ int i;
+
+ /*
+ * This CPU will not be in the online mask yet so we need to manually
+ * add it to it's own thread sibling mask.
+ */
+ cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++)
+ if (cpu_online(i))
+ set_cpus_related(i, cpu, cpu_sibling_mask);
+
+ /*
+ * Copy the thread sibling mask into the cache sibling mask
+ * and mark any CPUs that share an L2 with this CPU.
+ */
+ for_each_cpu(i, cpu_sibling_mask(cpu))
+ set_cpus_related(cpu, i, cpu_l2_cache_mask);
+ update_mask_by_l2(cpu, cpu_l2_cache_mask);
+
+ /*
+ * Copy the cache sibling mask into core sibling mask and mark
+ * any CPUs on the same chip as this CPU.
+ */
+ for_each_cpu(i, cpu_l2_cache_mask(cpu))
+ set_cpus_related(cpu, i, cpu_core_mask);
+
+ if (chipid == -1)
+ return;
+
+ for_each_cpu(i, cpu_online_mask)
+ if (cpu_to_chip_id(i) == chipid)
+ set_cpus_related(cpu, i, cpu_core_mask);
+}
+
+static bool shared_caches;
+
/* Activate a secondary processor. */
void start_secondary(void *unused)
{
unsigned int cpu = smp_processor_id();
- int i, base;
mmgrab(&init_mm);
current->active_mm = &init_mm;
@@ -945,22 +985,15 @@ void start_secondary(void *unused)
vdso_getcpu_init();
#endif
- /* Update sibling maps */
- base = cpu_first_thread_sibling(cpu);
- for (i = 0; i < threads_per_core; i++) {
- if (cpu_is_offline(base + i) && (cpu != base + i))
- continue;
- cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
- cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
+ /* Update topology CPU masks */
+ add_cpu_to_masks(cpu);
- /* cpu_core_map should be a superset of
- * cpu_sibling_map even if we don't have cache
- * information, so update the former here, too.
- */
- cpumask_set_cpu(cpu, cpu_core_mask(base + i));
- cpumask_set_cpu(base + i, cpu_core_mask(cpu));
- }
- traverse_core_siblings(cpu, true);
+ /*
+ * Check for any shared caches. Note that this must be done on a
+ * per-core basis because one core in the pair might be disabled.
+ */
+ if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
+ shared_caches = true;
set_numa_node(numa_cpu_lookup_table[cpu]);
set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
@@ -1003,35 +1036,65 @@ static struct sched_domain_topology_level powerpc_topology[] = {
{ NULL, },
};
-static __init long smp_setup_cpu_workfn(void *data __always_unused)
+/*
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
+ * since the migrated task remains cache hot. We want to take advantage of this
+ * at the scheduler level so an extra topology level is required.
+ */
+static int powerpc_shared_cache_flags(void)
{
- smp_ops->setup_cpu(boot_cpuid);
- return 0;
+ return SD_SHARE_PKG_RESOURCES;
}
+/*
+ * We can't just pass cpu_l2_cache_mask() directly because
+ * returns a non-const pointer and the compiler barfs on that.
+ */
+static const struct cpumask *shared_cache_mask(int cpu)
+{
+ return cpu_l2_cache_mask(cpu);
+}
+
+static struct sched_domain_topology_level power9_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+ { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
void __init smp_cpus_done(unsigned int max_cpus)
{
/*
- * We want the setup_cpu() here to be called on the boot CPU, but
- * init might run on any CPU, so make sure it's invoked on the boot
- * CPU.
+ * We are running pinned to the boot CPU, see rest_init().
*/
if (smp_ops && smp_ops->setup_cpu)
- work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL);
+ smp_ops->setup_cpu(boot_cpuid);
if (smp_ops && smp_ops->bringup_done)
smp_ops->bringup_done();
dump_numa_cpu_topology();
- set_sched_topology(powerpc_topology);
+ /*
+ * If any CPU detects that it's sharing a cache with another CPU then
+ * use the deeper topology that is aware of this sharing.
+ */
+ if (shared_caches) {
+ pr_info("Using shared cache scheduler topology\n");
+ set_sched_topology(power9_topology);
+ } else {
+ pr_info("Using standard scheduler topology\n");
+ set_sched_topology(powerpc_topology);
+ }
}
#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
int cpu = smp_processor_id();
- int base, i;
int err;
if (!smp_ops->cpu_disable)
@@ -1042,14 +1105,7 @@ int __cpu_disable(void)
return err;
/* Update sibling maps */
- base = cpu_first_thread_sibling(cpu);
- for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
- cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
- cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
- cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
- cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
- }
- traverse_core_siblings(cpu, false);
+ remove_cpu_from_masks(cpu);
return 0;
}
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 988f38dced0f..82d8aae81c6a 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -179,7 +179,7 @@ nothing_to_copy:
sld r3, r3, r0
li r0, 0
1:
- dcbf r0,r3
+ dcbf 0,r3
addi r3,r3,0x20
bdnz 1b
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 4d6b1d3a747f..7ccb7f81f8db 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -17,13 +17,13 @@
#include <asm/ppc_asm.h>
#ifdef CONFIG_PPC64
-#define SYSCALL(func) .llong DOTSYM(sys_##func),DOTSYM(sys_##func)
-#define COMPAT_SYS(func) .llong DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
-#define PPC_SYS(func) .llong DOTSYM(ppc_##func),DOTSYM(ppc_##func)
-#define OLDSYS(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
-#define SYS32ONLY(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
-#define PPC64ONLY(func) .llong DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
-#define SYSX(f, f3264, f32) .llong DOTSYM(f),DOTSYM(f3264)
+#define SYSCALL(func) .8byte DOTSYM(sys_##func),DOTSYM(sys_##func)
+#define COMPAT_SYS(func) .8byte DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
+#define PPC_SYS(func) .8byte DOTSYM(ppc_##func),DOTSYM(ppc_##func)
+#define OLDSYS(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
+#define SYS32ONLY(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
+#define PPC64ONLY(func) .8byte DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
+#define SYSX(f, f3264, f32) .8byte DOTSYM(f),DOTSYM(f3264)
#else
#define SYSCALL(func) .long sys_##func
#define COMPAT_SYS(func) .long sys_##func
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index bfcfd9ef09f2..ec74e203ee04 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -114,6 +114,28 @@ static void pmac_backlight_unblank(void)
static inline void pmac_backlight_unblank(void) { }
#endif
+/*
+ * If oops/die is expected to crash the machine, return true here.
+ *
+ * This should not be expected to be 100% accurate, there may be
+ * notifiers registered or other unexpected conditions that may bring
+ * down the kernel. Or if the current process in the kernel is holding
+ * locks or has other critical state, the kernel may become effectively
+ * unusable anyway.
+ */
+bool die_will_crash(void)
+{
+ if (should_fadump_crash())
+ return true;
+ if (kexec_should_crash(current))
+ return true;
+ if (in_interrupt() || panic_on_oops ||
+ !current->pid || is_global_init(current))
+ return true;
+
+ return false;
+}
+
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
@@ -162,21 +184,9 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
crash_fadump(regs, "die oops");
- /*
- * A system reset (0x100) is a request to dump, so we always send
- * it through the crashdump code.
- */
- if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) {
+ if (kexec_should_crash(current))
crash_kexec(regs);
- /*
- * We aren't the primary crash CPU. We need to send it
- * to a holding pattern to avoid it ending up in the panic
- * code.
- */
- crash_kexec_secondary(regs);
- }
-
if (!signr)
return;
@@ -202,18 +212,25 @@ NOKPROBE_SYMBOL(oops_end);
static int __die(const char *str, struct pt_regs *regs, long err)
{
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP NR_CPUS=%d ", NR_CPUS);
-#endif
+
+ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ printk("LE ");
+ else
+ printk("BE ");
+
+ if (IS_ENABLED(CONFIG_PREEMPT))
+ pr_cont("PREEMPT ");
+
+ if (IS_ENABLED(CONFIG_SMP))
+ pr_cont("SMP NR_CPUS=%d ", NR_CPUS);
+
if (debug_pagealloc_enabled())
- printk("DEBUG_PAGEALLOC ");
-#ifdef CONFIG_NUMA
- printk("NUMA ");
-#endif
- printk("%s\n", ppc_md.name ? ppc_md.name : "");
+ pr_cont("DEBUG_PAGEALLOC ");
+
+ if (IS_ENABLED(CONFIG_NUMA))
+ pr_cont("NUMA ");
+
+ pr_cont("%s\n", ppc_md.name ? ppc_md.name : "");
if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
return 1;
@@ -288,23 +305,52 @@ void system_reset_exception(struct pt_regs *regs)
if (!nested)
nmi_enter();
+ __this_cpu_inc(irq_stat.sreset_irqs);
+
/* See if any machine dependent calls */
if (ppc_md.system_reset_exception) {
if (ppc_md.system_reset_exception(regs))
goto out;
}
- die("System Reset", regs, SIGABRT);
+ if (debugger(regs))
+ goto out;
+
+ /*
+ * A system reset is a request to dump, so we always send
+ * it through the crashdump code (if fadump or kdump are
+ * registered).
+ */
+ crash_fadump(regs, "System Reset");
+
+ crash_kexec(regs);
+
+ /*
+ * We aren't the primary crash CPU. We need to send it
+ * to a holding pattern to avoid it ending up in the panic
+ * code.
+ */
+ crash_kexec_secondary(regs);
+
+ /*
+ * No debugger or crash dump registered, print logs then
+ * panic.
+ */
+ __die("System Reset", regs, SIGABRT);
+
+ mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+ nmi_panic(regs, "System Reset");
out:
#ifdef CONFIG_PPC_BOOK3S_64
BUG_ON(get_paca()->in_nmi == 0);
if (get_paca()->in_nmi > 1)
- panic("Unrecoverable nested System Reset");
+ nmi_panic(regs, "Unrecoverable nested System Reset");
#endif
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
- panic("Unrecoverable System Reset");
+ nmi_panic(regs, "Unrecoverable System Reset");
if (!nested)
nmi_exit();
@@ -312,39 +358,6 @@ out:
/* What should we do here? We could issue a shutdown or hard reset. */
}
-#ifdef CONFIG_PPC64
-/*
- * This function is called in real mode. Strictly no printk's please.
- *
- * regs->nip and regs->msr contains srr0 and ssr1.
- */
-long machine_check_early(struct pt_regs *regs)
-{
- long handled = 0;
-
- __this_cpu_inc(irq_stat.mce_exceptions);
-
- if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
- handled = cur_cpu_spec->machine_check_early(regs);
- return handled;
-}
-
-long hmi_exception_realmode(struct pt_regs *regs)
-{
- __this_cpu_inc(irq_stat.hmi_exceptions);
-
- wait_for_subcore_guest_exit();
-
- if (ppc_md.hmi_exception_early)
- ppc_md.hmi_exception_early(regs);
-
- wait_for_tb_resync();
-
- return 0;
-}
-
-#endif
-
/*
* I/O accesses can cause machine checks on powermacs.
* Check if the NIP corresponds to the address of a sync
@@ -397,11 +410,6 @@ static inline int check_io_access(struct pt_regs *regs)
/* On 4xx, the reason for the machine check or program exception
is in the ESR. */
#define get_reason(regs) ((regs)->dsisr)
-#ifndef CONFIG_FSL_BOOKE
-#define get_mc_reason(regs) ((regs)->dsisr)
-#else
-#define get_mc_reason(regs) (mfspr(SPRN_MCSR))
-#endif
#define REASON_FP ESR_FP
#define REASON_ILLEGAL (ESR_PIL | ESR_PUO)
#define REASON_PRIVILEGED ESR_PPR
@@ -415,108 +423,17 @@ static inline int check_io_access(struct pt_regs *regs)
/* On non-4xx, the reason for the machine check or program
exception is in the MSR. */
#define get_reason(regs) ((regs)->msr)
-#define get_mc_reason(regs) ((regs)->msr)
-#define REASON_TM 0x200000
-#define REASON_FP 0x100000
-#define REASON_ILLEGAL 0x80000
-#define REASON_PRIVILEGED 0x40000
-#define REASON_TRAP 0x20000
+#define REASON_TM SRR1_PROGTM
+#define REASON_FP SRR1_PROGFPE
+#define REASON_ILLEGAL SRR1_PROGILL
+#define REASON_PRIVILEGED SRR1_PROGPRIV
+#define REASON_TRAP SRR1_PROGTRAP
#define single_stepping(regs) ((regs)->msr & MSR_SE)
#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
#endif
-#if defined(CONFIG_4xx)
-int machine_check_4xx(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
-
- if (reason & ESR_IMCP) {
- printk("Instruction");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- } else
- printk("Data");
- printk(" machine check in kernel mode.\n");
-
- return 0;
-}
-
-int machine_check_440A(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
-
- printk("Machine check in kernel mode.\n");
- if (reason & ESR_IMCP){
- printk("Instruction Synchronous Machine Check exception\n");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- }
- else {
- u32 mcsr = mfspr(SPRN_MCSR);
- if (mcsr & MCSR_IB)
- printk("Instruction Read PLB Error\n");
- if (mcsr & MCSR_DRB)
- printk("Data Read PLB Error\n");
- if (mcsr & MCSR_DWB)
- printk("Data Write PLB Error\n");
- if (mcsr & MCSR_TLBP)
- printk("TLB Parity Error\n");
- if (mcsr & MCSR_ICP){
- flush_instruction_cache();
- printk("I-Cache Parity Error\n");
- }
- if (mcsr & MCSR_DCSP)
- printk("D-Cache Search Parity Error\n");
- if (mcsr & MCSR_DCFP)
- printk("D-Cache Flush Parity Error\n");
- if (mcsr & MCSR_IMPE)
- printk("Machine Check exception is imprecise\n");
-
- /* Clear MCSR */
- mtspr(SPRN_MCSR, mcsr);
- }
- return 0;
-}
-
-int machine_check_47x(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
- u32 mcsr;
-
- printk(KERN_ERR "Machine check in kernel mode.\n");
- if (reason & ESR_IMCP) {
- printk(KERN_ERR
- "Instruction Synchronous Machine Check exception\n");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- return 0;
- }
- mcsr = mfspr(SPRN_MCSR);
- if (mcsr & MCSR_IB)
- printk(KERN_ERR "Instruction Read PLB Error\n");
- if (mcsr & MCSR_DRB)
- printk(KERN_ERR "Data Read PLB Error\n");
- if (mcsr & MCSR_DWB)
- printk(KERN_ERR "Data Write PLB Error\n");
- if (mcsr & MCSR_TLBP)
- printk(KERN_ERR "TLB Parity Error\n");
- if (mcsr & MCSR_ICP) {
- flush_instruction_cache();
- printk(KERN_ERR "I-Cache Parity Error\n");
- }
- if (mcsr & MCSR_DCSP)
- printk(KERN_ERR "D-Cache Search Parity Error\n");
- if (mcsr & PPC47x_MCSR_GPR)
- printk(KERN_ERR "GPR Parity Error\n");
- if (mcsr & PPC47x_MCSR_FPR)
- printk(KERN_ERR "FPR Parity Error\n");
- if (mcsr & PPC47x_MCSR_IPR)
- printk(KERN_ERR "Machine Check exception is imprecise\n");
-
- /* Clear MCSR */
- mtspr(SPRN_MCSR, mcsr);
-
- return 0;
-}
-#elif defined(CONFIG_E500)
+#if defined(CONFIG_E500)
int machine_check_e500mc(struct pt_regs *regs)
{
unsigned long mcsr = mfspr(SPRN_MCSR);
@@ -618,7 +535,7 @@ silent_out:
int machine_check_e500(struct pt_regs *regs)
{
- unsigned long reason = get_mc_reason(regs);
+ unsigned long reason = mfspr(SPRN_MCSR);
if (reason & MCSR_BUS_RBERR) {
if (fsl_rio_mcheck_exception(regs))
@@ -665,7 +582,7 @@ int machine_check_generic(struct pt_regs *regs)
#elif defined(CONFIG_E200)
int machine_check_e200(struct pt_regs *regs)
{
- unsigned long reason = get_mc_reason(regs);
+ unsigned long reason = mfspr(SPRN_MCSR);
printk("Machine check in kernel mode.\n");
printk("Caused by (from MCSR=%lx): ", reason);
@@ -687,35 +604,10 @@ int machine_check_e200(struct pt_regs *regs)
return 0;
}
-#elif defined(CONFIG_PPC_8xx)
-int machine_check_8xx(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
-
- pr_err("Machine check in kernel mode.\n");
- pr_err("Caused by (from SRR1=%lx): ", reason);
- if (reason & 0x40000000)
- pr_err("Fetch error at address %lx\n", regs->nip);
- else
- pr_err("Data access error at address %lx\n", regs->dar);
-
-#ifdef CONFIG_PCI
- /* the qspan pci read routines can cause machine checks -- Cort
- *
- * yuck !!! that totally needs to go away ! There are better ways
- * to deal with that than having a wart in the mcheck handler.
- * -- BenH
- */
- bad_page_fault(regs, regs->dar, SIGBUS);
- return 1;
-#else
- return 0;
-#endif
-}
-#else
+#elif defined(CONFIG_PPC32)
int machine_check_generic(struct pt_regs *regs)
{
- unsigned long reason = get_mc_reason(regs);
+ unsigned long reason = regs->msr;
printk("Machine check in kernel mode.\n");
printk("Caused by (from SRR1=%lx): ", reason);
@@ -752,10 +644,14 @@ int machine_check_generic(struct pt_regs *regs)
void machine_check_exception(struct pt_regs *regs)
{
- enum ctx_state prev_state = exception_enter();
int recover = 0;
+ bool nested = in_nmi();
+ if (!nested)
+ nmi_enter();
- __this_cpu_inc(irq_stat.mce_exceptions);
+ /* 64s accounts the mce in machine_check_early when in HVMODE */
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE))
+ __this_cpu_inc(irq_stat.mce_exceptions);
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -783,10 +679,11 @@ void machine_check_exception(struct pt_regs *regs)
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
- panic("Unrecoverable Machine check");
+ nmi_panic(regs, "Unrecoverable Machine check");
bail:
- exception_exit(prev_state);
+ if (!nested)
+ nmi_exit();
}
void SMIException(struct pt_regs *regs)
@@ -1672,24 +1569,6 @@ void performance_monitor_exception(struct pt_regs *regs)
perf_irq(regs);
}
-#ifdef CONFIG_8xx
-void SoftwareEmulation(struct pt_regs *regs)
-{
- CHECK_FULL_REGS(regs);
-
- if (!user_mode(regs)) {
- debugger(regs);
- die("Kernel Mode Unimplemented Instruction or SW FPU Emulation",
- regs, SIGFPE);
- }
-
- if (!emulate_math(regs))
- return;
-
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-}
-#endif /* CONFIG_8xx */
-
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
{
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 003b20964ea0..5d105b8eeece 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -205,3 +205,12 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
return orig_ret_vaddr;
}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+ struct pt_regs *regs)
+{
+ if (ctx == RP_CHECK_CHAIN_CALL)
+ return regs->gpr[1] <= ret->stack;
+ else
+ return regs->gpr[1] < ret->stack;
+}
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 6b2b69616e77..769c2624e0a6 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -232,15 +232,9 @@ __do_get_tspec:
lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
/* Get a stable TB value */
-#ifdef CONFIG_8xx
-2: mftbu r3
- mftbl r4
- mftbu r0
-#else
-2: mfspr r3, SPRN_TBRU
- mfspr r4, SPRN_TBRL
- mfspr r0, SPRN_TBRU
-#endif
+2: MFTBU(r3)
+ MFTBL(r4)
+ MFTBU(r0)
cmplw cr0,r3,r0
bne- 2b
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index b1a250560198..882628fa6987 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -8,7 +8,7 @@
#include <asm/cache.h>
#include <asm/thread_info.h>
-#ifdef CONFIG_STRICT_KERNEL_RWX
+#if defined(CONFIG_STRICT_KERNEL_RWX) && !defined(CONFIG_PPC32)
#define STRICT_ALIGN_SIZE (1 << 24)
#else
#define STRICT_ALIGN_SIZE PAGE_SIZE
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index b67f8b03a32d..2f6eadd9408d 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags)
* This may be called from low level interrupt handlers at some
* point in future.
*/
- local_irq_save(*flags);
- while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
- cpu_relax();
+ raw_local_irq_save(*flags);
+ hard_irq_disable(); /* Make it soft-NMI safe */
+ while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
+ raw_local_irq_restore(*flags);
+ spin_until_cond(!test_bit(0, &__wd_smp_lock));
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ }
}
static inline void wd_smp_unlock(unsigned long *flags)
{
clear_bit_unlock(0, &__wd_smp_lock);
- local_irq_restore(*flags);
+ raw_local_irq_restore(*flags);
}
static void wd_lockup_ipi(struct pt_regs *regs)
@@ -96,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs)
nmi_panic(regs, "Hard LOCKUP");
}
-static void set_cpu_stuck(int cpu, u64 tb)
+static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
{
- cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
- cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
+ cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
if (cpumask_empty(&wd_smp_cpus_pending)) {
wd_smp_last_reset_tb = tb;
cpumask_andnot(&wd_smp_cpus_pending,
@@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb)
&wd_smp_cpus_stuck);
}
}
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+ set_cpumask_stuck(cpumask_of(cpu), tb);
+}
static void watchdog_smp_panic(int cpu, u64 tb)
{
@@ -135,11 +144,9 @@ static void watchdog_smp_panic(int cpu, u64 tb)
}
smp_flush_nmi_ipi(1000000);
- /* Take the stuck CPU out of the watch group */
- for_each_cpu(c, &wd_smp_cpus_pending)
- set_cpu_stuck(c, tb);
+ /* Take the stuck CPUs out of the watch group */
+ set_cpumask_stuck(&wd_smp_cpus_pending, tb);
-out:
wd_smp_unlock(&flags);
printk_safe_flush();
@@ -152,6 +159,11 @@ out:
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
+
+ return;
+
+out:
+ wd_smp_unlock(&flags);
}
static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
@@ -204,6 +216,9 @@ void soft_nmi_interrupt(struct pt_regs *regs)
return;
nmi_enter();
+
+ __this_cpu_inc(irq_stat.soft_nmi_irqs);
+
tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
per_cpu(wd_timer_tb, cpu) = tb;
@@ -258,9 +273,11 @@ static void wd_timer_fn(unsigned long data)
void arch_touch_nmi_watchdog(void)
{
+ unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
- watchdog_timer_interrupt(cpu);
+ if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
+ watchdog_timer_interrupt(cpu);
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
@@ -283,6 +300,8 @@ static void stop_watchdog_timer_on(unsigned int cpu)
static int start_wd_on_cpu(unsigned int cpu)
{
+ unsigned long flags;
+
if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
WARN_ON(1);
return 0;
@@ -297,12 +316,14 @@ static int start_wd_on_cpu(unsigned int cpu)
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
return 0;
+ wd_smp_lock(&flags);
cpumask_set_cpu(cpu, &wd_cpus_enabled);
if (cpumask_weight(&wd_cpus_enabled) == 1) {
cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
wd_smp_last_reset_tb = get_tb();
}
- smp_wmb();
+ wd_smp_unlock(&flags);
+
start_watchdog_timer_on(cpu);
return 0;
@@ -310,12 +331,17 @@ static int start_wd_on_cpu(unsigned int cpu)
static int stop_wd_on_cpu(unsigned int cpu)
{
+ unsigned long flags;
+
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
return 0; /* Can happen in CPU unplug case */
stop_watchdog_timer_on(cpu);
+ wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+ wd_smp_unlock(&flags);
+
wd_smp_clear_cpu_pending(cpu, get_tb());
return 0;