Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--  arch/powerpc/kernel/.gitignore | 1
-rw-r--r--  arch/powerpc/kernel/Makefile | 44
-rw-r--r--  arch/powerpc/kernel/align.c | 97
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 222
-rw-r--r--  arch/powerpc/kernel/audit.c | 12
-rw-r--r--  arch/powerpc/kernel/btext.c | 21
-rw-r--r--  arch/powerpc/kernel/cacheinfo.c | 174
-rw-r--r--  arch/powerpc/kernel/compat_audit.c | 13
-rw-r--r--  arch/powerpc/kernel/cpu_setup_6xx.S | 2
-rw-r--r--  arch/powerpc/kernel/cpu_setup_fsl_booke.S | 9
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S | 219
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.c | 272
-rw-r--r--  arch/powerpc/kernel/cputable.c | 401
-rw-r--r--  arch/powerpc/kernel/crash_dump.c | 7
-rw-r--r--  arch/powerpc/kernel/dawr.c | 30
-rw-r--r--  arch/powerpc/kernel/dbell.c | 64
-rw-r--r--  arch/powerpc/kernel/dma-iommu.c | 161
-rw-r--r--  arch/powerpc/kernel/dt_cpu_ftrs.c | 121
-rw-r--r--  arch/powerpc/kernel/eeh.c | 555
-rw-r--r--  arch/powerpc/kernel/eeh_cache.c | 9
-rw-r--r--  arch/powerpc/kernel/eeh_dev.c | 67
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c | 24
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c | 187
-rw-r--r--  arch/powerpc/kernel/eeh_sysfs.c | 2
-rw-r--r--  arch/powerpc/kernel/entry_32.S | 1222
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 899
-rw-r--r--  arch/powerpc/kernel/epapr_paravirt.c | 3
-rw-r--r--  arch/powerpc/kernel/exceptions-64e.S | 337
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 2615
-rw-r--r--  arch/powerpc/kernel/fadump.c | 380
-rw-r--r--  arch/powerpc/kernel/firmware.c | 23
-rw-r--r--  arch/powerpc/kernel/fpu.S | 31
-rw-r--r--  arch/powerpc/kernel/fsl_booke_entry_mapping.S | 8
-rw-r--r--  arch/powerpc/kernel/head_32.h | 358
-rw-r--r--  arch/powerpc/kernel/head_40x.S | 588
-rw-r--r--  arch/powerpc/kernel/head_44x.S | 72
-rw-r--r--  arch/powerpc/kernel/head_64.S | 69
-rw-r--r--  arch/powerpc/kernel/head_8xx.S | 612
-rw-r--r--  arch/powerpc/kernel/head_book3s_32.S (renamed from arch/powerpc/kernel/head_32.S) | 595
-rw-r--r--  arch/powerpc/kernel/head_booke.h | 332
-rw-r--r--  arch/powerpc/kernel/head_fsl_booke.S | 180
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint.c | 624
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint_constraints.c | 153
-rw-r--r--  arch/powerpc/kernel/idle.c | 12
-rw-r--r--  arch/powerpc/kernel/idle_6xx.S | 15
-rw-r--r--  arch/powerpc/kernel/idle_book3s.S | 152
-rw-r--r--  arch/powerpc/kernel/idle_e500.S | 15
-rw-r--r--  arch/powerpc/kernel/ima_arch.c | 6
-rw-r--r--  arch/powerpc/kernel/interrupt.c | 670
-rw-r--r--  arch/powerpc/kernel/interrupt_64.S | 733
-rw-r--r--  arch/powerpc/kernel/io-workarounds.c | 18
-rw-r--r--  arch/powerpc/kernel/iomap.c | 166
-rw-r--r--  arch/powerpc/kernel/iommu.c | 153
-rw-r--r--  arch/powerpc/kernel/irq.c | 433
-rw-r--r--  arch/powerpc/kernel/isa-bridge.c | 28
-rw-r--r--  arch/powerpc/kernel/jump_label.c | 7
-rw-r--r--  arch/powerpc/kernel/kdebugfs.c | 14
-rw-r--r--  arch/powerpc/kernel/kgdb.c | 26
-rw-r--r--  arch/powerpc/kernel/kprobes-ftrace.c | 17
-rw-r--r--  arch/powerpc/kernel/kprobes.c | 266
-rw-r--r--  arch/powerpc/kernel/kvm.c | 3
-rw-r--r--  arch/powerpc/kernel/l2cr_6xx.S | 4
-rw-r--r--  arch/powerpc/kernel/legacy_serial.c | 36
-rw-r--r--  arch/powerpc/kernel/mce.c | 146
-rw-r--r--  arch/powerpc/kernel/mce_power.c | 257
-rw-r--r--  arch/powerpc/kernel/misc.S | 8
-rw-r--r--  arch/powerpc/kernel/misc_32.S | 108
-rw-r--r--  arch/powerpc/kernel/misc_64.S | 39
-rw-r--r--  arch/powerpc/kernel/module.c | 38
-rw-r--r--  arch/powerpc/kernel/module.lds | 8
-rw-r--r--  arch/powerpc/kernel/module_32.c | 36
-rw-r--r--  arch/powerpc/kernel/module_64.c | 372
-rw-r--r--  arch/powerpc/kernel/nvram_64.c | 16
-rw-r--r--  arch/powerpc/kernel/of_platform.c | 14
-rw-r--r--  arch/powerpc/kernel/optprobes.c | 173
-rw-r--r--  arch/powerpc/kernel/optprobes_head.S | 62
-rw-r--r--  arch/powerpc/kernel/paca.c | 69
-rw-r--r--  arch/powerpc/kernel/pci-common.c | 108
-rw-r--r--  arch/powerpc/kernel/pci-hotplug.c | 4
-rw-r--r--  arch/powerpc/kernel/pci_64.c | 60
-rw-r--r--  arch/powerpc/kernel/pci_dn.c | 91
-rw-r--r--  arch/powerpc/kernel/ppc_save_regs.S | 6
-rw-r--r--  arch/powerpc/kernel/process.c | 694
-rw-r--r--  arch/powerpc/kernel/prom.c | 97
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 232
-rw-r--r--  arch/powerpc/kernel/ptrace.c | 3468
-rw-r--r--  arch/powerpc/kernel/ptrace/Makefile | 21
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-adv.c | 494
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-altivec.c | 115
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-decl.h | 177
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-fpu.c | 50
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-noadv.c | 298
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-novsx.c | 64
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-spe.c | 60
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-tm.c | 788
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-view.c | 847
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-vsx.c | 148
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace.c | 448
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace32.c (renamed from arch/powerpc/kernel/ptrace32.c) | 21
-rw-r--r--  arch/powerpc/kernel/reloc_32.S | 2
-rw-r--r--  arch/powerpc/kernel/rtas-proc.c | 15
-rw-r--r--  arch/powerpc/kernel/rtas-rtc.c | 2
-rw-r--r--  arch/powerpc/kernel/rtas.c | 568
-rw-r--r--  arch/powerpc/kernel/rtas_pci.c | 2
-rw-r--r--  arch/powerpc/kernel/rtasd.c | 32
-rw-r--r--  arch/powerpc/kernel/secure_boot.c | 18
-rw-r--r--  arch/powerpc/kernel/security.c | 475
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 61
-rw-r--r--  arch/powerpc/kernel/setup.h | 12
-rw-r--r--  arch/powerpc/kernel/setup_32.c | 18
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 346
-rw-r--r--  arch/powerpc/kernel/signal.c | 210
-rw-r--r--  arch/powerpc/kernel/signal.h | 176
-rw-r--r--  arch/powerpc/kernel/signal_32.c | 1040
-rw-r--r--  arch/powerpc/kernel/signal_64.c | 413
-rw-r--r--  arch/powerpc/kernel/smp.c | 712
-rw-r--r--  arch/powerpc/kernel/stacktrace.c | 141
-rw-r--r--  arch/powerpc/kernel/static_call.c | 37
-rw-r--r--  arch/powerpc/kernel/swsusp_32.S | 2
-rw-r--r--  arch/powerpc/kernel/swsusp_64.c | 5
-rw-r--r--  arch/powerpc/kernel/swsusp_asm64.S | 1
-rw-r--r--  arch/powerpc/kernel/sys_ppc32.c | 49
-rw-r--r--  arch/powerpc/kernel/syscalls.c | 30
-rw-r--r--  arch/powerpc/kernel/syscalls/Makefile | 52
-rw-r--r--  arch/powerpc/kernel/syscalls/syscall.tbl | 56
-rw-r--r--  arch/powerpc/kernel/syscalls/syscallhdr.sh | 37
-rw-r--r--  arch/powerpc/kernel/syscalls/syscalltbl.sh | 36
-rw-r--r--  arch/powerpc/kernel/sysfs.c | 531
-rw-r--r--  arch/powerpc/kernel/systbl.S | 14
-rw-r--r--  arch/powerpc/kernel/tau_6xx.c | 153
-rw-r--r--  arch/powerpc/kernel/time.c | 332
-rw-r--r--  arch/powerpc/kernel/tm.S | 35
-rw-r--r--  arch/powerpc/kernel/trace/ftrace.c | 177
-rw-r--r--  arch/powerpc/kernel/traps.c | 482
-rw-r--r--  arch/powerpc/kernel/udbg.c | 2
-rw-r--r--  arch/powerpc/kernel/udbg_16550.c | 39
-rw-r--r--  arch/powerpc/kernel/uprobes.c | 15
-rw-r--r--  arch/powerpc/kernel/vdso.c | 805
-rw-r--r--  arch/powerpc/kernel/vdso32/.gitignore | 1
-rw-r--r--  arch/powerpc/kernel/vdso32/Makefile | 56
-rw-r--r--  arch/powerpc/kernel/vdso32/cacheflush.S | 19
-rw-r--r--  arch/powerpc/kernel/vdso32/datapage.S | 9
-rwxr-xr-x  arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh | 16
-rw-r--r--  arch/powerpc/kernel/vdso32/gettimeofday.S | 307
-rw-r--r--  arch/powerpc/kernel/vdso32/vdso32.lds.S | 70
-rw-r--r--  arch/powerpc/kernel/vdso32/vgettimeofday.c | 34
-rw-r--r--  arch/powerpc/kernel/vdso32_wrapper.S (renamed from arch/powerpc/kernel/vdso32/vdso32_wrapper.S) | 0
-rw-r--r--  arch/powerpc/kernel/vdso64/.gitignore | 1
-rw-r--r--  arch/powerpc/kernel/vdso64/Makefile | 57
-rw-r--r--  arch/powerpc/kernel/vdso64/cacheflush.S | 25
-rw-r--r--  arch/powerpc/kernel/vdso64/datapage.S | 31
-rwxr-xr-x  arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh | 16
-rw-r--r--  arch/powerpc/kernel/vdso64/gettimeofday.S | 243
-rw-r--r--  arch/powerpc/kernel/vdso64/sigtramp.S | 22
-rw-r--r--  arch/powerpc/kernel/vdso64/vdso64.lds.S | 67
-rw-r--r--  arch/powerpc/kernel/vdso64/vgettimeofday.c | 29
-rw-r--r--  arch/powerpc/kernel/vecemu.c | 20
-rw-r--r--  arch/powerpc/kernel/vector.S | 19
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 96
-rw-r--r--  arch/powerpc/kernel/watchdog.c | 28
161 files changed, 16201 insertions, 16324 deletions
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore
index 67ebd3003c05..d71179d3ffe9 100644
--- a/arch/powerpc/kernel/.gitignore
+++ b/arch/powerpc/kernel/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
prom_init_check
vmlinux.lds
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 78a1b22d4fd8..b039877c743d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -3,8 +3,6 @@
# Makefile for the linux kernel.
#
-CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-
ifdef CONFIG_PPC64
CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
endif
@@ -18,7 +16,7 @@ CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
-CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+CFLAGS_prom_init.o += -fno-stack-protector
CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_prom_init.o += -ffreestanding
@@ -41,17 +39,20 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
endif
-obj-y := cputable.o ptrace.o syscalls.o \
- irq.o align.o signal_32.o pmc.o vdso.o \
+obj-y := cputable.o syscalls.o \
+ irq.o align.o signal_$(BITS).o pmc.o vdso.o \
process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o misc_$(BITS).o \
- of_platform.o prom_parse.o
-obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
- signal_64.o ptrace32.o \
- paca.o nvram_64.o firmware.o note.o
-obj-$(CONFIG_VDSO32) += vdso32/
+ of_platform.o prom_parse.o firmware.o \
+ hw_breakpoint_constraints.o interrupt.o \
+ kdebugfs.o stacktrace.o
+obj-y += ptrace/
+obj-$(CONFIG_PPC64) += setup_64.o \
+ paca.o nvram_64.o note.o
+obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o
+obj-$(CONFIG_VDSO32) += vdso32_wrapper.o
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_DAWR) += dawr.o
@@ -60,7 +61,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o
-obj-$(CONFIG_PPC64) += vdso64/
+obj-$(CONFIG_PPC64) += vdso64_wrapper.o
obj-$(CONFIG_ALTIVEC) += vecemu.o
obj-$(CONFIG_PPC_BOOK3S_IDLE) += idle_book3s.o
procfs-y := proc_powerpc.o
@@ -71,7 +72,7 @@ obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
obj-$(CONFIG_PPC_DT_CPU_FTRS) += dt_cpu_ftrs.o
-obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
+obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_cache.o \
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
@@ -95,7 +96,8 @@ obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o
obj-$(CONFIG_PPC_DOORBELL) += dbell.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
-extra-y := head_$(BITS).o
+extra-$(CONFIG_PPC64) := head_64.o
+extra-$(CONFIG_PPC_BOOK3S_32) := head_book3s_32.o
extra-$(CONFIG_40x) := head_40x.o
extra-$(CONFIG_44x) := head_44x.o
extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
@@ -104,7 +106,7 @@ extra-y += vmlinux.lds
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
-obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o
+obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o static_call.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
@@ -114,7 +116,6 @@ obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o
obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
-obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o
@@ -162,6 +163,9 @@ UBSAN_SANITIZE_kprobes.o := n
GCOV_PROFILE_kprobes-ftrace.o := n
KCOV_INSTRUMENT_kprobes-ftrace.o := n
UBSAN_SANITIZE_kprobes-ftrace.o := n
+GCOV_PROFILE_syscall_64.o := n
+KCOV_INSTRUMENT_syscall_64.o := n
+UBSAN_SANITIZE_syscall_64.o := n
UBSAN_SANITIZE_vdso.o := n
# Necessary for booting with kcov enabled on book3e machines
@@ -169,6 +173,9 @@ KCOV_INSTRUMENT_cputable.o := n
KCOV_INSTRUMENT_setup_64.o := n
KCOV_INSTRUMENT_paca.o := n
+CFLAGS_setup_64.o += -fno-stack-protector
+CFLAGS_paca.o += -fno-stack-protector
+
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
@@ -184,3 +191,10 @@ $(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
targets += prom_init_check
clean-files := vmlinux.lds
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg
+
+# for cleaning
+subdir- += vdso32 vdso64
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 92045ed64976..bf96b954a4eb 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -24,6 +24,7 @@
#include <asm/disassemble.h>
#include <asm/cpu_has_feature.h>
#include <asm/sstep.h>
+#include <asm/inst.h>
struct aligninfo {
unsigned char len;
@@ -104,9 +105,8 @@ static struct aligninfo spe_aligninfo[32] = {
* so we don't need the address swizzling.
*/
static int emulate_spe(struct pt_regs *regs, unsigned int reg,
- unsigned int instr)
+ struct ppc_inst ppc_instr)
{
- int ret;
union {
u64 ll;
u32 w[2];
@@ -115,8 +115,9 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
} data, temp;
unsigned char __user *p, *addr;
unsigned long *evr = &current->thread.evr[reg];
- unsigned int nb, flags;
+ unsigned int nb, flags, instr;
+ instr = ppc_inst_val(ppc_instr);
instr = (instr >> 1) & 0x1f;
/* DAR has the operand effective address */
@@ -125,11 +126,6 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
nb = spe_aligninfo[instr].len;
flags = spe_aligninfo[instr].flags;
- /* Verify the address of the operand */
- if (unlikely(user_mode(regs) &&
- !access_ok(addr, nb)))
- return -EFAULT;
-
/* userland only */
if (unlikely(!user_mode(regs)))
return 0;
@@ -167,26 +163,27 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
}
} else {
temp.ll = data.ll = 0;
- ret = 0;
p = addr;
+ if (!user_read_access_begin(addr, nb))
+ return -EFAULT;
+
switch (nb) {
case 8:
- ret |= __get_user_inatomic(temp.v[0], p++);
- ret |= __get_user_inatomic(temp.v[1], p++);
- ret |= __get_user_inatomic(temp.v[2], p++);
- ret |= __get_user_inatomic(temp.v[3], p++);
- /* fall through */
+ unsafe_get_user(temp.v[0], p++, Efault_read);
+ unsafe_get_user(temp.v[1], p++, Efault_read);
+ unsafe_get_user(temp.v[2], p++, Efault_read);
+ unsafe_get_user(temp.v[3], p++, Efault_read);
+ fallthrough;
case 4:
- ret |= __get_user_inatomic(temp.v[4], p++);
- ret |= __get_user_inatomic(temp.v[5], p++);
- /* fall through */
+ unsafe_get_user(temp.v[4], p++, Efault_read);
+ unsafe_get_user(temp.v[5], p++, Efault_read);
+ fallthrough;
case 2:
- ret |= __get_user_inatomic(temp.v[6], p++);
- ret |= __get_user_inatomic(temp.v[7], p++);
- if (unlikely(ret))
- return -EFAULT;
+ unsafe_get_user(temp.v[6], p++, Efault_read);
+ unsafe_get_user(temp.v[7], p++, Efault_read);
}
+ user_read_access_end();
switch (instr) {
case EVLDD:
@@ -253,31 +250,41 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
/* Store result to memory or update registers */
if (flags & ST) {
- ret = 0;
p = addr;
+
+ if (!user_write_access_begin(addr, nb))
+ return -EFAULT;
+
switch (nb) {
case 8:
- ret |= __put_user_inatomic(data.v[0], p++);
- ret |= __put_user_inatomic(data.v[1], p++);
- ret |= __put_user_inatomic(data.v[2], p++);
- ret |= __put_user_inatomic(data.v[3], p++);
- /* fall through */
+ unsafe_put_user(data.v[0], p++, Efault_write);
+ unsafe_put_user(data.v[1], p++, Efault_write);
+ unsafe_put_user(data.v[2], p++, Efault_write);
+ unsafe_put_user(data.v[3], p++, Efault_write);
+ fallthrough;
case 4:
- ret |= __put_user_inatomic(data.v[4], p++);
- ret |= __put_user_inatomic(data.v[5], p++);
- /* fall through */
+ unsafe_put_user(data.v[4], p++, Efault_write);
+ unsafe_put_user(data.v[5], p++, Efault_write);
+ fallthrough;
case 2:
- ret |= __put_user_inatomic(data.v[6], p++);
- ret |= __put_user_inatomic(data.v[7], p++);
+ unsafe_put_user(data.v[6], p++, Efault_write);
+ unsafe_put_user(data.v[7], p++, Efault_write);
}
- if (unlikely(ret))
- return -EFAULT;
+ user_write_access_end();
} else {
*evr = data.w[0];
regs->gpr[reg] = data.w[1];
}
return 1;
+
+Efault_read:
+ user_read_access_end();
+ return -EFAULT;
+
+Efault_write:
+ user_write_access_end();
+ return -EFAULT;
}
#endif /* CONFIG_SPE */
@@ -293,28 +300,27 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
int fix_alignment(struct pt_regs *regs)
{
- unsigned int instr;
+ struct ppc_inst instr;
struct instruction_op op;
int r, type;
- /*
- * We require a complete register set, if not, then our assembly
- * is broken
- */
- CHECK_FULL_REGS(regs);
+ if (is_kernel_addr(regs->nip))
+ r = copy_inst_from_kernel_nofault(&instr, (void *)regs->nip);
+ else
+ r = __get_user_instr(instr, (void __user *)regs->nip);
- if (unlikely(__get_user(instr, (unsigned int __user *)regs->nip)))
+ if (unlikely(r))
return -EFAULT;
if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
/* We don't handle PPC little-endian any more... */
if (cpu_has_feature(CPU_FTR_PPC_LE))
return -EIO;
- instr = swab32(instr);
+ instr = ppc_inst_swab(instr);
}
#ifdef CONFIG_SPE
- if ((instr >> 26) == 0x4) {
- int reg = (instr >> 21) & 0x1f;
+ if (ppc_inst_primary_opcode(instr) == 0x4) {
+ int reg = (ppc_inst_val(instr) >> 21) & 0x1f;
PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
}
@@ -331,7 +337,7 @@ int fix_alignment(struct pt_regs *regs)
* when pasting to a co-processor. Furthermore, paste_last is the
* synchronisation point for preceding copy/paste sequences.
*/
- if ((instr & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe))
+ if ((ppc_inst_val(instr) & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe))
return -EIO;
r = analyse_instr(&op, regs, instr);
@@ -343,6 +349,7 @@ int fix_alignment(struct pt_regs *regs)
if (op.type != CACHEOP + DCBZ)
return -EINVAL;
PPC_WARN_ALIGNMENT(dcbz, regs);
+ WARN_ON_ONCE(!user_mode(regs));
r = emulate_dcbz(op.ea, regs);
} else {
if (type == LARX || type == STCX)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index fcf24a365fc0..cc05522f50bf 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -9,8 +9,6 @@
* #defines from the assembly-language output.
*/
-#define GENERATING_ASM_OFFSETS /* asm/smp.h */
-
#include <linux/compat.h>
#include <linux/signal.h>
#include <linux/sched.h>
@@ -30,7 +28,6 @@
#include <asm/io.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
@@ -70,6 +67,10 @@
#include <asm/fixmap.h>
#endif
+#ifdef CONFIG_XMON
+#include "../xmon/xmon_bpts.h"
+#endif
+
#define STACK_PT_REGS_OFFSET(sym, val) \
DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val))
@@ -83,19 +84,14 @@ int main(void)
OFFSET(PACA_CANARY, paca_struct, canary);
#endif
#endif
- OFFSET(MMCONTEXTID, mm_struct, context.id);
-#ifdef CONFIG_PPC64
- DEFINE(SIGSEGV, SIGSEGV);
- DEFINE(NMI_MASK, NMI_MASK);
-#else
- OFFSET(KSP_LIMIT, thread_struct, ksp_limit);
+#ifdef CONFIG_PPC32
#ifdef CONFIG_PPC_RTAS
OFFSET(RTAS_SP, thread_struct, rtas_sp);
#endif
#endif /* CONFIG_PPC64 */
OFFSET(TASK_STACK, task_struct, stack);
#ifdef CONFIG_SMP
- OFFSET(TASK_CPU, task_struct, cpu);
+ OFFSET(TASK_CPU, task_struct, thread_info.cpu);
#endif
#ifdef CONFIG_LIVEPATCH
@@ -107,15 +103,16 @@ int main(void)
#ifdef CONFIG_BOOKE
OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]);
#endif
+#ifdef CONFIG_PPC_FPU
OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode);
OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr);
OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area);
+#endif
OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr);
OFFSET(THREAD_LOAD_FP, thread_struct, load_fp);
#ifdef CONFIG_ALTIVEC
OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr);
OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area);
- OFFSET(THREAD_VRSAVE, thread_struct, vrsave);
OFFSET(THREAD_USED_VR, thread_struct, used_vr);
OFFSET(VRSTATE_VSCR, thread_vr_state, vscr);
OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec);
@@ -127,7 +124,6 @@ int main(void)
OFFSET(KSP_VSID, thread_struct, ksp_vsid);
#else /* CONFIG_PPC64 */
OFFSET(PGDIR, thread_struct, pgdir);
-#ifdef CONFIG_VMAP_STACK
OFFSET(SRR0, thread_struct, srr0);
OFFSET(SRR1, thread_struct, srr1);
OFFSET(DAR, thread_struct, dar);
@@ -144,26 +140,18 @@ int main(void)
OFFSET(THLR, thread_struct, lr);
OFFSET(THCTR, thread_struct, ctr);
#endif
-#endif
#ifdef CONFIG_SPE
OFFSET(THREAD_EVR0, thread_struct, evr[0]);
OFFSET(THREAD_ACC, thread_struct, acc);
- OFFSET(THREAD_SPEFSCR, thread_struct, spefscr);
OFFSET(THREAD_USED_SPE, thread_struct, used_spe);
#endif /* CONFIG_SPE */
#endif /* CONFIG_PPC64 */
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- OFFSET(THREAD_DBCR0, thread_struct, debug.dbcr0);
-#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu);
#endif
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu);
#endif
-#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
- OFFSET(KUAP, thread_struct, kuap);
-#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
OFFSET(PACATMSCRATCH, paca_struct, tm_scratch);
@@ -173,6 +161,7 @@ int main(void)
OFFSET(THREAD_TM_TAR, thread_struct, tm_tar);
OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr);
OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr);
+ OFFSET(THREAD_TM_AMR, thread_struct, tm_amr);
OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs);
OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr);
OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave);
@@ -182,19 +171,12 @@ int main(void)
sizeof(struct pt_regs) + 16);
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
- OFFSET(TI_FLAGS, thread_info, flags);
OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
- OFFSET(TI_PREEMPT, thread_info, preempt_count);
#ifdef CONFIG_PPC64
OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size);
OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size);
- OFFSET(DCACHEL1BLOCKSPERPAGE, ppc64_caches, l1d.blocks_per_page);
- OFFSET(ICACHEL1BLOCKSIZE, ppc64_caches, l1i.block_size);
- OFFSET(ICACHEL1LOGBLOCKSIZE, ppc64_caches, l1i.log_block_size);
- OFFSET(ICACHEL1BLOCKSPERPAGE, ppc64_caches, l1i.blocks_per_page);
/* paca */
- DEFINE(PACA_SIZE, sizeof(struct paca_struct));
OFFSET(PACAPACAINDEX, paca_struct, paca_index);
OFFSET(PACAPROCSTART, paca_struct, cpu_start);
OFFSET(PACAKSAVE, paca_struct, kstack);
@@ -206,18 +188,13 @@ int main(void)
OFFSET(PACATOC, paca_struct, kernel_toc);
OFFSET(PACAKBASE, paca_struct, kernelbase);
OFFSET(PACAKMSR, paca_struct, kernel_msr);
+#ifdef CONFIG_PPC_BOOK3S_64
+ OFFSET(PACAHSRR_VALID, paca_struct, hsrr_valid);
+ OFFSET(PACASRR_VALID, paca_struct, srr_valid);
+#endif
OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled);
-#ifdef CONFIG_PPC_BOOK3S
- OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
-#ifdef CONFIG_PPC_MM_SLICES
- OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
- OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
- OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit);
- DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
-#endif /* CONFIG_PPC_MM_SLICES */
-#endif
#ifdef CONFIG_PPC_BOOK3E
OFFSET(PACAPGD, paca_struct, pgd);
@@ -238,22 +215,9 @@ int main(void)
#endif /* CONFIG_PPC_BOOK3E */
#ifdef CONFIG_PPC_BOOK3S_64
- OFFSET(PACASLBCACHE, paca_struct, slb_cache);
- OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
- OFFSET(PACASTABRR, paca_struct, stab_rr);
- OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
-#ifdef CONFIG_PPC_MM_SLICES
- OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
-#else
- OFFSET(PACACONTEXTSLLP, paca_struct, mm_ctx_sllp);
-#endif /* CONFIG_PPC_MM_SLICES */
OFFSET(PACA_EXGEN, paca_struct, exgen);
OFFSET(PACA_EXMC, paca_struct, exmc);
- OFFSET(PACA_EXSLB, paca_struct, exslb);
OFFSET(PACA_EXNMI, paca_struct, exnmi);
-#ifdef CONFIG_PPC_PSERIES
- OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
-#endif
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
@@ -262,9 +226,7 @@ int main(void)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
#endif
- OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
- OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
#endif /* CONFIG_PPC_BOOK3S_64 */
OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
#ifdef CONFIG_PPC_BOOK3S_64
@@ -280,21 +242,14 @@ int main(void)
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default);
- OFFSET(ACCOUNT_STARTTIME, paca_struct, accounting.starttime);
- OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user);
- OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
- OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
+#ifdef CONFIG_PPC64
+ OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1);
+#endif
#ifdef CONFIG_PPC_BOOK3E
OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
#endif
OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
#else /* CONFIG_PPC64 */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- OFFSET(ACCOUNT_STARTTIME, thread_info, accounting.starttime);
- OFFSET(ACCOUNT_STARTTIME_USER, thread_info, accounting.starttime_user);
- OFFSET(ACCOUNT_USER_TIME, thread_info, accounting.utime);
- OFFSET(ACCOUNT_SYSTEM_TIME, thread_info, accounting.stime);
-#endif
#endif /* CONFIG_PPC64 */
/* RTAS */
@@ -303,7 +258,7 @@ int main(void)
/* Interrupt register frame */
DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
- DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
+ DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS);
STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
@@ -318,9 +273,6 @@ int main(void)
STACK_PT_REGS_OFFSET(GPR11, gpr[11]);
STACK_PT_REGS_OFFSET(GPR12, gpr[12]);
STACK_PT_REGS_OFFSET(GPR13, gpr[13]);
-#ifndef CONFIG_PPC64
- STACK_PT_REGS_OFFSET(GPR14, gpr[14]);
-#endif /* CONFIG_PPC64 */
/*
* Note: these symbols include _ because they overlap with special
* register names
@@ -332,52 +284,38 @@ int main(void)
STACK_PT_REGS_OFFSET(_CCR, ccr);
STACK_PT_REGS_OFFSET(_XER, xer);
STACK_PT_REGS_OFFSET(_DAR, dar);
+ STACK_PT_REGS_OFFSET(_DEAR, dear);
STACK_PT_REGS_OFFSET(_DSISR, dsisr);
+ STACK_PT_REGS_OFFSET(_ESR, esr);
STACK_PT_REGS_OFFSET(ORIG_GPR3, orig_gpr3);
STACK_PT_REGS_OFFSET(RESULT, result);
STACK_PT_REGS_OFFSET(_TRAP, trap);
-#ifndef CONFIG_PPC64
- /*
- * The PowerPC 400-class & Book-E processors have neither the DAR
- * nor the DSISR SPRs. Hence, we overload them to hold the similar
- * DEAR and ESR SPRs for such processors. For critical interrupts
- * we use them to hold SRR0 and SRR1.
- */
- STACK_PT_REGS_OFFSET(_DEAR, dar);
- STACK_PT_REGS_OFFSET(_ESR, dsisr);
-#else /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC64
STACK_PT_REGS_OFFSET(SOFTE, softe);
STACK_PT_REGS_OFFSET(_PPR, ppr);
-#endif /* CONFIG_PPC64 */
+#endif
-#ifdef CONFIG_PPC_KUAP
- STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap);
+#ifdef CONFIG_PPC_PKEY
+ STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr);
+ STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr);
#endif
-#if defined(CONFIG_PPC32)
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
- DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
- DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
+ STACK_PT_REGS_OFFSET(MAS0, mas0);
/* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */
- DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
- DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1));
- DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2));
- DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3));
- DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6));
- DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7));
- DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0));
- DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1));
- DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0));
- DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1));
- DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0));
- DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
- DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
+ STACK_PT_REGS_OFFSET(MMUCR, mas0);
+ STACK_PT_REGS_OFFSET(MAS1, mas1);
+ STACK_PT_REGS_OFFSET(MAS2, mas2);
+ STACK_PT_REGS_OFFSET(MAS3, mas3);
+ STACK_PT_REGS_OFFSET(MAS6, mas6);
+ STACK_PT_REGS_OFFSET(MAS7, mas7);
+ STACK_PT_REGS_OFFSET(_SRR0, srr0);
+ STACK_PT_REGS_OFFSET(_SRR1, srr1);
+ STACK_PT_REGS_OFFSET(_CSRR0, csrr0);
+ STACK_PT_REGS_OFFSET(_CSRR1, csrr1);
+ STACK_PT_REGS_OFFSET(_DSRR0, dsrr0);
+ STACK_PT_REGS_OFFSET(_DSRR1, dsrr1);
#endif
-#endif
-
-#ifndef CONFIG_PPC64
- OFFSET(MM_PGD, mm_struct, pgd);
-#endif /* ! CONFIG_PPC64 */
/* About the CPU features table */
OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features);
@@ -394,59 +332,23 @@ int main(void)
#endif /* ! CONFIG_PPC64 */
/* datapage offsets for use by vdso */
- OFFSET(CFG_TB_ORIG_STAMP, vdso_data, tb_orig_stamp);
- OFFSET(CFG_TB_TICKS_PER_SEC, vdso_data, tb_ticks_per_sec);
- OFFSET(CFG_TB_TO_XS, vdso_data, tb_to_xs);
- OFFSET(CFG_TB_UPDATE_COUNT, vdso_data, tb_update_count);
- OFFSET(CFG_TZ_MINUTEWEST, vdso_data, tz_minuteswest);
- OFFSET(CFG_TZ_DSTTIME, vdso_data, tz_dsttime);
- OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32);
- OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec);
- OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
- OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec);
- OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec);
- OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
- OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
+ OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data);
+ OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec);
#ifdef CONFIG_PPC64
- OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
- OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
- OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
- OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size);
- OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
- OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec);
- OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec);
-#endif
- OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec);
- OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec);
- OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
- OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
- OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
- OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
- /* timeval/timezone offsets for use by vdso */
- OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest);
- OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime);
-
- /* Other bits used by the vdso */
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
- DEFINE(CLOCK_MAX, CLOCK_TAI);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- DEFINE(EINVAL, EINVAL);
- DEFINE(KTIME_LOW_RES, KTIME_LOW_RES);
+ OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size);
+ OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size);
+ OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size);
+ OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size);
+ OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map);
+ OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, compat_syscall_map);
+#else
+ OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map);
+#endif
#ifdef CONFIG_BUG
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE)));
-#else
- DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
-#endif
- DEFINE(PTE_SIZE, sizeof(pte_t));
-
#ifdef CONFIG_KVM
OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack);
OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid);
@@ -518,11 +420,9 @@ int main(void)
OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid);
OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr);
OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1);
- OFFSET(KVM_NEED_FLUSH, kvm, arch.need_tlb_flush.bits);
OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
- OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
@@ -544,11 +444,12 @@ int main(void)
OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl);
OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr);
OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
- OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr);
- OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx);
+ OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
+ OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
+ OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1);
+ OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1);
OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
- OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires);
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
@@ -556,8 +457,9 @@ int main(void)
OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
+ OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra);
+ OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs);
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
- OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc);
OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar);
OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar);
OFFSET(VCPU_SIER, kvm_vcpu, arch.sier);
@@ -566,7 +468,6 @@ int main(void)
OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr);
OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr);
OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar);
- OFFSET(VCPU_FAULT_GPA, kvm_vcpu, arch.fault_gpa);
OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr);
OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap);
@@ -678,13 +579,10 @@ int main(void)
HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
- HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
- HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
- HSTATE_FIELD(HSTATE_TID, tid);
HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
@@ -693,6 +591,9 @@ int main(void)
HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]);
HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]);
HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]);
+ HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]);
+ HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]);
+ HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]);
HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]);
HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]);
HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]);
@@ -711,8 +612,6 @@ int main(void)
OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
- OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set);
- OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_PPC_BOOK3S_64
@@ -789,11 +688,14 @@ int main(void)
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
- DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
#ifdef CONFIG_PPC_8xx
DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
#endif
+#ifdef CONFIG_XMON
+ DEFINE(BPT_SIZE, BPT_SIZE);
+#endif
+
return 0;
}
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
index a2dddd7f3d09..1bcfca5fdf67 100644
--- a/arch/powerpc/kernel/audit.c
+++ b/arch/powerpc/kernel/audit.c
@@ -47,15 +47,17 @@ int audit_classify_syscall(int abi, unsigned syscall)
#endif
switch(syscall) {
case __NR_open:
- return 2;
+ return AUDITSC_OPEN;
case __NR_openat:
- return 3;
+ return AUDITSC_OPENAT;
case __NR_socketcall:
- return 4;
+ return AUDITSC_SOCKETCALL;
case __NR_execve:
- return 5;
+ return AUDITSC_EXECVE;
+ case __NR_openat2:
+ return AUDITSC_OPENAT2;
default:
- return 0;
+ return AUDITSC_NATIVE;
}
}
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 6dfceaa820e4..803c2a45b22a 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -9,13 +9,13 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/memblock.h>
+#include <linux/pgtable.h>
#include <asm/sections.h>
#include <asm/prom.h>
#include <asm/btext.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/udbg.h>
@@ -26,7 +26,7 @@
static void scrollscreen(void);
#endif
-#define __force_data __attribute__((__section__(".data")))
+#define __force_data __section(".data")
static int g_loc_X __force_data;
static int g_loc_Y __force_data;
@@ -95,19 +95,10 @@ void __init btext_prepare_BAT(void)
boot_text_mapped = 0;
return;
}
- if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
- /* 603, 604, G3, G4, ... */
- lowbits = addr & ~0xFF000000UL;
- addr &= 0xFF000000UL;
- disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
- disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
- } else {
- /* 601 */
- lowbits = addr & ~0xFF800000UL;
- addr &= 0xFF800000UL;
- disp_BAT[0] = vaddr | (_PAGE_NO_CACHE | PP_RWXX) | 4;
- disp_BAT[1] = addr | BL_8M | 0x40;
- }
+ lowbits = addr & ~0xFF000000UL;
+ addr &= 0xFF000000UL;
+ disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
+ disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
logicalDisplayBase = (void *) (vaddr + lowbits);
}
#endif
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 470336277c67..cf1be75b7833 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -7,6 +7,8 @@
* Author: Nathan Lynch
*/
+#define pr_fmt(fmt) "cacheinfo: " fmt
+
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
@@ -118,6 +120,7 @@ struct cache {
struct cpumask shared_cpu_map; /* online CPUs using this cache */
int type; /* split cache disambiguation */
int level; /* level not explicit in device tree */
+ int group_id; /* id of the group of threads that share this cache */
struct list_head list; /* global list of cache objects */
struct cache *next_local; /* next cache of >= level */
};
@@ -140,22 +143,24 @@ static const char *cache_type_string(const struct cache *cache)
}
static void cache_init(struct cache *cache, int type, int level,
- struct device_node *ofnode)
+ struct device_node *ofnode, int group_id)
{
cache->type = type;
cache->level = level;
cache->ofnode = of_node_get(ofnode);
+ cache->group_id = group_id;
INIT_LIST_HEAD(&cache->list);
list_add(&cache->list, &cache_list);
}
-static struct cache *new_cache(int type, int level, struct device_node *ofnode)
+static struct cache *new_cache(int type, int level,
+ struct device_node *ofnode, int group_id)
{
struct cache *cache;
cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (cache)
- cache_init(cache, type, level, ofnode);
+ cache_init(cache, type, level, ofnode, group_id);
return cache;
}
@@ -166,7 +171,7 @@ static void release_cache_debugcheck(struct cache *cache)
list_for_each_entry(iter, &cache_list, list)
WARN_ONCE(iter->next_local == cache,
- "cache for %pOF(%s) refers to cache for %pOF(%s)\n",
+ "cache for %pOFP(%s) refers to cache for %pOFP(%s)\n",
iter->ofnode,
cache_type_string(iter),
cache->ofnode,
@@ -178,7 +183,7 @@ static void release_cache(struct cache *cache)
if (!cache)
return;
- pr_debug("freeing L%d %s cache for %pOF\n", cache->level,
+ pr_debug("freeing L%d %s cache for %pOFP\n", cache->level,
cache_type_string(cache), cache->ofnode);
release_cache_debugcheck(cache);
@@ -193,7 +198,7 @@ static void cache_cpu_set(struct cache *cache, int cpu)
while (next) {
WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
- "CPU %i already accounted in %pOF(%s)\n",
+ "CPU %i already accounted in %pOFP(%s)\n",
cpu, next->ofnode,
cache_type_string(next));
cpumask_set_cpu(cpu, &next->shared_cpu_map);
@@ -307,20 +312,24 @@ static struct cache *cache_find_first_sibling(struct cache *cache)
return cache;
list_for_each_entry(iter, &cache_list, list)
- if (iter->ofnode == cache->ofnode && iter->next_local == cache)
+ if (iter->ofnode == cache->ofnode &&
+ iter->group_id == cache->group_id &&
+ iter->next_local == cache)
return iter;
return cache;
}
-/* return the first cache on a local list matching node */
-static struct cache *cache_lookup_by_node(const struct device_node *node)
+/* return the first cache on a local list matching node and thread-group id */
+static struct cache *cache_lookup_by_node_group(const struct device_node *node,
+ int group_id)
{
struct cache *cache = NULL;
struct cache *iter;
list_for_each_entry(iter, &cache_list, list) {
- if (iter->ofnode != node)
+ if (iter->ofnode != node ||
+ iter->group_id != group_id)
continue;
cache = cache_find_first_sibling(iter);
break;
@@ -350,23 +359,24 @@ static int cache_is_unified_d(const struct device_node *np)
CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED;
}
-static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode_unified(struct device_node *node, int group_id,
+ int level)
{
- pr_debug("creating L%d ucache for %pOF\n", level, node);
+ pr_debug("creating L%d ucache for %pOFP\n", level, node);
- return new_cache(cache_is_unified_d(node), level, node);
+ return new_cache(cache_is_unified_d(node), level, node, group_id);
}
-static struct cache *cache_do_one_devnode_split(struct device_node *node,
+static struct cache *cache_do_one_devnode_split(struct device_node *node, int group_id,
int level)
{
struct cache *dcache, *icache;
- pr_debug("creating L%d dcache and icache for %pOF\n", level,
+ pr_debug("creating L%d dcache and icache for %pOFP\n", level,
node);
- dcache = new_cache(CACHE_TYPE_DATA, level, node);
- icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
+ dcache = new_cache(CACHE_TYPE_DATA, level, node, group_id);
+ icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node, group_id);
if (!dcache || !icache)
goto err;
@@ -380,31 +390,32 @@ err:
return NULL;
}
-static struct cache *cache_do_one_devnode(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode(struct device_node *node, int group_id, int level)
{
struct cache *cache;
if (cache_node_is_unified(node))
- cache = cache_do_one_devnode_unified(node, level);
+ cache = cache_do_one_devnode_unified(node, group_id, level);
else
- cache = cache_do_one_devnode_split(node, level);
+ cache = cache_do_one_devnode_split(node, group_id, level);
return cache;
}
static struct cache *cache_lookup_or_instantiate(struct device_node *node,
+ int group_id,
int level)
{
struct cache *cache;
- cache = cache_lookup_by_node(node);
+ cache = cache_lookup_by_node_group(node, group_id);
WARN_ONCE(cache && cache->level != level,
"cache level mismatch on lookup (got %d, expected %d)\n",
cache->level, level);
if (!cache)
- cache = cache_do_one_devnode(node, level);
+ cache = cache_do_one_devnode(node, group_id, level);
return cache;
}
@@ -418,15 +429,53 @@ static void link_cache_lists(struct cache *smaller, struct cache *bigger)
}
smaller->next_local = bigger;
+
+ /*
+ * The cache->next_local list sorts by level ascending:
+ * L1d -> L1i -> L2 -> L3 ...
+ */
+ WARN_ONCE((smaller->level == 1 && bigger->level > 2) ||
+ (smaller->level > 1 && bigger->level != smaller->level + 1),
+ "linking L%i cache %pOFP to L%i cache %pOFP; skipped a level?\n",
+ smaller->level, smaller->ofnode, bigger->level, bigger->ofnode);
}
static void do_subsidiary_caches_debugcheck(struct cache *cache)
{
- WARN_ON_ONCE(cache->level != 1);
- WARN_ON_ONCE(!of_node_is_type(cache->ofnode, "cpu"));
+ WARN_ONCE(cache->level != 1,
+ "instantiating cache chain from L%d %s cache for "
+ "%pOFP instead of an L1\n", cache->level,
+ cache_type_string(cache), cache->ofnode);
+ WARN_ONCE(!of_node_is_type(cache->ofnode, "cpu"),
+ "instantiating cache chain from node %pOFP of type '%s' "
+ "instead of a cpu node\n", cache->ofnode,
+ of_node_get_device_type(cache->ofnode));
}
-static void do_subsidiary_caches(struct cache *cache)
+/*
+ * If sub-groups of threads in a core containing @cpu_id share the
+ * L@level-cache (information obtained via "ibm,thread-groups"
+ * device-tree property), then we identify the group by the first
+ * thread-sibling in the group. We define this to be the group-id.
+ *
+ * In the absence of any thread-group information for L@level-cache,
+ * this function returns -1.
+ */
+static int get_group_id(unsigned int cpu_id, int level)
+{
+ if (has_big_cores && level == 1)
+ return cpumask_first(per_cpu(thread_group_l1_cache_map,
+ cpu_id));
+ else if (thread_group_shares_l2 && level == 2)
+ return cpumask_first(per_cpu(thread_group_l2_cache_map,
+ cpu_id));
+ else if (thread_group_shares_l3 && level == 3)
+ return cpumask_first(per_cpu(thread_group_l3_cache_map,
+ cpu_id));
+ return -1;
+}
+
+static void do_subsidiary_caches(struct cache *cache, unsigned int cpu_id)
{
struct device_node *subcache_node;
int level = cache->level;
@@ -435,9 +484,11 @@ static void do_subsidiary_caches(struct cache *cache)
while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
struct cache *subcache;
+ int group_id;
level++;
- subcache = cache_lookup_or_instantiate(subcache_node, level);
+ group_id = get_group_id(cpu_id, level);
+ subcache = cache_lookup_or_instantiate(subcache_node, group_id, level);
of_node_put(subcache_node);
if (!subcache)
break;
@@ -451,6 +502,7 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
{
struct device_node *cpu_node;
struct cache *cpu_cache = NULL;
+ int group_id;
pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
@@ -459,11 +511,13 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
if (!cpu_node)
goto out;
- cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
+ group_id = get_group_id(cpu_id, 1);
+
+ cpu_cache = cache_lookup_or_instantiate(cpu_node, group_id, 1);
if (!cpu_cache)
goto out;
- do_subsidiary_caches(cpu_cache);
+ do_subsidiary_caches(cpu_cache, cpu_id);
cache_cpu_set(cpu_cache, cpu_id);
out:
@@ -624,56 +678,37 @@ static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *
static struct kobj_attribute cache_level_attr =
__ATTR(level, 0444, level_show, NULL);
-static unsigned int index_dir_to_cpu(struct cache_index_dir *index)
-{
- struct kobject *index_dir_kobj = &index->kobj;
- struct kobject *cache_dir_kobj = index_dir_kobj->parent;
- struct kobject *cpu_dev_kobj = cache_dir_kobj->parent;
- struct device *dev = kobj_to_dev(cpu_dev_kobj);
-
- return dev->id;
-}
-
-/*
- * On big-core systems, each core has two groups of CPUs each of which
- * has its own L1-cache. The thread-siblings which share l1-cache with
- * @cpu can be obtained via cpu_smallcore_mask().
- */
-static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache)
-{
- if (cache->level == 1)
- return cpu_smallcore_mask(cpu);
-
- return &cache->shared_cpu_map;
-}
-
-static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+static ssize_t
+show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list)
{
struct cache_index_dir *index;
struct cache *cache;
const struct cpumask *mask;
- int ret, cpu;
index = kobj_to_cache_index_dir(k);
cache = index->cache;
- if (has_big_cores) {
- cpu = index_dir_to_cpu(index);
- mask = get_big_core_shared_cpu_map(cpu, cache);
- } else {
- mask = &cache->shared_cpu_map;
- }
+ mask = &cache->shared_cpu_map;
- ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n",
- cpumask_pr_args(mask));
- buf[ret++] = '\n';
- buf[ret] = '\0';
- return ret;
+ return cpumap_print_to_pagebuf(list, buf, mask);
+}
+
+static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ return show_shared_cpumap(k, attr, buf, false);
+}
+
+static ssize_t shared_cpu_list_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ return show_shared_cpumap(k, attr, buf, true);
}
static struct kobj_attribute cache_shared_cpu_map_attr =
__ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
+static struct kobj_attribute cache_shared_cpu_list_attr =
+ __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
+
/* Attributes which should always be created -- the kobject/sysfs core
* does this automatically via kobj_type->default_attrs. This is the
* minimum data required to uniquely identify a cache.
@@ -682,6 +717,7 @@ static struct attribute *cache_index_default_attrs[] = {
&cache_type_attr.attr,
&cache_level_attr.attr,
&cache_shared_cpu_map_attr.attr,
+ &cache_shared_cpu_list_attr.attr,
NULL,
};
@@ -733,13 +769,13 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
rc = attr->show(&dir->kobj, attr, buf);
if (rc <= 0) {
pr_debug("not creating %s attribute for "
- "%pOF(%s) (rc = %zd)\n",
+ "%pOFP(%s) (rc = %zd)\n",
attr->attr.name, cache->ofnode,
cache_type, rc);
continue;
}
if (sysfs_create_file(&dir->kobj, &attr->attr))
- pr_debug("could not create %s attribute for %pOF(%s)\n",
+ pr_debug("could not create %s attribute for %pOFP(%s)\n",
attr->attr.name, cache->ofnode, cache_type);
}
@@ -810,13 +846,15 @@ static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
{
struct device_node *cpu_node;
struct cache *cache;
+ int group_id;
cpu_node = of_get_cpu_node(cpu_id, NULL);
WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
if (!cpu_node)
return NULL;
- cache = cache_lookup_by_node(cpu_node);
+ group_id = get_group_id(cpu_id, 1);
+ cache = cache_lookup_by_node_group(cpu_node, group_id);
of_node_put(cpu_node);
return cache;
@@ -855,7 +893,7 @@ static void cache_cpu_clear(struct cache *cache, int cpu)
struct cache *next = cache->next_local;
WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
- "CPU %i not accounted in %pOF(%s)\n",
+ "CPU %i not accounted in %pOFP(%s)\n",
cpu, cache->ofnode,
cache_type_string(cache));
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
index 55c6ccda0a85..d92ffe4e5dc1 100644
--- a/arch/powerpc/kernel/compat_audit.c
+++ b/arch/powerpc/kernel/compat_audit.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#undef __powerpc64__
+#include <linux/audit_arch.h>
#include <asm/unistd.h>
unsigned ppc32_dir_class[] = {
@@ -31,14 +32,16 @@ int ppc32_classify_syscall(unsigned syscall)
{
switch(syscall) {
case __NR_open:
- return 2;
+ return AUDITSC_OPEN;
case __NR_openat:
- return 3;
+ return AUDITSC_OPENAT;
case __NR_socketcall:
- return 4;
+ return AUDITSC_SOCKETCALL;
case __NR_execve:
- return 5;
+ return AUDITSC_EXECVE;
+ case __NR_openat2:
+ return AUDITSC_OPENAT2;
default:
- return 1;
+ return AUDITSC_COMPAT;
}
}
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index f6517f67265a..f8b5ff64b604 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -288,6 +288,7 @@ _GLOBAL(__init_fpu_registers)
mtmsr r10
isync
blr
+_ASM_NOKPROBE_SYMBOL(__init_fpu_registers)
/* Definitions for the table use to save CPU states */
@@ -483,4 +484,5 @@ _GLOBAL(__restore_cpu_setup)
1:
mtcr r7
blr
+_ASM_NOKPROBE_SYMBOL(__restore_cpu_setup)
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 1d308780e0d3..4bf33f1b4193 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -108,15 +108,6 @@ _GLOBAL(__setup_cpu_e6500)
#endif /* CONFIG_PPC_E500MC */
#ifdef CONFIG_PPC32
-#ifdef CONFIG_E200
-_GLOBAL(__setup_cpu_e200)
- /* enable dedicated debug exception handling resources (Debug APU) */
- mfspr r3,SPRN_HID0
- ori r3,r3,HID0_DAPUEN@l
- mtspr SPRN_HID0,r3
- b __setup_e200_ivors
-#endif /* CONFIG_E200 */
-
#ifdef CONFIG_E500
#ifndef CONFIG_PPC_E500MC
_GLOBAL(__setup_cpu_e500v1)
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
deleted file mode 100644
index a460298c7ddb..000000000000
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ /dev/null
@@ -1,219 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * This file contains low level CPU setup functions.
- * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
- */
-
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cache.h>
-#include <asm/book3s/64/mmu-hash.h>
-
-/* Entry: r3 = crap, r4 = ptr to cputable entry
- *
- * Note that we can be called twice for pseudo-PVRs
- */
-_GLOBAL(__setup_cpu_power7)
- mflr r11
- bl __init_hvmode_206
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
- bl __init_LPCR_ISA206
- mtlr r11
- blr
-
-_GLOBAL(__restore_cpu_power7)
- mflr r11
- mfmsr r3
- rldicl. r0,r3,4,63
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
- bl __init_LPCR_ISA206
- mtlr r11
- blr
-
-_GLOBAL(__setup_cpu_power8)
- mflr r11
- bl __init_FSCR
- bl __init_PMU
- bl __init_PMU_ISA207
- bl __init_hvmode_206
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- ori r3, r3, LPCR_PECEDH
- li r4,0 /* LPES = 0 */
- bl __init_LPCR_ISA206
- bl __init_HFSCR
- bl __init_PMU_HV
- bl __init_PMU_HV_ISA207
- mtlr r11
- blr
-
-_GLOBAL(__restore_cpu_power8)
- mflr r11
- bl __init_FSCR
- bl __init_PMU
- bl __init_PMU_ISA207
- mfmsr r3
- rldicl. r0,r3,4,63
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- ori r3, r3, LPCR_PECEDH
- li r4,0 /* LPES = 0 */
- bl __init_LPCR_ISA206
- bl __init_HFSCR
- bl __init_PMU_HV
- bl __init_PMU_HV_ISA207
- mtlr r11
- blr
-
-_GLOBAL(__setup_cpu_power9)
- mflr r11
- bl __init_FSCR
- bl __init_PMU
- bl __init_hvmode_206
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_PSSCR,r0
- mtspr SPRN_LPID,r0
- mtspr SPRN_PID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
- or r3, r3, r4
- LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
- andc r3, r3, r4
- li r4,0 /* LPES = 0 */
- bl __init_LPCR_ISA300
- bl __init_HFSCR
- bl __init_PMU_HV
- mtlr r11
- blr
-
-_GLOBAL(__restore_cpu_power9)
- mflr r11
- bl __init_FSCR
- bl __init_PMU
- mfmsr r3
- rldicl. r0,r3,4,63
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_PSSCR,r0
- mtspr SPRN_LPID,r0
- mtspr SPRN_PID,r0
- LOAD_REG_IMMEDIATE(r0, PCR_MASK)
- mtspr SPRN_PCR,r0
- mfspr r3,SPRN_LPCR
- LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
- or r3, r3, r4
- LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
- andc r3, r3, r4
- li r4,0 /* LPES = 0 */
- bl __init_LPCR_ISA300
- bl __init_HFSCR
- bl __init_PMU_HV
- mtlr r11
- blr
-
-__init_hvmode_206:
- /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
- mfmsr r3
- rldicl. r0,r3,4,63
- bnelr
- ld r5,CPU_SPEC_FEATURES(r4)
- LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST)
- andc r5,r5,r6
- std r5,CPU_SPEC_FEATURES(r4)
- blr
-
-__init_LPCR_ISA206:
- /* Setup a sane LPCR:
- * Called with initial LPCR in R3 and desired LPES 2-bit value in R4
- *
- * LPES = 0b01 (HSRR0/1 used for 0x500)
- * PECE = 0b111
- * DPFD = 4
- * HDICE = 0
- * VC = 0b100 (VPM0=1, VPM1=0, ISL=0)
- * VRMASD = 0b10000 (L=1, LP=00)
- *
- * Other bits untouched for now
- */
- li r5,0x10
- rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5
-
- /* POWER9 has no VRMASD */
-__init_LPCR_ISA300:
- rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
- ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
- li r5,4
- rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3
- clrrdi r3,r3,1 /* clear HDICE */
- li r5,4
- rldimi r3,r5, LPCR_VC_SH, 0
- mtspr SPRN_LPCR,r3
- isync
- blr
-
-__init_FSCR:
- mfspr r3,SPRN_FSCR
- ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB
- mtspr SPRN_FSCR,r3
- blr
-
-__init_HFSCR:
- mfspr r3,SPRN_HFSCR
- ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\
- HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP
- mtspr SPRN_HFSCR,r3
- blr
-
-__init_PMU_HV:
- li r5,0
- mtspr SPRN_MMCRC,r5
- blr
-
-__init_PMU_HV_ISA207:
- li r5,0
- mtspr SPRN_MMCRH,r5
- blr
-
-__init_PMU:
- li r5,0
- mtspr SPRN_MMCRA,r5
- mtspr SPRN_MMCR0,r5
- mtspr SPRN_MMCR1,r5
- mtspr SPRN_MMCR2,r5
- blr
-
-__init_PMU_ISA207:
- li r5,0
- mtspr SPRN_MMCRS,r5
- blr
diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
new file mode 100644
index 000000000000..3cca88ee96d7
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2020, Jordan Niethe, IBM Corporation.
+ *
+ * This file contains low level CPU setup functions.
+ * Originally written in assembly by Benjamin Herrenschmidt & various other
+ * authors.
+ */
+
+#include <asm/reg.h>
+#include <asm/synch.h>
+#include <linux/bitops.h>
+#include <asm/cputable.h>
+#include <asm/cpu_setup_power.h>
+
+/* Disable CPU_FTR_HVMODE and return false if MSR:HV is not set */
+static bool init_hvmode_206(struct cpu_spec *t)
+{
+ u64 msr;
+
+ msr = mfmsr();
+ if (msr & MSR_HV)
+ return true;
+
+ t->cpu_features &= ~(CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST);
+ return false;
+}
+
+static void init_LPCR_ISA300(u64 lpcr, u64 lpes)
+{
+ /* POWER9 has no VRMASD */
+ lpcr |= (lpes << LPCR_LPES_SH) & LPCR_LPES;
+ lpcr |= LPCR_PECE0|LPCR_PECE1|LPCR_PECE2;
+ lpcr |= (4ull << LPCR_DPFD_SH) & LPCR_DPFD;
+ lpcr &= ~LPCR_HDICE; /* clear HDICE */
+ lpcr |= (4ull << LPCR_VC_SH);
+ mtspr(SPRN_LPCR, lpcr);
+ isync();
+}
+
+/*
+ * Setup a sane LPCR:
+ * Called with the initial LPCR value and the desired 2-bit LPES value
+ *
+ * LPES = 0b01 (HSRR0/1 used for 0x500)
+ * PECE = 0b111
+ * DPFD = 4
+ * HDICE = 0
+ * VC = 0b100 (VPM0=1, VPM1=0, ISL=0)
+ * VRMASD = 0b10000 (L=1, LP=00)
+ *
+ * Other bits untouched for now
+ */
+static void init_LPCR_ISA206(u64 lpcr, u64 lpes)
+{
+ lpcr |= (0x10ull << LPCR_VRMASD_SH) & LPCR_VRMASD;
+ init_LPCR_ISA300(lpcr, lpes);
+}
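
A minimal standalone illustration of the shift-and-mask pattern used in init_LPCR_ISA300()/init_LPCR_ISA206() above, which replaces the old rldimi field inserts; the field name and bit position below are made up for clarity and are not the architected LPCR layout.

#include <stdint.h>
#include <stdio.h>

/* Illustrative field only -- not the real LPCR bit assignments. */
#define TOY_DPFD_SH	52
#define TOY_DPFD	(7ULL << TOY_DPFD_SH)

int main(void)
{
	uint64_t lpcr = 0;

	/* Same shape as "lpcr |= (4ull << LPCR_DPFD_SH) & LPCR_DPFD":
	 * shift the value into position, mask to the field width, OR it in.
	 */
	lpcr |= (4ULL << TOY_DPFD_SH) & TOY_DPFD;
	printf("lpcr = 0x%016llx\n", (unsigned long long)lpcr);
	return 0;
}
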
+
+static void init_FSCR(void)
+{
+ u64 fscr;
+
+ fscr = mfspr(SPRN_FSCR);
+ fscr |= FSCR_TAR|FSCR_EBB;
+ mtspr(SPRN_FSCR, fscr);
+}
+
+static void init_FSCR_power9(void)
+{
+ u64 fscr;
+
+ fscr = mfspr(SPRN_FSCR);
+ fscr |= FSCR_SCV;
+ mtspr(SPRN_FSCR, fscr);
+ init_FSCR();
+}
+
+static void init_FSCR_power10(void)
+{
+ u64 fscr;
+
+ fscr = mfspr(SPRN_FSCR);
+ fscr |= FSCR_PREFIX;
+ mtspr(SPRN_FSCR, fscr);
+ init_FSCR_power9();
+}
+
+static void init_HFSCR(void)
+{
+ u64 hfscr;
+
+ hfscr = mfspr(SPRN_HFSCR);
+ hfscr |= HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|HFSCR_DSCR|\
+ HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP;
+ mtspr(SPRN_HFSCR, hfscr);
+}
+
+static void init_PMU_HV(void)
+{
+ mtspr(SPRN_MMCRC, 0);
+}
+
+static void init_PMU_HV_ISA207(void)
+{
+ mtspr(SPRN_MMCRH, 0);
+}
+
+static void init_PMU(void)
+{
+ mtspr(SPRN_MMCRA, 0);
+ mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR1, 0);
+ mtspr(SPRN_MMCR2, 0);
+}
+
+static void init_PMU_ISA207(void)
+{
+ mtspr(SPRN_MMCRS, 0);
+}
+
+static void init_PMU_ISA31(void)
+{
+ mtspr(SPRN_MMCR3, 0);
+ mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+ mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+}
+
+/*
+ * Note that we can be called twice for pseudo-PVRs.
+ * The parameter offset is not used.
+ */
+
+void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)
+{
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
+}
+
+void __restore_cpu_power7(void)
+{
+ u64 msr;
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
+}
+
+void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)
+{
+ init_FSCR();
+ init_PMU();
+ init_PMU_ISA207();
+
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
+ init_HFSCR();
+ init_PMU_HV();
+ init_PMU_HV_ISA207();
+}
+
+void __restore_cpu_power8(void)
+{
+ u64 msr;
+
+ init_FSCR();
+ init_PMU();
+ init_PMU_ISA207();
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
+ init_HFSCR();
+ init_PMU_HV();
+ init_PMU_HV_ISA207();
+}
+
+void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)
+{
+ init_FSCR_power9();
+ init_PMU();
+
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
+
+void __restore_cpu_power9(void)
+{
+ u64 msr;
+
+ init_FSCR_power9();
+ init_PMU();
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
+
+void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)
+{
+ init_FSCR_power10();
+ init_PMU();
+ init_PMU_ISA31();
+
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
+
+void __restore_cpu_power10(void)
+{
+ u64 msr;
+
+ init_FSCR_power10();
+ init_PMU();
+ init_PMU_ISA31();
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
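
These __setup_cpu_*/__restore_cpu_* functions are reached through the cpu_setup and cpu_restore hooks of struct cpu_spec (wired up in cputable.c below): cpu_setup runs once the PVR is matched at boot, cpu_restore re-initialises registers when state may have been lost (deep idle, secondary bringup). A simplified, self-contained mock of that wiring, with hypothetical names, looks roughly like this:

#include <stdio.h>

/* Toy stand-in for struct cpu_spec; only the two hooks of interest. */
struct cpu_spec_mock {
	const char *cpu_name;
	void (*cpu_setup)(unsigned long offset, struct cpu_spec_mock *spec);
	void (*cpu_restore)(void);
};

static void setup_power10_mock(unsigned long offset, struct cpu_spec_mock *spec)
{
	(void)offset;		/* the real hooks ignore this parameter too */
	printf("boot-time setup for %s\n", spec->cpu_name);
}

static void restore_power10_mock(void)
{
	printf("re-init after state loss (deep idle / secondary bringup)\n");
}

int main(void)
{
	struct cpu_spec_mock spec = {
		.cpu_name	= "POWER10 (raw)",
		.cpu_setup	= setup_power10_mock,
		.cpu_restore	= restore_power10_mock,
	};

	spec.cpu_setup(0, &spec);	/* what the cputable match code does */
	spec.cpu_restore();
	return 0;
}
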
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 245be4fafe13..ae0fdef0ac11 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -13,9 +13,9 @@
#include <linux/export.h>
#include <linux/jump_label.h>
-#include <asm/oprofile_impl.h>
#include <asm/cputable.h>
#include <asm/prom.h> /* for PTRRELOC on ARCH=ppc */
+#include <asm/mce.h>
#include <asm/mmu.h>
#include <asm/setup.h>
@@ -35,7 +35,6 @@ const char *powerpc_base_platform;
* and ppc64
*/
#ifdef CONFIG_PPC32
-extern void __setup_cpu_e200(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec);
@@ -59,17 +58,12 @@ extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC64
+#include <asm/cpu_setup_power.h>
extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_pa6t(void);
extern void __restore_cpu_ppc970(void);
-extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power7(void);
-extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power8(void);
-extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power9(void);
extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
@@ -118,7 +112,19 @@ extern void __restore_cpu_e6500(void);
#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \
PPC_FEATURE2_ARCH_3_00 | \
PPC_FEATURE2_HAS_IEEE128 | \
- PPC_FEATURE2_DARN )
+ PPC_FEATURE2_DARN | \
+ PPC_FEATURE2_SCV)
+#define COMMON_USER_POWER10 COMMON_USER_POWER9
+#define COMMON_USER2_POWER10 (PPC_FEATURE2_ARCH_3_1 | \
+ PPC_FEATURE2_MMA | \
+ PPC_FEATURE2_ARCH_3_00 | \
+ PPC_FEATURE2_HAS_IEEE128 | \
+ PPC_FEATURE2_DARN | \
+ PPC_FEATURE2_SCV | \
+ PPC_FEATURE2_ARCH_2_07 | \
+ PPC_FEATURE2_DSCR | \
+ PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+ PPC_FEATURE2_VEC_CRYPTO)
#ifdef CONFIG_PPC_BOOK3E_64
#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)
@@ -144,7 +150,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_ppc970,
.cpu_restore = __restore_cpu_ppc970,
.oprofile_cpu_type = "ppc64/970",
- .oprofile_type = PPC_OPROFILE_POWER4,
.platform = "ppc970",
},
{ /* PPC970FX */
@@ -162,7 +167,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_ppc970,
.cpu_restore = __restore_cpu_ppc970,
.oprofile_cpu_type = "ppc64/970",
- .oprofile_type = PPC_OPROFILE_POWER4,
.platform = "ppc970",
},
{ /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */
@@ -180,7 +184,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_ppc970,
.cpu_restore = __restore_cpu_ppc970,
.oprofile_cpu_type = "ppc64/970MP",
- .oprofile_type = PPC_OPROFILE_POWER4,
.platform = "ppc970",
},
{ /* PPC970MP */
@@ -198,7 +201,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_ppc970MP,
.cpu_restore = __restore_cpu_ppc970,
.oprofile_cpu_type = "ppc64/970MP",
- .oprofile_type = PPC_OPROFILE_POWER4,
.platform = "ppc970",
},
{ /* PPC970GX */
@@ -215,7 +217,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_ppc970,
.oprofile_cpu_type = "ppc64/970",
- .oprofile_type = PPC_OPROFILE_POWER4,
.platform = "ppc970",
},
{ /* Power5 GR */
@@ -230,12 +231,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power5",
- .oprofile_type = PPC_OPROFILE_POWER4,
- /* SIHV / SIPR bits are implemented on POWER4+ (GQ)
- * and above but only works on POWER5 and above
- */
- .oprofile_mmcra_sihv = MMCRA_SIHV,
- .oprofile_mmcra_sipr = MMCRA_SIPR,
.platform = "power5",
},
{ /* Power5++ */
@@ -249,9 +244,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.dcache_bsize = 128,
.num_pmcs = 6,
.oprofile_cpu_type = "ppc64/power5++",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_mmcra_sihv = MMCRA_SIHV,
- .oprofile_mmcra_sipr = MMCRA_SIPR,
.platform = "power5+",
},
{ /* Power5 GS */
@@ -266,9 +258,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power5+",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_mmcra_sihv = MMCRA_SIHV,
- .oprofile_mmcra_sipr = MMCRA_SIPR,
.platform = "power5+",
},
{ /* POWER6 in P5+ mode; 2.04-compliant processor */
@@ -281,7 +270,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.icache_bsize = 128,
.dcache_bsize = 128,
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
- .oprofile_type = PPC_OPROFILE_POWER4,
.platform = "power5+",
},
{ /* Power6 */
@@ -297,11 +285,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power6",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV,
- .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR,
- .oprofile_mmcra_clear = POWER6_MMCRA_THRM |
- POWER6_MMCRA_OTHER,
.platform = "power6x",
},
{ /* 2.05-compliant processor, i.e. Power6 "architected" mode */
@@ -314,7 +297,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.icache_bsize = 128,
.dcache_bsize = 128,
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
- .oprofile_type = PPC_OPROFILE_POWER4,
.platform = "power6",
},
{ /* 2.06-compliant processor, i.e. Power7 "architected" mode */
@@ -327,7 +309,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.mmu_features = MMU_FTRS_POWER7,
.icache_bsize = 128,
.dcache_bsize = 128,
- .oprofile_type = PPC_OPROFILE_POWER4,
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
@@ -344,7 +325,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.mmu_features = MMU_FTRS_POWER8,
.icache_bsize = 128,
.dcache_bsize = 128,
- .oprofile_type = PPC_OPROFILE_INVALID,
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
@@ -361,12 +341,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
.mmu_features = MMU_FTRS_POWER9,
.icache_bsize = 128,
.dcache_bsize = 128,
- .oprofile_type = PPC_OPROFILE_INVALID,
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
.platform = "power9",
},
+ { /* 3.1-compliant processor, i.e. Power10 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000006,
+ .cpu_name = "POWER10 (architected)",
+ .cpu_features = CPU_FTRS_POWER10,
+ .cpu_user_features = COMMON_USER_POWER10,
+ .cpu_user_features2 = COMMON_USER2_POWER10,
+ .mmu_features = MMU_FTRS_POWER10,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .oprofile_cpu_type = "ppc64/ibm-compat-v1",
+ .cpu_setup = __setup_cpu_power10,
+ .cpu_restore = __restore_cpu_power10,
+ .platform = "power10",
+ },
{ /* Power7 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003f0000,
@@ -380,7 +374,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power7",
- .oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
.machine_check_early = __machine_check_early_realmode_p7,
@@ -399,7 +392,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power7",
- .oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
.machine_check_early = __machine_check_early_realmode_p7,
@@ -418,7 +410,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power8",
- .oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
.machine_check_early = __machine_check_early_realmode_p8,
@@ -437,7 +428,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power8",
- .oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
.machine_check_early = __machine_check_early_realmode_p8,
@@ -456,7 +446,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power8",
- .oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
.machine_check_early = __machine_check_early_realmode_p8,
@@ -475,7 +464,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power9",
- .oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
.machine_check_early = __machine_check_early_realmode_p9,
@@ -494,7 +482,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power9",
- .oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
.machine_check_early = __machine_check_early_realmode_p9,
@@ -513,12 +500,29 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power9",
- .oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
+ { /* Power10 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00800000,
+ .cpu_name = "POWER10 (raw)",
+ .cpu_features = CPU_FTRS_POWER10,
+ .cpu_user_features = COMMON_USER_POWER10,
+ .cpu_user_features2 = COMMON_USER2_POWER10,
+ .mmu_features = MMU_FTRS_POWER10,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power10",
+ .cpu_setup = __setup_cpu_power10,
+ .cpu_restore = __restore_cpu_power10,
+ .machine_check_early = __machine_check_early_realmode_p10,
+ .platform = "power10",
+ },
{ /* Cell Broadband Engine */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00700000,
@@ -533,7 +537,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.num_pmcs = 4,
.pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/cell-be",
- .oprofile_type = PPC_OPROFILE_CELL,
.platform = "ppc-cell-be",
},
{ /* PA Semi PA6T */
@@ -550,7 +553,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_setup = __setup_cpu_pa6t,
.cpu_restore = __restore_cpu_pa6t,
.oprofile_cpu_type = "ppc64/pa6t",
- .oprofile_type = PPC_OPROFILE_PA6T,
.platform = "pa6t",
},
{ /* default match */
@@ -569,61 +571,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_601
- { /* 601 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00010000,
- .cpu_name = "601",
- .cpu_features = CPU_FTRS_PPC601,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_601_INSTR |
- PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_generic,
- .platform = "ppc601",
- },
-#endif /* CONFIG_PPC_BOOK3S_601 */
-#ifdef CONFIG_PPC_BOOK3S_6xx
- { /* 603 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00030000,
- .cpu_name = "603",
- .cpu_features = CPU_FTRS_603,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = 0,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* 603e */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00060000,
- .cpu_name = "603e",
- .cpu_features = CPU_FTRS_603,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = 0,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* 603ev */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00070000,
- .cpu_name = "603ev",
- .cpu_features = CPU_FTRS_603,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = 0,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
+#ifdef CONFIG_PPC_BOOK3S_32
+#ifdef CONFIG_PPC_BOOK3S_604
{ /* 604 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00040000,
@@ -768,7 +717,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc750",
.oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
},
{ /* 745/755 */
.pvr_mask = 0xfffff000,
@@ -800,7 +748,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc750",
.oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
},
{ /* 750FX rev 2.0 must disable HID0[DPM] */
.pvr_mask = 0xffffffff,
@@ -817,7 +764,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc750",
.oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
},
{ /* 750FX (All revs except 2.0) */
.pvr_mask = 0xffff0000,
@@ -834,7 +780,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc750",
.oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
},
{ /* 750GX */
.pvr_mask = 0xffff0000,
@@ -851,7 +796,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc750",
.oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
},
{ /* 740/750 (L2CR bit need fixup for 740) */
.pvr_mask = 0xffff0000,
@@ -930,7 +874,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
.machine_check = machine_check_generic,
.platform = "ppc7450",
},
@@ -948,7 +891,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
.machine_check = machine_check_generic,
.platform = "ppc7450",
},
@@ -966,7 +908,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
.machine_check = machine_check_generic,
.platform = "ppc7450",
},
@@ -984,7 +925,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
.machine_check = machine_check_generic,
.platform = "ppc7450",
},
@@ -1002,7 +942,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
.machine_check = machine_check_generic,
.platform = "ppc7450",
},
@@ -1020,7 +959,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
.machine_check = machine_check_generic,
.platform = "ppc7450",
},
@@ -1038,7 +976,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
.machine_check = machine_check_generic,
.platform = "ppc7450",
},
@@ -1056,7 +993,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
.machine_check = machine_check_generic,
.platform = "ppc7450",
},
@@ -1073,7 +1009,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
.machine_check = machine_check_generic,
.platform = "ppc7450",
},
@@ -1091,7 +1026,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
.machine_check = machine_check_generic,
.platform = "ppc7450",
},
@@ -1109,10 +1043,50 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
.machine_check = machine_check_generic,
.platform = "ppc7450",
},
+#endif /* CONFIG_PPC_BOOK3S_604 */
+#ifdef CONFIG_PPC_BOOK3S_603
+ { /* 603 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00030000,
+ .cpu_name = "603",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* 603e */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00060000,
+ .cpu_name = "603e",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* 603ev */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00070000,
+ .cpu_name = "603ev",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
{ /* 82xx (8240, 8245, 8260 are all 603e cores) */
.pvr_mask = 0x7fff0000,
.pvr_value = 0x00810000,
@@ -1181,7 +1155,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_83xx,
.num_pmcs = 4,
.oprofile_cpu_type = "ppc/e300",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
.platform = "ppc603",
},
{ /* e300c4 (e300c1, plus one IU) */
@@ -1198,10 +1171,11 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_83xx,
.num_pmcs = 4,
.oprofile_cpu_type = "ppc/e300",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
.platform = "ppc603",
},
#endif
+#endif /* CONFIG_PPC_BOOK3S_603 */
+#ifdef CONFIG_PPC_BOOK3S_604
{ /* default match, we assume split I/D cache & TB (non-601)... */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
@@ -1214,7 +1188,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc603",
},
-#endif /* CONFIG_PPC_BOOK3S_6xx */
+#endif /* CONFIG_PPC_BOOK3S_604 */
+#endif /* CONFIG_PPC_BOOK3S_32 */
#ifdef CONFIG_PPC_8xx
{ /* 8xx */
.pvr_mask = 0xffff0000,
@@ -1232,69 +1207,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
},
#endif /* CONFIG_PPC_8xx */
#ifdef CONFIG_40x
- { /* 403GC */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x00200200,
- .cpu_name = "403GC",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 16,
- .dcache_bsize = 16,
- .machine_check = machine_check_4xx,
- .platform = "ppc403",
- },
- { /* 403GCX */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x00201400,
- .cpu_name = "403GCX",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 16,
- .dcache_bsize = 16,
- .machine_check = machine_check_4xx,
- .platform = "ppc403",
- },
- { /* 403G ?? */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00200000,
- .cpu_name = "403G ??",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 16,
- .dcache_bsize = 16,
- .machine_check = machine_check_4xx,
- .platform = "ppc403",
- },
- { /* 405GP */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x40110000,
- .cpu_name = "405GP",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* STB 03xxx */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x40130000,
- .cpu_name = "STB03xxx",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
{ /* STB 04xxx */
.pvr_mask = 0xffff0000,
.pvr_value = 0x41810000,
@@ -1385,32 +1297,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_4xx,
.platform = "ppc405",
},
- { /* Xilinx Virtex-II Pro */
- .pvr_mask = 0xfffff000,
- .pvr_value = 0x20010000,
- .cpu_name = "Virtex-II Pro",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* Xilinx Virtex-4 FX */
- .pvr_mask = 0xfffff000,
- .pvr_value = 0x20011000,
- .cpu_name = "Virtex-4 FX",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
{ /* 405EP */
.pvr_mask = 0xffff0000,
.pvr_value = 0x51210000,
@@ -1597,6 +1483,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_40x */
#ifdef CONFIG_44x
+#ifndef CONFIG_PPC_47x
{
.pvr_mask = 0xf0000fff,
.pvr_value = 0x40000850,
@@ -1800,19 +1687,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_440A,
.platform = "ppc440",
},
- { /* 440 in Xilinx Virtex-5 FXT */
- .pvr_mask = 0xfffffff0,
- .pvr_value = 0x7ff21910,
- .cpu_name = "440 in Virtex-5 FXT",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440x5,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
{ /* 460EX */
.pvr_mask = 0xffff0006,
.pvr_value = 0x13020002,
@@ -1892,7 +1766,19 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_440A,
.platform = "ppc440",
},
-#ifdef CONFIG_PPC_47x
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic 44x PPC)",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ }
+#else /* CONFIG_PPC_47x */
{ /* 476 DD2 core */
.pvr_mask = 0xffffffff,
.pvr_value = 0x11a52080,
@@ -1949,65 +1835,20 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_47x,
.platform = "ppc470",
},
-#endif /* CONFIG_PPC_47x */
{ /* default match */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
- .cpu_name = "(generic 44x PPC)",
- .cpu_features = CPU_FTRS_44X,
+ .cpu_name = "(generic 47x PPC)",
+ .cpu_features = CPU_FTRS_47X,
.cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
+ .mmu_features = MMU_FTR_TYPE_47x,
.icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
}
+#endif /* CONFIG_PPC_47x */
#endif /* CONFIG_44x */
-#ifdef CONFIG_E200
- { /* e200z5 */
- .pvr_mask = 0xfff00000,
- .pvr_value = 0x81000000,
- .cpu_name = "e200z5",
- /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
- .cpu_features = CPU_FTRS_E200,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_EFP_SINGLE |
- PPC_FEATURE_UNIFIED_CACHE,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .dcache_bsize = 32,
- .machine_check = machine_check_e200,
- .platform = "ppc5554",
- },
- { /* e200z6 */
- .pvr_mask = 0xfff00000,
- .pvr_value = 0x81100000,
- .cpu_name = "e200z6",
- /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
- .cpu_features = CPU_FTRS_E200,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE_COMP |
- PPC_FEATURE_UNIFIED_CACHE,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .dcache_bsize = 32,
- .machine_check = machine_check_e200,
- .platform = "ppc5554",
- },
- { /* default match */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "(generic E200 PPC)",
- .cpu_features = CPU_FTRS_E200,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_EFP_SINGLE |
- PPC_FEATURE_UNIFIED_CACHE,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_e200,
- .machine_check = machine_check_e200,
- .platform = "ppc5554",
- }
-#endif /* CONFIG_E200 */
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_E500
#ifdef CONFIG_PPC32
@@ -2026,7 +1867,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.dcache_bsize = 32,
.num_pmcs = 4,
.oprofile_cpu_type = "ppc/e500",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
.cpu_setup = __setup_cpu_e500v1,
.machine_check = machine_check_e500,
.platform = "ppc8540",
@@ -2046,7 +1886,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.dcache_bsize = 32,
.num_pmcs = 4,
.oprofile_cpu_type = "ppc/e500",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
.cpu_setup = __setup_cpu_e500v2,
.machine_check = machine_check_e500,
.platform = "ppc8548",
@@ -2066,7 +1905,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.dcache_bsize = 64,
.num_pmcs = 4,
.oprofile_cpu_type = "ppc/e500mc",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
.cpu_setup = __setup_cpu_e500mc,
.machine_check = machine_check_e500mc,
.platform = "ppce500mc",
@@ -2088,7 +1926,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.dcache_bsize = 64,
.num_pmcs = 4,
.oprofile_cpu_type = "ppc/e500mc",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
.cpu_setup = __setup_cpu_e5500,
#ifndef CONFIG_PPC32
.cpu_restore = __restore_cpu_e5500,
@@ -2111,7 +1948,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.dcache_bsize = 64,
.num_pmcs = 6,
.oprofile_cpu_type = "ppc/e6500",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
.cpu_setup = __setup_cpu_e6500,
#ifndef CONFIG_PPC32
.cpu_restore = __restore_cpu_e6500,
@@ -2177,10 +2013,6 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
if (old.num_pmcs && !s->num_pmcs) {
t->num_pmcs = old.num_pmcs;
t->pmc_type = old.pmc_type;
- t->oprofile_type = old.oprofile_type;
- t->oprofile_mmcra_sihv = old.oprofile_mmcra_sihv;
- t->oprofile_mmcra_sipr = old.oprofile_mmcra_sipr;
- t->oprofile_mmcra_clear = old.oprofile_mmcra_clear;
/*
* If we have passed through this logic once before and
@@ -2198,7 +2030,6 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
*/
if (old.oprofile_cpu_type != NULL) {
t->oprofile_cpu_type = old.oprofile_cpu_type;
- t->oprofile_type = old.oprofile_type;
t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG;
}
}
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 05745ddbd229..5693e1c67c2b 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -18,6 +18,7 @@
#include <asm/firmware.h>
#include <linux/uaccess.h>
#include <asm/rtas.h>
+#include <asm/inst.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -34,7 +35,7 @@ void __init reserve_kdump_trampoline(void)
static void __init create_trampoline(unsigned long addr)
{
- unsigned int *p = (unsigned int *)addr;
+ u32 *p = (u32 *)addr;
 /* The maximum range of a single instruction branch is the current
* instruction's address + (32 MB - 4) bytes. For the trampoline we
@@ -44,8 +45,8 @@ static void __init create_trampoline(unsigned long addr)
* branch to "addr" we jump to ("addr" + 32 MB). Although it requires
* two instructions it doesn't require any registers.
*/
- patch_instruction(p, PPC_INST_NOP);
- patch_branch(++p, addr + PHYSICAL_START, 0);
+ patch_instruction(p, ppc_inst(PPC_RAW_NOP()));
+ patch_branch(p + 1, addr + PHYSICAL_START, 0);
}
void __init setup_kdump_trampoline(void)
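
A rough standalone sketch of the two-instruction trampoline described in the comment above: a nop at the vector address, then a relative branch at addr + 4, which can just reach addr + 32 MB. The instruction encodings and the PHYSICAL_START value are illustrative assumptions, not the kernel's helpers.

#include <stdint.h>
#include <stdio.h>

#define TOY_PPC_NOP	0x60000000u		/* ori r0,r0,0 */

/* Forward-only relative "b"; enough for this illustration. */
static uint32_t toy_ppc_branch(uint32_t from, uint32_t to)
{
	return 0x48000000u | ((to - from) & 0x03fffffcu);
}

int main(void)
{
	uint32_t addr = 0x100;			/* an exception vector       */
	uint32_t physical_start = 0x2000000;	/* assumed 32 MB relocation  */
	uint32_t trampoline[2];

	trampoline[0] = TOY_PPC_NOP;
	trampoline[1] = toy_ppc_branch(addr + 4, addr + physical_start);
	printf("%08x %08x\n", trampoline[0], trampoline[1]);
	return 0;
}
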
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index cc14aa6c4a1b..64e423d2fe0f 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -9,14 +9,13 @@
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
-#include <asm/debugfs.h>
#include <asm/machdep.h>
#include <asm/hvcall.h>
bool dawr_force_enable;
EXPORT_SYMBOL_GPL(dawr_force_enable);
-int set_dawr(struct arch_hw_breakpoint *brk)
+int set_dawr(int nr, struct arch_hw_breakpoint *brk)
{
unsigned long dawr, dawrx, mrd;
@@ -37,17 +36,26 @@ int set_dawr(struct arch_hw_breakpoint *brk)
dawrx |= (mrd & 0x3f) << (63 - 53);
if (ppc_md.set_dawr)
- return ppc_md.set_dawr(dawr, dawrx);
-
- mtspr(SPRN_DAWR, dawr);
- mtspr(SPRN_DAWRX, dawrx);
+ return ppc_md.set_dawr(nr, dawr, dawrx);
+
+ if (nr == 0) {
+ mtspr(SPRN_DAWR0, dawr);
+ mtspr(SPRN_DAWRX0, dawrx);
+ } else {
+ mtspr(SPRN_DAWR1, dawr);
+ mtspr(SPRN_DAWRX1, dawrx);
+ }
return 0;
}
-static void set_dawr_cb(void *info)
+static void disable_dawrs_cb(void *info)
{
- set_dawr(info);
+ struct arch_hw_breakpoint null_brk = {0};
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++)
+ set_dawr(i, &null_brk);
}
static ssize_t dawr_write_file_bool(struct file *file,
@@ -60,7 +68,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
 /* Send error to user if the hypervisor won't allow us to write DAWR */
if (!dawr_force_enable &&
firmware_has_feature(FW_FEATURE_LPAR) &&
- set_dawr(&null_brk) != H_SUCCESS)
+ set_dawr(0, &null_brk) != H_SUCCESS)
return -ENODEV;
rc = debugfs_write_file_bool(file, user_buf, count, ppos);
@@ -69,7 +77,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
/* If we are clearing, make sure all CPUs have the DAWR cleared */
if (!dawr_force_enable)
- smp_call_function(set_dawr_cb, &null_brk, 0);
+ smp_call_function(disable_dawrs_cb, NULL, 0);
return rc;
}
@@ -92,7 +100,7 @@ static int __init dawr_force_setup(void)
if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
/* Turn DAWR off by default, but allow admin to turn it on */
debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
- powerpc_debugfs_root,
+ arch_debugfs_dir,
&dawr_force_enable,
&dawr_enable_fops);
}
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index f17ff1200eaa..5545c9cd17c1 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -12,72 +12,17 @@
#include <linux/hardirq.h>
#include <asm/dbell.h>
+#include <asm/interrupt.h>
#include <asm/irq_regs.h>
#include <asm/kvm_ppc.h>
#include <asm/trace.h>
#ifdef CONFIG_SMP
-/*
- * Doorbells must only be used if CPU_FTR_DBELL is available.
- * msgsnd is used in HV, and msgsndp is used in !HV.
- *
- * These should be used by platform code that is aware of restrictions.
- * Other arch code should use ->cause_ipi.
- *
- * doorbell_global_ipi() sends a dbell to any target CPU.
- * Must be used only by architectures that address msgsnd target
- * by PIR/get_hard_smp_processor_id.
- */
-void doorbell_global_ipi(int cpu)
-{
- u32 tag = get_hard_smp_processor_id(cpu);
-
- kvmppc_set_host_ipi(cpu);
- /* Order previous accesses vs. msgsnd, which is treated as a store */
- ppc_msgsnd_sync();
- ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
-}
-
-/*
- * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
- * Must be used only by architectures that address msgsnd target
- * by TIR/cpu_thread_in_core.
- */
-void doorbell_core_ipi(int cpu)
-{
- u32 tag = cpu_thread_in_core(cpu);
-
- kvmppc_set_host_ipi(cpu);
- /* Order previous accesses vs. msgsnd, which is treated as a store */
- ppc_msgsnd_sync();
- ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
-}
-
-/*
- * Attempt to cause a core doorbell if destination is on the same core.
- * Returns 1 on success, 0 on failure.
- */
-int doorbell_try_core_ipi(int cpu)
-{
- int this_cpu = get_cpu();
- int ret = 0;
-
- if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
- doorbell_core_ipi(cpu);
- ret = 1;
- }
-
- put_cpu();
-
- return ret;
-}
-
-void doorbell_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- irq_enter();
trace_doorbell_entry(regs);
ppc_msgsync();
@@ -90,13 +35,12 @@ void doorbell_exception(struct pt_regs *regs)
smp_ipi_demux_relaxed(); /* already performed the barrier */
trace_doorbell_exit(regs);
- irq_exit();
+
set_irq_regs(old_regs);
}
#else /* CONFIG_SMP */
-void doorbell_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
{
printk(KERN_WARNING "Received doorbell on non-smp system\n");
}
#endif /* CONFIG_SMP */
-
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index e486d1d78de2..038ce8d9061d 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -10,26 +10,66 @@
#include <linux/pci.h>
#include <asm/iommu.h>
-/*
- * Generic iommu implementation
- */
+#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT
+#define can_map_direct(dev, addr) \
+ ((dev)->bus_dma_limit >= phys_to_dma((dev), (addr)))
-/*
- * The coherent mask may be smaller than the real mask, check if we can
- * really use a direct window.
- */
-static inline bool dma_iommu_alloc_bypass(struct device *dev)
+bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr)
+{
+ if (likely(!dev->bus_dma_limit))
+ return false;
+
+ return can_map_direct(dev, addr);
+}
+
+#define is_direct_handle(dev, h) ((h) >= (dev)->archdata.dma_offset)
+
+bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle)
{
- return dev->archdata.iommu_bypass && !iommu_fixed_is_weak &&
- dma_direct_supported(dev, dev->coherent_dma_mask);
+ if (likely(!dev->bus_dma_limit))
+ return false;
+
+ return is_direct_handle(dev, dma_handle);
}
-static inline bool dma_iommu_map_bypass(struct device *dev,
- unsigned long attrs)
+bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg,
+ int nents)
{
- return dev->archdata.iommu_bypass &&
- (!iommu_fixed_is_weak || (attrs & DMA_ATTR_WEAK_ORDERING));
+ struct scatterlist *s;
+ int i;
+
+ if (likely(!dev->bus_dma_limit))
+ return false;
+
+ for_each_sg(sg, s, nents, i) {
+ if (!can_map_direct(dev, sg_phys(s) + s->offset + s->length))
+ return false;
+ }
+
+ return true;
+}
+
+bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg,
+ int nents)
+{
+ struct scatterlist *s;
+ int i;
+
+ if (likely(!dev->bus_dma_limit))
+ return false;
+
+ for_each_sg(sg, s, nents, i) {
+ if (!is_direct_handle(dev, s->dma_address + s->length))
+ return false;
+ }
+
+ return true;
}
+#endif /* CONFIG_ARCH_HAS_DMA_MAP_DIRECT */
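
The can_map_direct()/is_direct_handle() checks above reduce to comparing an address against the device's limited 1:1 window. A toy, self-contained version of that comparison follows; all field names and numbers are invented, and the surrounding dma_ops_bypass policy is not modelled.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_dev {
	uint64_t bus_dma_limit;	/* end of the limited 1:1 window        */
	uint64_t dma_offset;	/* stand-in for archdata.dma_offset     */
};

/* Mirrors "bus_dma_limit >= phys_to_dma(dev, addr)" for a simple offset. */
static bool toy_fits_direct_window(const struct toy_dev *dev, uint64_t phys)
{
	return dev->bus_dma_limit >= phys + dev->dma_offset;
}

int main(void)
{
	struct toy_dev dev = {
		.bus_dma_limit	= 1ULL << 40,	/* window ends at 1 TB      */
		.dma_offset	= 1ULL << 39,	/* window starts at 512 GB  */
	};

	printf("4 GB page fits:   %d\n", toy_fits_direct_window(&dev, 1ULL << 32));
	printf("768 GB page fits: %d\n", toy_fits_direct_window(&dev, 3ULL << 38));
	return 0;
}
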
+
+/*
+ * Generic iommu implementation
+ */
/* Allocates a contiguous real buffer and creates mappings over it.
* Returns the virtual address of the buffer and sets dma_handle
@@ -39,8 +79,6 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs)
{
- if (dma_iommu_alloc_bypass(dev))
- return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
dma_handle, dev->coherent_dma_mask, flag,
dev_to_node(dev));
@@ -50,11 +88,7 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_free(dev, size, vaddr, dma_handle, attrs);
- else
- iommu_free_coherent(get_iommu_table_base(dev), size, vaddr,
- dma_handle);
+ iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
}
/* Creates TCEs for a user provided buffer. The user buffer must be
@@ -67,9 +101,6 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
enum dma_data_direction direction,
unsigned long attrs)
{
- if (dma_iommu_map_bypass(dev, attrs))
- return dma_direct_map_page(dev, page, offset, size, direction,
- attrs);
return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
size, dma_get_mask(dev), direction, attrs);
}
@@ -79,11 +110,8 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction,
unsigned long attrs)
{
- if (!dma_iommu_map_bypass(dev, attrs))
- iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size,
- direction, attrs);
- else
- dma_direct_unmap_page(dev, dma_handle, size, direction, attrs);
+ iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
+ attrs);
}
@@ -91,8 +119,6 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
- if (dma_iommu_map_bypass(dev, attrs))
- return dma_direct_map_sg(dev, sglist, nelems, direction, attrs);
return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
dma_get_mask(dev), direction, attrs);
}
@@ -101,11 +127,8 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
- if (!dma_iommu_map_bypass(dev, attrs))
- ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
+ ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
direction, attrs);
- else
- dma_direct_unmap_sg(dev, sglist, nelems, direction, attrs);
}
static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
@@ -113,8 +136,9 @@ static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_controller *phb = pci_bus_to_host(pdev->bus);
- return phb->controller_ops.iommu_bypass_supported &&
- phb->controller_ops.iommu_bypass_supported(pdev, mask);
+ if (iommu_fixed_is_weak || !phb->controller_ops.iommu_bypass_supported)
+ return false;
+ return phb->controller_ops.iommu_bypass_supported(pdev, mask);
}
/* We support DMA to/from any memory page via the iommu */
@@ -123,8 +147,18 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
struct iommu_table *tbl = get_iommu_table_base(dev);
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
- dev->archdata.iommu_bypass = true;
- dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+ /*
+ * dma_iommu_bypass_supported() sets the device's bus_dma_limit when
+ * there is a 1:1 mapping but its range is limited.
+ * ibm,pmemory is one example.
+ */
+ dev->dma_ops_bypass = dev->bus_dma_limit == 0;
+ if (!dev->dma_ops_bypass)
+ dev_warn(dev,
+ "iommu: 64-bit OK but direct DMA is limited by %llx\n",
+ dev->bus_dma_limit);
+ else
+ dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
return 1;
}
@@ -141,7 +175,7 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
}
dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
- dev->archdata.iommu_bypass = false;
+ dev->dma_ops_bypass = false;
return 1;
}
@@ -150,50 +184,25 @@ u64 dma_iommu_get_required_mask(struct device *dev)
struct iommu_table *tbl = get_iommu_table_base(dev);
u64 mask;
- if (!tbl)
- return 0;
-
if (dev_is_pci(dev)) {
u64 bypass_mask = dma_direct_get_required_mask(dev);
- if (dma_iommu_bypass_supported(dev, bypass_mask))
+ if (dma_iommu_dma_supported(dev, bypass_mask)) {
+ dev_info(dev, "%s: returning bypass mask 0x%llx\n", __func__, bypass_mask);
return bypass_mask;
+ }
}
- mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1);
+ if (!tbl)
+ return 0;
+
+ mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) +
+ tbl->it_page_shift - 1);
mask += mask - 1;
return mask;
}
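
As a worked example of the corrected mask computation above (made-up table geometry): a table of 0x8000 entries of 64 KiB pages spans 2 GiB, so the required mask comes out as 32 bits of ones.

#include <stdint.h>
#include <stdio.h>

/* Local stand-in for the kernel's fls_long(): index of the highest set bit. */
static unsigned int toy_fls_long(unsigned long x)
{
	unsigned int n = 0;

	while (x) {
		n++;
		x >>= 1;
	}
	return n;
}

int main(void)
{
	unsigned long it_offset = 0, it_size = 0x8000;	/* entries       */
	unsigned int it_page_shift = 16;		/* 64 KiB pages  */
	uint64_t mask;

	mask = 1ULL << (toy_fls_long(it_offset + it_size) + it_page_shift - 1);
	mask += mask - 1;	/* set all bits below the top bit */
	printf("required mask = 0x%016llx\n", (unsigned long long)mask);
	return 0;
}
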
-static void dma_iommu_sync_for_cpu(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_single_for_cpu(dev, addr, size, dir);
-}
-
-static void dma_iommu_sync_for_device(struct device *dev, dma_addr_t addr,
- size_t sz, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_single_for_device(dev, addr, sz, dir);
-}
-
-extern void dma_iommu_sync_sg_for_cpu(struct device *dev,
- struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir);
-}
-
-extern void dma_iommu_sync_sg_for_device(struct device *dev,
- struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
- if (dma_iommu_alloc_bypass(dev))
- dma_direct_sync_sg_for_device(dev, sgl, nents, dir);
-}
-
const struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
@@ -203,10 +212,8 @@ const struct dma_map_ops dma_iommu_ops = {
.map_page = dma_iommu_map_page,
.unmap_page = dma_iommu_unmap_page,
.get_required_mask = dma_iommu_get_required_mask,
- .sync_single_for_cpu = dma_iommu_sync_for_cpu,
- .sync_single_for_device = dma_iommu_sync_for_device,
- .sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu,
- .sync_sg_for_device = dma_iommu_sync_sg_for_device,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
};
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 182b4047c1ef..ba527fb52993 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -17,15 +17,15 @@
#include <asm/cputable.h>
#include <asm/dt_cpu_ftrs.h>
+#include <asm/mce.h>
#include <asm/mmu.h>
-#include <asm/oprofile_impl.h>
#include <asm/prom.h>
#include <asm/setup.h>
/* Device-tree visible constants follow */
-#define ISA_V2_07B 2070
#define ISA_V3_0B 3000
+#define ISA_V3_1 3100
#define USABLE_PR (1U << 0)
#define USABLE_OS (1U << 1)
@@ -64,44 +64,24 @@ struct dt_cpu_feature {
* Set up the base CPU
*/
-extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
-extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
-
static int hv_mode;
static struct {
u64 lpcr;
- u64 lpcr_clear;
u64 hfscr;
u64 fscr;
+ u64 pcr;
} system_registers;
static void (*init_pmu_registers)(void);
static void __restore_cpu_cpufeatures(void)
{
- u64 lpcr;
-
- /*
- * LPCR is restored by the power on engine already. It can be changed
- * after early init e.g., by radix enable, and we have no unified API
- * for saving and restoring such SPRs.
- *
- * This ->restore hook should really be removed from idle and register
- * restore moved directly into the idle restore code, because this code
- * doesn't know how idle is implemented or what it needs restored here.
- *
- * The best we can do to accommodate secondary boot and idle restore
- * for now is "or" LPCR with existing.
- */
- lpcr = mfspr(SPRN_LPCR);
- lpcr |= system_registers.lpcr;
- lpcr &= ~system_registers.lpcr_clear;
- mtspr(SPRN_LPCR, lpcr);
+ mtspr(SPRN_LPCR, system_registers.lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
- mtspr(SPRN_PCR, PCR_MASK);
+ mtspr(SPRN_PCR, system_registers.pcr);
}
mtspr(SPRN_FSCR, system_registers.fscr);
@@ -122,7 +102,6 @@ static struct cpu_spec __initdata base_cpu_spec = {
.num_pmcs = 0,
.pmc_type = PPC_PMC_DEFAULT,
.oprofile_cpu_type = NULL,
- .oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = NULL,
.cpu_restore = __restore_cpu_cpufeatures,
.machine_check_early = NULL,
@@ -139,7 +118,6 @@ static void __init cpufeatures_setup_cpu(void)
/* Initialize the base environment -- clear FSCR/HFSCR. */
hv_mode = !!(mfmsr() & MSR_HV);
if (hv_mode) {
- /* CPU_FTR_HVMODE is used early in PACA setup */
cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
mtspr(SPRN_HFSCR, 0);
}
@@ -275,13 +253,6 @@ static int __init feat_enable_idle_nap(struct dt_cpu_feature *f)
return 1;
}
-static int __init feat_enable_align_dsisr(struct dt_cpu_feature *f)
-{
- cur_cpu_spec->cpu_features &= ~CPU_FTR_NODSISRALIGN;
-
- return 1;
-}
-
static int __init feat_enable_idle_stop(struct dt_cpu_feature *f)
{
u64 lpcr;
@@ -319,7 +290,6 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
{
u64 lpcr;
- system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR);
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
mtspr(SPRN_LPCR, lpcr);
@@ -336,6 +306,7 @@ static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
#ifdef CONFIG_PPC_RADIX_MMU
cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+ cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
return 1;
@@ -347,6 +318,14 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f)
{
u64 lpcr;
+ /*
+ * Linux relies on FSCR[DSCR] being clear, so that we can take the
+ * facility unavailable interrupt and track the task's usage of DSCR.
+ * See facility_unavailable_exception().
+ * Clear the bit here so that feat_enable() doesn't set it.
+ */
+ f->fscr_bit_nr = -1;
+
feat_enable(f);
lpcr = mfspr(SPRN_LPCR);
@@ -441,6 +420,40 @@ static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f)
return 1;
}
+static void init_pmu_power10(void)
+{
+ init_pmu_power9();
+
+ mtspr(SPRN_MMCR3, 0);
+ mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+ mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+}
+
+static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
+{
+ hfscr_pmu_enable();
+
+ init_pmu_power10();
+ init_pmu_registers = init_pmu_power10;
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+
+ cur_cpu_spec->num_pmcs = 6;
+ cur_cpu_spec->pmc_type = PPC_PMC_IBM;
+ cur_cpu_spec->oprofile_cpu_type = "ppc64/power10";
+
+ return 1;
+}
+
+static int __init feat_enable_mce_power10(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->platform = "power10";
+ cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10;
+
+ return 1;
+}
+
static int __init feat_enable_tm(struct dt_cpu_feature *f)
{
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -553,6 +566,18 @@ static int __init feat_enable_large_ci(struct dt_cpu_feature *f)
return 1;
}
+static int __init feat_enable_mma(struct dt_cpu_feature *f)
+{
+ u64 pcr;
+
+ feat_enable(f);
+ pcr = mfspr(SPRN_PCR);
+ pcr &= ~PCR_MMA_DIS;
+ mtspr(SPRN_PCR, pcr);
+
+ return 1;
+}
+
struct dt_cpu_feature_match {
const char *name;
int (*enable)(struct dt_cpu_feature *f);
@@ -566,6 +591,7 @@ static struct dt_cpu_feature_match __initdata
{"little-endian", feat_enable_le, CPU_FTR_REAL_LE},
{"smt", feat_enable_smt, 0},
{"interrupt-facilities", feat_enable, 0},
+ {"system-call-vectored", feat_enable, 0},
{"timer-facilities", feat_enable, 0},
{"timer-facilities-v3", feat_enable, 0},
{"debug-facilities", feat_enable, 0},
@@ -588,7 +614,7 @@ static struct dt_cpu_feature_match __initdata
{"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST},
{"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG},
{"idle-nap", feat_enable_idle_nap, 0},
- {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0},
+ /* alignment-interrupt-dsisr ignored */
{"idle-stop", feat_enable_idle_stop, 0},
{"machine-check-power8", feat_enable_mce_power8, 0},
{"performance-monitor-power8", feat_enable_pmu_power8, 0},
@@ -617,7 +643,9 @@ static struct dt_cpu_feature_match __initdata
{"group-start-register", feat_enable, 0},
{"pc-relative-addressing", feat_enable, 0},
{"machine-check-power9", feat_enable_mce_power9, 0},
+ {"machine-check-power10", feat_enable_mce_power10, 0},
{"performance-monitor-power9", feat_enable_pmu_power9, 0},
+ {"performance-monitor-power10", feat_enable_pmu_power10, 0},
{"event-based-branch-v3", feat_enable, 0},
{"random-number-generator", feat_enable, 0},
{"system-call-vectored", feat_disable, 0},
@@ -626,6 +654,9 @@ static struct dt_cpu_feature_match __initdata
{"vector-binary128", feat_enable, 0},
{"vector-binary16", feat_enable, 0},
{"wait-v3", feat_enable, 0},
+ {"prefix-instructions", feat_enable, 0},
+ {"matrix-multiply-assist", feat_enable_mma, 0},
+ {"debug-facilities-v31", feat_enable, CPU_FTR_DAWR1},
};
static bool __initdata using_dt_cpu_ftrs;
@@ -651,10 +682,15 @@ static void __init cpufeatures_setup_start(u32 isa)
{
pr_info("setup for ISA %d\n", isa);
- if (isa >= 3000) {
+ if (isa >= ISA_V3_0B) {
cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300;
cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00;
}
+
+ if (isa >= ISA_V3_1) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31;
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1;
+ }
}
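
The two version checks above are cumulative: an ISA 3.1 part also gets the 3.0 features. A toy illustration with invented flag values:

#include <stdio.h>

#define TOY_FTR_ARCH_300	0x1u
#define TOY_FTR_ARCH_31		0x2u

static unsigned int toy_features_for_isa(unsigned int isa)
{
	unsigned int features = 0;

	if (isa >= 3000)	/* ISA v3.0B */
		features |= TOY_FTR_ARCH_300;
	if (isa >= 3100)	/* ISA v3.1 */
		features |= TOY_FTR_ARCH_31;
	return features;
}

int main(void)
{
	printf("isa 3000 -> 0x%x\n", toy_features_for_isa(3000));
	printf("isa 3100 -> 0x%x\n", toy_features_for_isa(3100));
	return 0;
}
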
static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
@@ -748,12 +784,6 @@ static __init void cpufeatures_cpu_quirks(void)
}
update_tlbie_feature_flag(version);
- /*
- * PKEY was not in the initial base or feature node
- * specification, but it should become optional in the next
- * cpu feature version sequence.
- */
- cur_cpu_spec->cpu_features |= CPU_FTR_PKEY;
}
static void __init cpufeatures_setup_finished(void)
@@ -771,6 +801,7 @@ static void __init cpufeatures_setup_finished(void)
system_registers.lpcr = mfspr(SPRN_LPCR);
system_registers.hfscr = mfspr(SPRN_HFSCR);
system_registers.fscr = mfspr(SPRN_FSCR);
+ system_registers.pcr = mfspr(SPRN_PCR);
pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n",
cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
@@ -1064,8 +1095,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
cpufeatures_setup_finished();
- memblock_free(__pa(dt_cpu_features),
- sizeof(struct dt_cpu_feature)*nr_dt_cpu_features);
+ memblock_free(dt_cpu_features,
+ sizeof(struct dt_cpu_feature) * nr_dt_cpu_features);
return 0;
}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 17cb3e9b5697..28bb1e7263a6 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -21,9 +21,9 @@
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>
+#include <linux/debugfs.h>
#include <linux/atomic.h>
-#include <asm/debugfs.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
@@ -167,39 +167,33 @@ void eeh_show_enabled(void)
*/
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
u32 cfg;
int cap, i;
int n = 0, l = 0;
char buffer[128];
- if (!pdn) {
- pr_warn("EEH: Note: No error log for absent device.\n");
- return 0;
- }
-
n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
- pdn->phb->global_number, pdn->busno,
- PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+ edev->pe->phb->global_number, edev->bdfn >> 8,
+ PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
- pdn->phb->global_number, pdn->busno,
- PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+ edev->pe->phb->global_number, edev->bdfn >> 8,
+ PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
- eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
+ eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg);
n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
- eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
+ eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg);
n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
/* Gather bridge-specific registers */
if (edev->mode & EEH_DEV_BRIDGE) {
- eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
+ eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg);
n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
- eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
+ eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg);
n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
pr_warn("EEH: Bridge control: %04x\n", cfg);
}
@@ -207,11 +201,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
/* Dump out the PCI-X command and status regs */
cap = edev->pcix_cap;
if (cap) {
- eeh_ops->read_config(pdn, cap, 4, &cfg);
+ eeh_ops->read_config(edev, cap, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
- eeh_ops->read_config(pdn, cap+4, 4, &cfg);
+ eeh_ops->read_config(edev, cap+4, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
pr_warn("EEH: PCI-X status: %08x\n", cfg);
}
@@ -223,7 +217,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
pr_warn("EEH: PCI-E capabilities and status follow:\n");
for (i=0; i<=8; i++) {
- eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
+ eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
if ((i % 4) == 0) {
@@ -250,7 +244,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
pr_warn("EEH: PCI-E AER capability register set follows:\n");
for (i=0; i<=13; i++) {
- eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
+ eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
if ((i % 4) == 0) {
@@ -352,31 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
*/
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
- pte_t *ptep;
- unsigned long pa;
- int hugepage_shift;
-
- /*
- * We won't find hugepages here(this is iomem). Hence we are not
- * worried about _PAGE_SPLITTING/collapse. Also we will not hit
- * page table free, because of init_mm.
- */
- ptep = find_init_mm_pte(token, &hugepage_shift);
- if (!ptep)
- return token;
-
- pa = pte_pfn(*ptep);
-
- /* On radix we can do hugepage mappings for io, so handle that */
- if (hugepage_shift) {
- pa <<= hugepage_shift;
- pa |= token & ((1ul << hugepage_shift) - 1);
- } else {
- pa <<= PAGE_SHIFT;
- pa |= token & (PAGE_SIZE - 1);
- }
-
- return pa;
+ return ppc_find_vmap_phys(token);
}
/*
@@ -429,6 +399,14 @@ out:
return ret;
}
+static inline const char *eeh_driver_name(struct pci_dev *pdev)
+{
+ if (pdev)
+ return dev_driver_string(&pdev->dev);
+
+ return "<null>";
+}
+
/**
* eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
* @edev: eeh device
@@ -472,11 +450,6 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
return 0;
}
- if (!pe->addr && !pe->config_addr) {
- eeh_stats.no_cfg_addr++;
- return 0;
- }
-
/*
* On PowerNV platform, we might already have fenced PHB
* there and we need take care of that firstly.
@@ -624,6 +597,7 @@ EXPORT_SYMBOL(eeh_check_failure);
/**
* eeh_pci_enable - Enable MMIO or DMA transfers for this slot
* @pe: EEH PE
+ * @function: EEH option
*
* This routine should be called to reenable frozen MMIO or DMA
* so that it would work correctly again. It's useful while doing
@@ -726,7 +700,6 @@ static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
struct pci_dev *dev = userdata;
@@ -734,73 +707,14 @@ static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
return;
/* Apply customization from firmware */
- if (pdn && eeh_ops->restore_config)
- eeh_ops->restore_config(pdn);
+ if (eeh_ops->restore_config)
+ eeh_ops->restore_config(edev);
/* The caller should restore state for the specified device */
if (pdev != dev)
pci_restore_state(pdev);
}
-int eeh_restore_vf_config(struct pci_dn *pdn)
-{
- struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
- u32 devctl, cmd, cap2, aer_capctl;
- int old_mps;
-
- if (edev->pcie_cap) {
- /* Restore MPS */
- old_mps = (ffs(pdn->mps) - 8) << 5;
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, &devctl);
- devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
- devctl |= old_mps;
- eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, devctl);
-
- /* Disable Completion Timeout if possible */
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
- 4, &cap2);
- if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) {
- eeh_ops->read_config(pdn,
- edev->pcie_cap + PCI_EXP_DEVCTL2,
- 4, &cap2);
- cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS;
- eeh_ops->write_config(pdn,
- edev->pcie_cap + PCI_EXP_DEVCTL2,
- 4, cap2);
- }
- }
-
- /* Enable SERR and parity checking */
- eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
- cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
- eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
-
- /* Enable report various errors */
- if (edev->pcie_cap) {
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, &devctl);
- devctl &= ~PCI_EXP_DEVCTL_CERE;
- devctl |= (PCI_EXP_DEVCTL_NFERE |
- PCI_EXP_DEVCTL_FERE |
- PCI_EXP_DEVCTL_URRE);
- eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, devctl);
- }
-
- /* Enable ECRC generation and check */
- if (edev->pcie_cap && edev->aer_cap) {
- eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
- 4, &aer_capctl);
- aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
- eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
- 4, aer_capctl);
- }
-
- return 0;
-}
-
/**
* pcibios_set_pcie_reset_state - Set PCI-E reset state
* @dev: pci device struct
@@ -850,14 +764,14 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
default:
eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
return -EINVAL;
- };
+ }
return 0;
}
/**
- * eeh_set_pe_freset - Check the required reset for the indicated device
- * @data: EEH device
+ * eeh_set_dev_freset - Check the required reset for the indicated device
+ * @edev: EEH device
* @flag: return value
*
* Each device might have its preferred reset type: fundamental or
@@ -896,6 +810,7 @@ static void eeh_pe_refreeze_passed(struct eeh_pe *root)
/**
* eeh_pe_reset_full - Complete a full reset process on the indicated PE
* @pe: EEH PE
+ * @include_passed: include passed-through devices?
*
* This function executes a full reset procedure on a PE, including setting
* the appropriate flags, performing a fundamental or hot reset, and then
@@ -977,15 +892,13 @@ int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
*/
void eeh_save_bars(struct eeh_dev *edev)
{
- struct pci_dn *pdn;
int i;
- pdn = eeh_dev_to_pdn(edev);
- if (!pdn)
+ if (!edev)
return;
for (i = 0; i < 16; i++)
- eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);
+ eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]);
/*
* For PCI bridges including root port, we need enable bus
@@ -997,56 +910,6 @@ void eeh_save_bars(struct eeh_dev *edev)
edev->config_space[1] |= PCI_COMMAND_MASTER;
}
-/**
- * eeh_ops_register - Register platform dependent EEH operations
- * @ops: platform dependent EEH operations
- *
- * Register the platform dependent EEH operation callback
- * functions. The platform should call this function before
- * any other EEH operations.
- */
-int __init eeh_ops_register(struct eeh_ops *ops)
-{
- if (!ops->name) {
- pr_warn("%s: Invalid EEH ops name for %p\n",
- __func__, ops);
- return -EINVAL;
- }
-
- if (eeh_ops && eeh_ops != ops) {
- pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
- __func__, eeh_ops->name, ops->name);
- return -EEXIST;
- }
-
- eeh_ops = ops;
-
- return 0;
-}
-
-/**
- * eeh_ops_unregister - Unreigster platform dependent EEH operations
- * @name: name of EEH platform operations
- *
- * Unregister the platform dependent EEH operation callback
- * functions.
- */
-int __exit eeh_ops_unregister(const char *name)
-{
- if (!name || !strlen(name)) {
- pr_warn("%s: Invalid EEH ops name\n",
- __func__);
- return -EINVAL;
- }
-
- if (eeh_ops && !strcmp(eeh_ops->name, name)) {
- eeh_ops = NULL;
- return 0;
- }
-
- return -EEXIST;
-}
-
static int eeh_reboot_notifier(struct notifier_block *nb,
unsigned long action, void *unused)
{
@@ -1058,45 +921,66 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
+static int eeh_device_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ switch (action) {
+ /*
+ * Note: It's not possible to perform EEH device addition (i.e.
+ * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on
+ * the device's resources, which have not yet been set up.
+ */
+ case BUS_NOTIFY_DEL_DEVICE:
+ eeh_remove_device(to_pci_dev(dev));
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_device_nb = {
+ .notifier_call = eeh_device_notifier,
+};
+
/**
- * eeh_init - EEH initialization
+ * eeh_init - System wide EEH initialization
+ * @ops: struct to trace EEH operation callback functions
*
- * Initialize EEH by trying to enable it for all of the adapters in the system.
- * As a side effect we can determine here if eeh is supported at all.
- * Note that we leave EEH on so failed config cycles won't cause a machine
- * check. If a user turns off EEH for a particular adapter they are really
- * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
- * grant access to a slot if EEH isn't enabled, and so we always enable
- * EEH for all slots/all devices.
- *
- * The eeh-force-off option disables EEH checking globally, for all slots.
- * Even if force-off is set, the EEH hardware is still enabled, so that
- * newer systems can boot.
+ * It's the platform's job to call this from an arch_initcall().
*/
-static int eeh_init(void)
+int eeh_init(struct eeh_ops *ops)
{
struct pci_controller *hose, *tmp;
int ret = 0;
+ /* the platform should only initialise EEH once */
+ if (WARN_ON(eeh_ops))
+ return -EEXIST;
+ if (WARN_ON(!ops))
+ return -ENOENT;
+ eeh_ops = ops;
+
/* Register reboot notifier */
ret = register_reboot_notifier(&eeh_reboot_nb);
if (ret) {
- pr_warn("%s: Failed to register notifier (%d)\n",
+ pr_warn("%s: Failed to register reboot notifier (%d)\n",
__func__, ret);
return ret;
}
- /* call platform initialization function */
- if (!eeh_ops) {
- pr_warn("%s: Platform EEH operation not found\n",
- __func__);
- return -EEXIST;
- } else if ((ret = eeh_ops->init()))
+ ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+ if (ret) {
+ pr_warn("%s: Failed to register bus notifier (%d)\n",
+ __func__, ret);
return ret;
+ }
/* Initialize PHB PEs */
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
- eeh_dev_phb_init_dynamic(hose);
+ eeh_phb_pe_create(hose);
eeh_addr_cache_init();
@@ -1104,91 +988,45 @@ static int eeh_init(void)
return eeh_event_init();
}
-core_initcall_sync(eeh_init);
-
-/**
- * eeh_add_device_early - Enable EEH for the indicated device node
- * @pdn: PCI device node for which to set up EEH
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices that were added after system boot (e.g. hotplug, dlpar).
- * This routine must be called before any i/o is performed to the
- * adapter (inluding any config-space i/o).
- * Whether this actually enables EEH or not for this device depends
- * on the CEC architecture, type of the device, on earlier boot
- * command-line arguments & etc.
- */
-void eeh_add_device_early(struct pci_dn *pdn)
-{
- struct pci_controller *phb = pdn ? pdn->phb : NULL;
- struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-
- if (!edev)
- return;
-
- if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
- return;
-
- /* USB Bus children of PCI devices will not have BUID's */
- if (NULL == phb ||
- (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
- return;
-
- eeh_ops->probe(pdn, NULL);
-}
-
/**
- * eeh_add_device_tree_early - Enable EEH for the indicated device
- * @pdn: PCI device node
- *
- * This routine must be used to perform EEH initialization for the
- * indicated PCI device that was added after system boot (e.g.
- * hotplug, dlpar).
- */
-void eeh_add_device_tree_early(struct pci_dn *pdn)
-{
- struct pci_dn *n;
-
- if (!pdn)
- return;
-
- list_for_each_entry(n, &pdn->child_list, list)
- eeh_add_device_tree_early(n);
- eeh_add_device_early(pdn);
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
-
-/**
- * eeh_add_device_late - Perform EEH initialization for the indicated pci device
+ * eeh_probe_device() - Perform EEH initialization for the indicated pci device
* @dev: pci device for which to set up EEH
*
* This routine must be used to complete EEH initialization for PCI
* devices that were added after system boot (e.g. hotplug, dlpar).
*/
-void eeh_add_device_late(struct pci_dev *dev)
+void eeh_probe_device(struct pci_dev *dev)
{
- struct pci_dn *pdn;
struct eeh_dev *edev;
- if (!dev)
+ pr_debug("EEH: Adding device %s\n", pci_name(dev));
+
+ /*
+ * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was
+ * already called for this device.
+ */
+ if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) {
+ pci_dbg(dev, "Already bound to an eeh_dev!\n");
return;
+ }
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- edev = pdn_to_eeh_dev(pdn);
- eeh_edev_dbg(edev, "Adding device\n");
- if (edev->pdev == dev) {
- eeh_edev_dbg(edev, "Device already referenced!\n");
+ edev = eeh_ops->probe(dev);
+ if (!edev) {
+ pr_debug("EEH: Adding device failed\n");
return;
}
/*
- * The EEH cache might not be removed correctly because of
- * unbalanced kref to the device during unplug time, which
- * relies on pcibios_release_device(). So we have to remove
- * that here explicitly.
+ * FIXME: We rely on pcibios_release_device() to remove the
+ * existing EEH state. The release function is only called if
+ * the pci_dev's refcount drops to zero so if something is
+ * keeping a ref to a device (e.g. a filesystem) we need to
+ * remove the old EEH state.
+ *
+ * FIXME: HEY MA, LOOK AT ME, NO LOCKING!
*/
- if (edev->pdev) {
- eeh_rmv_from_parent_pe(edev);
+ if (edev->pdev && edev->pdev != dev) {
+ eeh_pe_tree_remove(edev);
eeh_addr_cache_rmv_dev(edev->pdev);
eeh_sysfs_remove_device(edev->pdev);
@@ -1198,69 +1036,16 @@ void eeh_add_device_late(struct pci_dev *dev)
* into error handler afterwards.
*/
edev->mode |= EEH_DEV_NO_HANDLER;
-
- edev->pdev = NULL;
- dev->dev.archdata.edev = NULL;
}
- if (eeh_has_flag(EEH_PROBE_MODE_DEV))
- eeh_ops->probe(pdn, NULL);
-
+ /* bind the pdev and the edev together */
edev->pdev = dev;
dev->dev.archdata.edev = edev;
-
eeh_addr_cache_insert_dev(dev);
+ eeh_sysfs_add_device(dev);
}
/**
- * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_device_tree_late(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- if (eeh_has_flag(EEH_FORCE_DISABLED))
- return;
- list_for_each_entry(dev, &bus->devices, bus_list) {
- eeh_add_device_late(dev);
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- struct pci_bus *subbus = dev->subordinate;
- if (subbus)
- eeh_add_device_tree_late(subbus);
- }
- }
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
-
-/**
- * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to add EEH sysfs files for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_sysfs_files(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- eeh_sysfs_add_device(dev);
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- struct pci_bus *subbus = dev->subordinate;
- if (subbus)
- eeh_add_sysfs_files(subbus);
- }
- }
-}
-EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
-
-/**
* eeh_remove_device - Undo EEH setup for the indicated pci device
* @dev: pci device to be removed
*
@@ -1320,7 +1105,7 @@ void eeh_remove_device(struct pci_dev *dev)
edev->in_error = false;
dev->dev.archdata.edev = NULL;
if (!(edev->pe->state & EEH_PE_KEEP))
- eeh_rmv_from_parent_pe(edev);
+ eeh_pe_tree_remove(edev);
else
edev->mode |= EEH_DEV_DISCONNECTED;
}
@@ -1668,6 +1453,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
* eeh_pe_reset - Issue PE reset according to specified type
* @pe: EEH PE
* @option: reset type
+ * @include_passed: include passed-through devices?
*
* The routine is called to reset the specified PE with the
* indicated type, either fundamental reset or hot reset.
@@ -1739,12 +1525,12 @@ EXPORT_SYMBOL_GPL(eeh_pe_configure);
* eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE
* @pe: the indicated PE
* @type: error type
- * @function: error function
+ * @func: error function
* @addr: address
* @mask: address mask
*
* The routine is called to inject the specified PCI error, which
- * is determined by @type and @function, to the indicated PE for
+ * is determined by @type and @func, to the indicated PE for
* testing purpose.
*/
int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
@@ -1770,6 +1556,7 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
}
EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
+#ifdef CONFIG_PROC_FS
static int proc_eeh_show(struct seq_file *m, void *v)
{
if (!eeh_enabled()) {
@@ -1796,8 +1583,38 @@ static int proc_eeh_show(struct seq_file *m, void *v)
return 0;
}
+#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_DEBUG_FS
+
+
+static struct pci_dev *eeh_debug_lookup_pdev(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return ERR_PTR(-EFAULT);
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return ERR_PTR(-EINVAL);
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return ERR_PTR(-ENODEV);
+
+ return pdev;
+}
+
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
if (val)
@@ -1854,7 +1671,7 @@ static ssize_t eeh_force_recover_write(struct file *filp,
return -ENODEV;
/* Retrieve PE */
- pe = eeh_pe_get(hose, pe_no, 0);
+ pe = eeh_pe_get(hose, pe_no);
if (!pe)
return -ENODEV;
@@ -1890,26 +1707,13 @@ static ssize_t eeh_dev_check_write(struct file *filp,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
- uint32_t domain, bus, dev, fn;
struct pci_dev *pdev;
struct eeh_dev *edev;
- char buf[20];
int ret;
- memset(buf, 0, sizeof(buf));
- ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
- if (!ret)
- return -EFAULT;
-
- ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
- if (ret != 4) {
- pr_err("%s: expected 4 args, got %d\n", __func__, ret);
- return -EINVAL;
- }
-
- pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
- if (!pdev)
- return -ENODEV;
+ pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
edev = pci_dev_to_eeh_dev(pdev);
if (!edev) {
@@ -1919,8 +1723,8 @@ static ssize_t eeh_dev_check_write(struct file *filp,
}
ret = eeh_dev_check_failure(edev);
- pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n",
- domain, bus, dev, fn, ret);
+ pci_info(pdev, "eeh_dev_check_failure(%s) = %d\n",
+ pci_name(pdev), ret);
pci_dev_put(pdev);
@@ -2031,25 +1835,12 @@ static ssize_t eeh_dev_break_write(struct file *filp,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
- uint32_t domain, bus, dev, fn;
struct pci_dev *pdev;
- char buf[20];
int ret;
- memset(buf, 0, sizeof(buf));
- ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
- if (!ret)
- return -EFAULT;
-
- ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
- if (ret != 4) {
- pr_err("%s: expected 4 args, got %d\n", __func__, ret);
- return -EINVAL;
- }
-
- pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
- if (!pdev)
- return -ENODEV;
+ pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
ret = eeh_debugfs_break_device(pdev);
pci_dev_put(pdev);
@@ -2067,6 +1858,53 @@ static const struct file_operations eeh_dev_break_fops = {
.read = eeh_debugfs_dev_usage,
};
+static ssize_t eeh_dev_can_recover(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct pci_driver *drv;
+ struct pci_dev *pdev;
+ size_t ret;
+
+ pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
+
+ /*
+ * In order for error recovery to work the driver needs to implement
+ * .error_detected(), so it can quiesce IO to the device, and
+ * .slot_reset() so it can re-initialise the device after a reset.
+ *
+ * Ideally they'd implement .resume() too, but some drivers which
+ * we need to support (notably IPR) don't, so we tolerate that.
+ *
+ * .mmio_enabled() is mostly there as a work-around for devices which
+ * take forever to re-init after a hot reset. Implementing that is
+ * strictly optional.
+ */
+ drv = pci_dev_driver(pdev);
+ if (drv &&
+ drv->err_handler &&
+ drv->err_handler->error_detected &&
+ drv->err_handler->slot_reset) {
+ ret = count;
+ } else {
+ ret = -EOPNOTSUPP;
+ }
+
+ pci_dev_put(pdev);
+
+ return ret;
+}
+
+static const struct file_operations eeh_dev_can_recover_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_can_recover,
+ .read = eeh_debugfs_dev_usage,
+};
+
#endif
static int __init eeh_init_proc(void)
@@ -2075,22 +1913,25 @@ static int __init eeh_init_proc(void)
proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
#ifdef CONFIG_DEBUG_FS
debugfs_create_file_unsafe("eeh_enable", 0600,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&eeh_enable_dbgfs_ops);
debugfs_create_u32("eeh_max_freezes", 0600,
- powerpc_debugfs_root, &eeh_max_freezes);
+ arch_debugfs_dir, &eeh_max_freezes);
debugfs_create_bool("eeh_disable_recovery", 0600,
- powerpc_debugfs_root,
+ arch_debugfs_dir,
&eeh_debugfs_no_recover);
debugfs_create_file_unsafe("eeh_dev_check", 0600,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&eeh_dev_check_fops);
debugfs_create_file_unsafe("eeh_dev_break", 0600,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&eeh_dev_break_fops);
debugfs_create_file_unsafe("eeh_force_recover", 0600,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&eeh_force_recover_fops);
+ debugfs_create_file_unsafe("eeh_dev_can_recover", 0600,
+ arch_debugfs_dir, NULL,
+ &eeh_dev_can_recover_fops);
eeh_cache_debugfs_init();
#endif
}
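The eeh_dev_can_recover hook added above reports a device as recoverable only when its bound driver implements .error_detected() and .slot_reset(). As a rough sketch of the minimum a PCI driver needs to pass that check (the foo_* names are placeholders, not from the patch):

	#include <linux/pci.h>

	static pci_ers_result_t foo_error_detected(struct pci_dev *pdev,
						   pci_channel_state_t state)
	{
		/* quiesce I/O to the device and ask the core for a reset */
		return PCI_ERS_RESULT_NEED_RESET;
	}

	static pci_ers_result_t foo_slot_reset(struct pci_dev *pdev)
	{
		/* re-initialise the device after the PE has been reset */
		return PCI_ERS_RESULT_RECOVERED;
	}

	static const struct pci_error_handlers foo_err_handler = {
		.error_detected	= foo_error_detected,
		.slot_reset	= foo_slot_reset,
		/* .resume and .mmio_enabled are optional, as the comment above notes */
	};

Like eeh_dev_check and eeh_dev_break, the new debugfs file takes a domain:bus:dev.fn string, now parsed by the shared eeh_debug_lookup_pdev() helper.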
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 6b50bf15d8c1..9bdaaf7fddc9 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -12,8 +12,8 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
+#include <linux/debugfs.h>
#include <asm/pci-bridge.h>
-#include <asm/debugfs.h>
#include <asm/ppc-pci.h>
@@ -264,8 +264,9 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
{
struct pci_io_addr_range *piar;
struct rb_node *n;
+ unsigned long flags;
- spin_lock(&pci_io_addr_cache_root.piar_lock);
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) {
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
@@ -273,7 +274,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
(piar->flags & IORESOURCE_IO) ? "i/o" : "mem",
&piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
}
- spin_unlock(&pci_io_addr_cache_root.piar_lock);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
return 0;
}
@@ -282,6 +283,6 @@ DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache);
void eeh_cache_debugfs_init(void)
{
debugfs_create_file_unsafe("eeh_address_cache", 0400,
- powerpc_debugfs_root, NULL,
+ arch_debugfs_dir, NULL,
&eeh_addr_cache_fops);
}
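The eeh_cache.c hunk switches the debugfs dump to the IRQ-saving lock variants, presumably because piar_lock is also taken on error-detection paths that can run with interrupts off, where a plain spin_lock()/spin_unlock() pair would be unsafe. The general shape of the pattern, as a small stand-alone sketch with a hypothetical lock:

	static DEFINE_SPINLOCK(example_lock);	/* hypothetical lock */

	static void example_dump(void)
	{
		unsigned long flags;

		/* save the interrupt state and disable local interrupts */
		spin_lock_irqsave(&example_lock, flags);
		/* ... walk the structure protected by example_lock ... */
		spin_unlock_irqrestore(&example_lock, flags);
	}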
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
deleted file mode 100644
index 7370185c7a05..000000000000
--- a/arch/powerpc/kernel/eeh_dev.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The file intends to implement dynamic creation of EEH device, which will
- * be bound with OF node and PCI device simutaneously. The EEH devices would
- * be foundamental information for EEH core components to work proerly. Besides,
- * We have to support multiple situations where dynamic creation of EEH device
- * is required:
- *
- * 1) Before PCI emunation starts, we need create EEH devices according to the
- * PCI sensitive OF nodes.
- * 2) When PCI emunation is done, we need do the binding between PCI device and
- * the associated EEH device.
- * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device
- * will be created while PCI sensitive OF node is detected from DR.
- * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If
- * PHB is newly inserted, we also need create EEH devices accordingly.
- *
- * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
- */
-
-#include <linux/export.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-/**
- * eeh_dev_init - Create EEH device according to OF node
- * @pdn: PCI device node
- *
- * It will create EEH device according to the given OF node. The function
- * might be called by PCI emunation, DR, PHB hotplug.
- */
-struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
-{
- struct eeh_dev *edev;
-
- /* Allocate EEH device */
- edev = kzalloc(sizeof(*edev), GFP_KERNEL);
- if (!edev)
- return NULL;
-
- /* Associate EEH device with OF node */
- pdn->edev = edev;
- edev->pdn = pdn;
- edev->bdfn = (pdn->busno << 8) | pdn->devfn;
- edev->controller = pdn->phb;
-
- return edev;
-}
-
-/**
- * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
- * @phb: PHB
- *
- * Scan the PHB OF node and its child association, then create the
- * EEH devices accordingly
- */
-void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
-{
- /* EEH PE for PHB */
- eeh_phb_pe_create(phb);
-}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 7b048cee767c..350dab18e137 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -104,13 +104,13 @@ static bool eeh_edev_actionable(struct eeh_dev *edev)
*/
static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
{
- if (!pdev || !pdev->driver)
+ if (!pdev || !pdev->dev.driver)
return NULL;
- if (!try_module_get(pdev->driver->driver.owner))
+ if (!try_module_get(pdev->dev.driver->owner))
return NULL;
- return pdev->driver;
+ return to_pci_driver(pdev->dev.driver);
}
/**
@@ -122,10 +122,10 @@ static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
*/
static inline void eeh_pcid_put(struct pci_dev *pdev)
{
- if (!pdev || !pdev->driver)
+ if (!pdev || !pdev->dev.driver)
return;
- module_put(pdev->driver->driver.owner);
+ module_put(pdev->dev.driver->owner);
}
/**
@@ -214,7 +214,7 @@ static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
pci_save_state(pdev);
}
-static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
+static void eeh_set_channel_state(struct eeh_pe *root, pci_channel_state_t s)
{
struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
@@ -425,8 +425,8 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
- if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
- eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+ if (eeh_ops->notify_resume)
+ eeh_ops->notify_resume(edev);
#endif
return PCI_ERS_RESULT_NONE;
}
@@ -477,7 +477,7 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
}
#ifdef CONFIG_PCI_IOV
- pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
+ pci_iov_add_virtfn(edev->physfn, edev->vf_index);
#endif
return NULL;
}
@@ -521,9 +521,7 @@ static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
if (edev->physfn) {
#ifdef CONFIG_PCI_IOV
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
-
- pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
+ pci_iov_remove_virtfn(edev->physfn, edev->vf_index);
edev->pdev = NULL;
#endif
if (rmv_data)
@@ -544,7 +542,7 @@ static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
continue;
edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
- eeh_rmv_from_parent_pe(edev);
+ eeh_pe_tree_remove(edev);
}
return NULL;
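The eeh_pcid_get()/eeh_pcid_put() changes keep the same contract while moving to pdev->dev.driver and to_pci_driver(): take a reference on the driver module before calling into its error handlers and drop it afterwards. A short illustrative caller, not taken from the patch:

	static void example_call_driver(struct pci_dev *pdev)
	{
		struct pci_driver *drv = eeh_pcid_get(pdev);

		if (!drv)
			return;		/* no bound driver, or its module is unloading */

		/* ... invoke drv->err_handler callbacks here ... */

		eeh_pcid_put(pdev);
	}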
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 177852e39a25..845e024321d4 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -251,43 +251,21 @@ void eeh_pe_dev_traverse(struct eeh_pe *root,
/**
* __eeh_pe_get - Check the PE address
- * @data: EEH PE
- * @flag: EEH device
*
* For one particular PE, it can be identified by PE address
* or tranditional BDF address. BDF address is composed of
* Bus/Device/Function number. The extra data referred by flag
* indicates which type of address should be used.
*/
-struct eeh_pe_get_flag {
- int pe_no;
- int config_addr;
-};
-
static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
{
- struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag;
+ int *target_pe = flag;
- /* Unexpected PHB PE */
+ /* PHB PEs are special and should be ignored */
if (pe->type & EEH_PE_PHB)
return NULL;
- /*
- * We prefer PE address. For most cases, we should
- * have non-zero PE address
- */
- if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
- if (tmp->pe_no == pe->addr)
- return pe;
- } else {
- if (tmp->pe_no &&
- (tmp->pe_no == pe->addr))
- return pe;
- }
-
- /* Try BDF address */
- if (tmp->config_addr &&
- (tmp->config_addr == pe->config_addr))
+ if (*target_pe == pe->addr)
return pe;
return NULL;
@@ -297,7 +275,6 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
* eeh_pe_get - Search PE based on the given address
* @phb: PCI controller
* @pe_no: PE number
- * @config_addr: Config address
*
* Search the corresponding PE based on the specified address which
* is included in the eeh device. The function is used to check if
@@ -306,75 +283,30 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
* which is composed of PCI bus/device/function number, or unified
* PE address.
*/
-struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
- int pe_no, int config_addr)
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no)
{
struct eeh_pe *root = eeh_phb_pe_get(phb);
- struct eeh_pe_get_flag tmp = { pe_no, config_addr };
- struct eeh_pe *pe;
-
- pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp);
- return pe;
+ return eeh_pe_traverse(root, __eeh_pe_get, &pe_no);
}
/**
- * eeh_pe_get_parent - Retrieve the parent PE
+ * eeh_pe_tree_insert - Add EEH device to parent PE
* @edev: EEH device
+ * @new_pe_parent: PE to create additional PEs under
*
- * The whole PEs existing in the system are organized as hierarchy
- * tree. The function is used to retrieve the parent PE according
- * to the parent EEH device.
- */
-static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
-{
- struct eeh_dev *parent;
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
-
- /*
- * It might have the case for the indirect parent
- * EEH device already having associated PE, but
- * the direct parent EEH device doesn't have yet.
- */
- if (edev->physfn)
- pdn = pci_get_pdn(edev->physfn);
- else
- pdn = pdn ? pdn->parent : NULL;
- while (pdn) {
- /* We're poking out of PCI territory */
- parent = pdn_to_eeh_dev(pdn);
- if (!parent)
- return NULL;
-
- if (parent->pe)
- return parent->pe;
-
- pdn = pdn->parent;
- }
-
- return NULL;
-}
-
-/**
- * eeh_add_to_parent_pe - Add EEH device to parent PE
- * @edev: EEH device
+ * Add EEH device to the PE in edev->pe_config_addr. If a PE already
+ * exists with that address then @edev is added to that PE. Otherwise
+ * a new PE is created and inserted into the PE tree as a child of
+ * @new_pe_parent.
*
- * Add EEH device to the parent PE. If the parent PE already
- * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
- * we have to create new PE to hold the EEH device and the new
- * PE will be linked to its parent PE as well.
+ * If @new_pe_parent is NULL then the new PE will be inserted
+ * directly under the PHB.
*/
-int eeh_add_to_parent_pe(struct eeh_dev *edev)
+int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
{
+ struct pci_controller *hose = edev->controller;
struct eeh_pe *pe, *parent;
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
- int config_addr = (pdn->busno << 8) | (pdn->devfn);
-
- /* Check if the PE number is valid */
- if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
- eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n");
- return -EINVAL;
- }
/*
* Search the PE has been existing or not according
@@ -382,7 +314,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* PE should be composed of PCI bus and its subordinate
* components.
*/
- pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
+ pe = eeh_pe_get(hose, edev->pe_config_addr);
if (pe) {
if (pe->type & EEH_PE_INVALID) {
list_add_tail(&edev->entry, &pe->edevs);
@@ -399,8 +331,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
parent = parent->parent;
}
- eeh_edev_dbg(edev,
- "Added to device PE (parent: PE#%x)\n",
+ eeh_edev_dbg(edev, "Added to existing PE (parent: PE#%x)\n",
pe->parent->addr);
} else {
/* Mark the PE as type of PCI bus */
@@ -416,15 +347,15 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Create a new EEH PE */
if (edev->physfn)
- pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF);
+ pe = eeh_pe_alloc(hose, EEH_PE_VF);
else
- pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE);
+ pe = eeh_pe_alloc(hose, EEH_PE_DEVICE);
if (!pe) {
pr_err("%s: out of memory!\n", __func__);
return -ENOMEM;
}
- pe->addr = edev->pe_config_addr;
- pe->config_addr = config_addr;
+
+ pe->addr = edev->pe_config_addr;
/*
* Put the new EEH PE into hierarchy tree. If the parent
@@ -432,34 +363,35 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* to PHB directly. Otherwise, we have to associate the
* PE with its parent.
*/
- parent = eeh_pe_get_parent(edev);
- if (!parent) {
- parent = eeh_phb_pe_get(pdn->phb);
- if (!parent) {
+ if (!new_pe_parent) {
+ new_pe_parent = eeh_phb_pe_get(hose);
+ if (!new_pe_parent) {
pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
- __func__, pdn->phb->global_number);
+ __func__, hose->global_number);
edev->pe = NULL;
kfree(pe);
return -EEXIST;
}
}
- pe->parent = parent;
+
+ /* link new PE into the tree */
+ pe->parent = new_pe_parent;
+ list_add_tail(&pe->child, &new_pe_parent->child_list);
/*
* Put the newly created PE into the child list and
* link the EEH device accordingly.
*/
- list_add_tail(&pe->child, &parent->child_list);
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
- eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
- pe->parent->addr);
+ eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n",
+ new_pe_parent->addr);
return 0;
}
/**
- * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
+ * eeh_pe_tree_remove - Remove one EEH device from the associated PE
* @edev: EEH device
*
* The PE hierarchy tree might be changed when doing PCI hotplug.
@@ -467,7 +399,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* during EEH recovery. So we have to call the function remove the
* corresponding PE accordingly if necessary.
*/
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
+int eeh_pe_tree_remove(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
bool keep, recover;
@@ -698,7 +630,6 @@ void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed)
*/
static void eeh_bridge_check_link(struct eeh_dev *edev)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
int cap;
uint32_t val;
int timeout = 0;
@@ -714,32 +645,32 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
/* Check slot status */
cap = edev->pcie_cap;
- eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_SLTSTA, 2, &val);
if (!(val & PCI_EXP_SLTSTA_PDS)) {
eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
return;
}
/* Check power status if we have the capability */
- eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCAP, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_SLTCAP, 2, &val);
if (val & PCI_EXP_SLTCAP_PCP) {
- eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_SLTCTL, 2, &val);
if (val & PCI_EXP_SLTCTL_PCC) {
eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
- eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val);
+ eeh_ops->write_config(edev, cap + PCI_EXP_SLTCTL, 2, val);
msleep(2 * 1000);
}
}
/* Enable link */
- eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCTL, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_LNKCTL, 2, &val);
val &= ~PCI_EXP_LNKCTL_LD;
- eeh_ops->write_config(pdn, cap + PCI_EXP_LNKCTL, 2, val);
+ eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val);
/* Check link */
- eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val);
if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
msleep(1000);
@@ -752,7 +683,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
msleep(20);
timeout += 20;
- eeh_ops->read_config(pdn, cap + PCI_EXP_LNKSTA, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_LNKSTA, 2, &val);
if (val & PCI_EXP_LNKSTA_DLLLA)
break;
}
@@ -769,7 +700,6 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
static void eeh_restore_bridge_bars(struct eeh_dev *edev)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
int i;
/*
@@ -777,20 +707,20 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev)
* Bus numbers and windows: 0x18 - 0x30
*/
for (i = 4; i < 13; i++)
- eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]);
+ eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
/* Rom: 0x38 */
- eeh_ops->write_config(pdn, 14*4, 4, edev->config_space[14]);
+ eeh_ops->write_config(edev, 14*4, 4, edev->config_space[14]);
/* Cache line & Latency timer: 0xC 0xD */
- eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
+ eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
SAVED_BYTE(PCI_CACHE_LINE_SIZE));
- eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1,
- SAVED_BYTE(PCI_LATENCY_TIMER));
+ eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
+ SAVED_BYTE(PCI_LATENCY_TIMER));
/* Max latency, min grant, interrupt ping and line: 0x3C */
- eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]);
+ eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
/* PCI Command: 0x4 */
- eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1] |
+ eeh_ops->write_config(edev, PCI_COMMAND, 4, edev->config_space[1] |
PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
/* Check the PCIe link is ready */
@@ -799,28 +729,27 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev)
static void eeh_restore_device_bars(struct eeh_dev *edev)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
int i;
u32 cmd;
for (i = 4; i < 10; i++)
- eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]);
+ eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
/* 12 == Expansion ROM Address */
- eeh_ops->write_config(pdn, 12*4, 4, edev->config_space[12]);
+ eeh_ops->write_config(edev, 12*4, 4, edev->config_space[12]);
- eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
+ eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
SAVED_BYTE(PCI_CACHE_LINE_SIZE));
- eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1,
+ eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
SAVED_BYTE(PCI_LATENCY_TIMER));
/* max latency, min grant, interrupt pin and line */
- eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]);
+ eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
/*
* Restore PERR & SERR bits, some devices require it,
* don't touch the other command bits
*/
- eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cmd);
+ eeh_ops->read_config(edev, PCI_COMMAND, 4, &cmd);
if (edev->config_space[1] & PCI_COMMAND_PARITY)
cmd |= PCI_COMMAND_PARITY;
else
@@ -829,7 +758,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
cmd |= PCI_COMMAND_SERR;
else
cmd &= ~PCI_COMMAND_SERR;
- eeh_ops->write_config(pdn, PCI_COMMAND, 4, cmd);
+ eeh_ops->write_config(edev, PCI_COMMAND, 4, cmd);
}
/**
@@ -843,16 +772,14 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
*/
static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
{
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
-
/* Do special restore for bridges */
if (edev->mode & EEH_DEV_BRIDGE)
eeh_restore_bridge_bars(edev);
else
eeh_restore_device_bars(edev);
- if (eeh_ops->restore_config && pdn)
- eeh_ops->restore_config(pdn);
+ if (eeh_ops->restore_config)
+ eeh_ops->restore_config(edev);
}
/**
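Taken together, the eeh_pe.c changes mean a PE is now identified purely by its PE number, and eeh_pe_tree_insert() attaches an eeh_dev to the PE named by edev->pe_config_addr, creating it under @new_pe_parent (or directly under the PHB PE when that is NULL). A rough usage sketch, as a platform probe path might call it; the function and message below are illustrative only:

	static int example_attach(struct eeh_dev *edev)
	{
		int ret;

		/* attach to the PE named by edev->pe_config_addr, creating it
		 * under the PHB PE if it does not exist yet */
		ret = eeh_pe_tree_insert(edev, NULL);
		if (ret)
			pr_err("EEH: PE tree insert failed for %04x (%d)\n",
			       edev->bdfn, ret);
		return ret;
	}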
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 4fb0f1e1017a..429620da73ba 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -99,7 +99,7 @@ static ssize_t eeh_notify_resume_store(struct device *dev,
if (!edev || !edev->pe || !eeh_ops->notify_resume)
return -ENODEV;
- if (eeh_ops->notify_resume(pci_get_pdn(pdev)))
+ if (eeh_ops->notify_resume(edev))
return -EIO;
return count;
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 16af0d8d90a8..61fdd53cdd9a 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -28,256 +28,37 @@
#include <asm/unistd.h>
#include <asm/ptrace.h>
#include <asm/export.h>
-#include <asm/asm-405.h>
#include <asm/feature-fixups.h>
#include <asm/barrier.h>
#include <asm/kup.h>
#include <asm/bug.h>
+#include <asm/interrupt.h>
#include "head_32.h"
/*
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * synchronisation instructions.
+ */
+
+/*
 * Align to 4k in order to ensure that all functions modifying srr0/srr1
* fit into one page in order to not encounter a TLB miss between the
* modification of srr0/srr1 and the associated rfi.
*/
.align 12
-#ifdef CONFIG_BOOKE
- .globl mcheck_transfer_to_handler
-mcheck_transfer_to_handler:
- mfspr r0,SPRN_DSRR0
- stw r0,_DSRR0(r11)
- mfspr r0,SPRN_DSRR1
- stw r0,_DSRR1(r11)
- /* fall through */
-
- .globl debug_transfer_to_handler
-debug_transfer_to_handler:
- mfspr r0,SPRN_CSRR0
- stw r0,_CSRR0(r11)
- mfspr r0,SPRN_CSRR1
- stw r0,_CSRR1(r11)
- /* fall through */
-
- .globl crit_transfer_to_handler
-crit_transfer_to_handler:
-#ifdef CONFIG_PPC_BOOK3E_MMU
- mfspr r0,SPRN_MAS0
- stw r0,MAS0(r11)
- mfspr r0,SPRN_MAS1
- stw r0,MAS1(r11)
- mfspr r0,SPRN_MAS2
- stw r0,MAS2(r11)
- mfspr r0,SPRN_MAS3
- stw r0,MAS3(r11)
- mfspr r0,SPRN_MAS6
- stw r0,MAS6(r11)
-#ifdef CONFIG_PHYS_64BIT
- mfspr r0,SPRN_MAS7
- stw r0,MAS7(r11)
-#endif /* CONFIG_PHYS_64BIT */
-#endif /* CONFIG_PPC_BOOK3E_MMU */
-#ifdef CONFIG_44x
- mfspr r0,SPRN_MMUCR
- stw r0,MMUCR(r11)
-#endif
- mfspr r0,SPRN_SRR0
- stw r0,_SRR0(r11)
- mfspr r0,SPRN_SRR1
- stw r0,_SRR1(r11)
-
- /* set the stack limit to the current stack */
- mfspr r8,SPRN_SPRG_THREAD
- lwz r0,KSP_LIMIT(r8)
- stw r0,SAVED_KSP_LIMIT(r11)
- rlwinm r0,r1,0,0,(31 - THREAD_SHIFT)
- stw r0,KSP_LIMIT(r8)
- /* fall through */
-#endif
-
-#ifdef CONFIG_40x
- .globl crit_transfer_to_handler
-crit_transfer_to_handler:
- lwz r0,crit_r10@l(0)
- stw r0,GPR10(r11)
- lwz r0,crit_r11@l(0)
- stw r0,GPR11(r11)
- mfspr r0,SPRN_SRR0
- stw r0,crit_srr0@l(0)
- mfspr r0,SPRN_SRR1
- stw r0,crit_srr1@l(0)
-
- /* set the stack limit to the current stack */
- mfspr r8,SPRN_SPRG_THREAD
- lwz r0,KSP_LIMIT(r8)
- stw r0,saved_ksp_limit@l(0)
- rlwinm r0,r1,0,0,(31 - THREAD_SHIFT)
- stw r0,KSP_LIMIT(r8)
- /* fall through */
-#endif
-
-/*
- * This code finishes saving the registers to the exception frame
- * and jumps to the appropriate handler for the exception, turning
- * on address translation.
- * Note that we rely on the caller having set cr0.eq iff the exception
- * occurred in kernel mode (i.e. MSR:PR = 0).
- */
- .globl transfer_to_handler_full
-transfer_to_handler_full:
- SAVE_NVGPRS(r11)
- /* fall through */
-
- .globl transfer_to_handler
-transfer_to_handler:
- stw r2,GPR2(r11)
- stw r12,_NIP(r11)
- stw r9,_MSR(r11)
- andi. r2,r9,MSR_PR
- mfctr r12
- mfspr r2,SPRN_XER
- stw r12,_CTR(r11)
- stw r2,_XER(r11)
- mfspr r12,SPRN_SPRG_THREAD
- tovirt_vmstack r12, r12
- beq 2f /* if from user, fix up THREAD.regs */
- addi r2, r12, -THREAD
- addi r11,r1,STACK_FRAME_OVERHEAD
- stw r11,PT_REGS(r12)
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
- /* Check to see if the dbcr0 register is set up to debug. Use the
- internal debug mode bit to do this. */
- lwz r12,THREAD_DBCR0(r12)
- andis. r12,r12,DBCR0_IDM@h
-#endif
- ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
-#ifdef CONFIG_PPC_BOOK3S_32
- kuep_lock r11, r12
-#endif
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
- beq+ 3f
- /* From user and task is ptraced - load up global dbcr0 */
- li r12,-1 /* clear all pending debug events */
- mtspr SPRN_DBSR,r12
- lis r11,global_dbcr0@ha
- tophys(r11,r11)
- addi r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
- lwz r9,TASK_CPU(r2)
- slwi r9,r9,3
- add r11,r11,r9
-#endif
- lwz r12,0(r11)
- mtspr SPRN_DBCR0,r12
- lwz r12,4(r11)
- addi r12,r12,-1
- stw r12,4(r11)
-#endif
-
- b 3f
-
-2: /* if from kernel, check interrupted DOZE/NAP mode and
- * check for stack overflow
- */
- kuap_save_and_lock r11, r12, r9, r2, r6
- addi r2, r12, -THREAD
-#ifndef CONFIG_VMAP_STACK
- lwz r9,KSP_LIMIT(r12)
- cmplw r1,r9 /* if r1 <= ksp_limit */
- ble- stack_ovf /* then the kernel stack overflowed */
-#endif
-5:
#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
+ .globl prepare_transfer_to_handler
+prepare_transfer_to_handler:
+ /* if from kernel, check interrupted DOZE/NAP mode */
lwz r12,TI_LOCAL_FLAGS(r2)
mtcrf 0x01,r12
bt- 31-TLF_NAPPING,4f
bt- 31-TLF_SLEEPING,7f
-#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
- .globl transfer_to_handler_cont
-transfer_to_handler_cont:
-3:
- mflr r9
- tovirt_novmstack r2, r2 /* set r2 to current */
- tovirt_vmstack r9, r9
- lwz r11,0(r9) /* virtual address of handler */
- lwz r9,4(r9) /* where to go when done */
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
-#ifdef CONFIG_TRACE_IRQFLAGS
- /*
- * When tracing IRQ state (lockdep) we enable the MMU before we call
- * the IRQ tracing functions as they might access vmalloc space or
- * perform IOs for console output.
- *
- * To speed up the syscall path where interrupts stay on, let's check
- * first if we are changing the MSR value at all.
- */
- tophys_novmstack r12, r1
- lwz r12,_MSR(r12)
- andi. r12,r12,MSR_EE
- bne 1f
-
- /* MSR isn't changing, just transition directly */
-#endif
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r10
- mtlr r9
- SYNC
- RFI /* jump to handler, enable MMU */
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to
- * keep interrupts disabled at this point otherwise we might risk
- * taking an interrupt before we tell lockdep they are enabled.
- */
- lis r12,reenable_mmu@h
- ori r12,r12,reenable_mmu@l
- LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
- mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r0
- SYNC
- RFI
-
-reenable_mmu:
- /*
- * We save a bunch of GPRs,
- * r3 can be different from GPR3(r1) at this point, r9 and r11
- * contains the old MSR and handler address respectively,
- * r4 & r5 can contain page fault arguments that need to be passed
- * along as well. r12, CCR, CTR, XER etc... are left clobbered as
- * they aren't useful past this point (aren't syscall arguments),
- * the rest is restored from the exception frame.
- */
-
- stwu r1,-32(r1)
- stw r9,8(r1)
- stw r11,12(r1)
- stw r3,16(r1)
- stw r4,20(r1)
- stw r5,24(r1)
-
- /* If we are disabling interrupts (normal case), simply log it with
- * lockdep
- */
-1: bl trace_hardirqs_off
-2: lwz r5,24(r1)
- lwz r4,20(r1)
- lwz r3,16(r1)
- lwz r11,12(r1)
- lwz r9,8(r1)
- addi r1,r1,32
- lwz r0,GPR0(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- mtctr r11
- mtlr r9
- bctr /* jump to handler */
-#endif /* CONFIG_TRACE_IRQFLAGS */
+ blr
-#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
4: rlwinm r12,r12,0,~_TLF_NAPPING
stw r12,TI_LOCAL_FLAGS(r2)
b power_save_ppc32_restore
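The comment added at the top of entry_32.S above records that the rfi on interrupt/syscall return is what lets powerpc provide ARCH_HAS_MEMBARRIER_SYNC_CORE without extra serialising instructions. From user space that guarantee is consumed through the membarrier() system call; a minimal illustrative program, assuming a kernel with membarrier support (glibc provides no wrapper, hence the raw syscall):

	#include <linux/membarrier.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int membarrier(int cmd, unsigned int flags, int cpu_id)
	{
		return syscall(__NR_membarrier, cmd, flags, cpu_id);
	}

	int main(void)
	{
		/* register once, then each expedited call guarantees every thread
		 * of this process runs a core-serialising instruction (here, the
		 * rfi on return to user space) before the call returns */
		if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0))
			return 1;
		membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0);
		return 0;
	}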
@@ -287,186 +68,81 @@ reenable_mmu:
lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
- kuap_restore r11, r2, r3, r4, r5
lwz r2, GPR2(r11)
b fast_exception_return
-#endif
-
-#ifndef CONFIG_VMAP_STACK
-/*
- * On kernel stack overflow, load up an initial stack pointer
- * and call StackOverflow(regs), which should not return.
- */
-stack_ovf:
- /* sometimes we use a statically-allocated stack, which is OK. */
- lis r12,_end@h
- ori r12,r12,_end@l
- cmplw r1,r12
- ble 5b /* r1 <= &_end is OK */
- SAVE_NVGPRS(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- lis r1,init_thread_union@ha
- addi r1,r1,init_thread_union@l
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
- lis r9,StackOverflow@ha
- addi r9,r9,StackOverflow@l
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
- mtspr SPRN_SRR0,r9
- mtspr SPRN_SRR1,r10
- SYNC
- RFI
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-trace_syscall_entry_irq_off:
- /*
- * Syscall shouldn't happen while interrupts are disabled,
- * so let's do a warning here.
- */
-0: trap
- EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
- bl trace_hardirqs_on
-
- /* Now enable for real */
- LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
- mtmsr r10
-
- REST_GPR(0, r1)
- REST_4GPRS(3, r1)
- REST_2GPRS(7, r1)
- b DoSyscall
-#endif /* CONFIG_TRACE_IRQFLAGS */
+_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
+#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
.globl transfer_to_syscall
transfer_to_syscall:
-#ifdef CONFIG_TRACE_IRQFLAGS
- andi. r12,r9,MSR_EE
- beq- trace_syscall_entry_irq_off
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
-/*
- * Handle a system call.
- */
- .stabs "arch/powerpc/kernel/",N_SO,0,0,0f
- .stabs "entry_32.S",N_SO,0,0,0f
-0:
-
-_GLOBAL(DoSyscall)
- stw r3,ORIG_GPR3(r1)
- li r12,0
- stw r12,RESULT(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Make sure interrupts are enabled */
- mfmsr r11
- andi. r12,r11,MSR_EE
- /* We came in with interrupts disabled, we WARN and mark them enabled
- * for lockdep now */
-0: tweqi r12, 0
- EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
-#endif /* CONFIG_TRACE_IRQFLAGS */
- lwz r11,TI_FLAGS(r2)
- andi. r11,r11,_TIF_SYSCALL_DOTRACE
- bne- syscall_dotrace
-syscall_dotrace_cont:
- cmplwi 0,r0,NR_syscalls
- lis r10,sys_call_table@h
- ori r10,r10,sys_call_table@l
- slwi r0,r0,2
- bge- 66f
+ stw r11, GPR1(r1)
+ stw r11, 0(r1)
+ mflr r12
+ stw r12, _LINK(r1)
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#endif
+ lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ SAVE_GPR(2, r1)
+ addi r12,r12,STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r1)
+ li r2, INTERRUPT_SYSCALL
+ stw r12,8(r1)
+ stw r2,_TRAP(r1)
+ SAVE_GPR(0, r1)
+ SAVE_4GPRS(3, r1)
+ SAVE_2GPRS(7, r1)
+ addi r2,r10,-THREAD
+ SAVE_NVGPRS(r1)
- barrier_nospec_asm
- /*
- * Prevent the load of the handler below (based on the user-passed
- * system call number) being speculatively executed until the test
- * against NR_syscalls and branch to .66f above has
- * committed.
- */
+ /* Calling convention has r9 = orig r0, r10 = regs */
+ addi r10,r1,STACK_FRAME_OVERHEAD
+ mr r9,r0
+ bl system_call_exception
- lwzx r10,r10,r0 /* Fetch system call handler [ptr] */
- mtlr r10
- addi r9,r1,STACK_FRAME_OVERHEAD
- PPC440EP_ERR42
- blrl /* Call handler */
- .globl ret_from_syscall
ret_from_syscall:
-#ifdef CONFIG_DEBUG_RSEQ
- /* Check whether the syscall is issued inside a restartable sequence */
- stw r3,GPR3(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl rseq_syscall
- lwz r3,GPR3(r1)
-#endif
- mr r6,r3
- /* disable interrupts so current_thread_info()->flags can't change */
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
- /* Note: We don't bother telling lockdep about it */
- SYNC
- mtmsr r10
- lwz r9,TI_FLAGS(r2)
- li r8,-MAX_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- syscall_exit_work
- cmplw 0,r3,r8
- blt+ syscall_exit_cont
- lwz r11,_CCR(r1) /* Load CR */
- neg r3,r3
- oris r11,r11,0x1000 /* Set SO bit in CR */
- stw r11,_CCR(r1)
-syscall_exit_cont:
- lwz r8,_MSR(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* If we are going to return from the syscall with interrupts
- * off, we trace that here. It shouldn't normally happen.
- */
- andi. r10,r8,MSR_EE
- bne+ 1f
- stw r3,GPR3(r1)
- bl trace_hardirqs_off
- lwz r3,GPR3(r1)
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- /* If the process has its own DBCR0 value, load it up. The internal
- debug mode bit tells us that dbcr0 should be loaded. */
- lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IDM@h
- bnel- load_dbcr0
-#endif
-#ifdef CONFIG_44x
-BEGIN_MMU_FTR_SECTION
+ addi r4,r1,STACK_FRAME_OVERHEAD
+ li r5,0
+ bl syscall_exit_prepare
+#ifdef CONFIG_PPC_47x
lis r4,icache_44x_need_flush@ha
lwz r5,icache_44x_need_flush@l(r4)
cmplwi cr0,r5,0
bne- 2f
-1:
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x)
-#endif /* CONFIG_44x */
-BEGIN_FTR_SECTION
- lwarx r7,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
- stwcx. r0,0,r1 /* to clear the reservation */
- ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
-#ifdef CONFIG_PPC_BOOK3S_32
- kuep_unlock r5, r7
-#endif
- kuap_check r2, r4
+#endif /* CONFIG_PPC_47x */
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
mtlr r4
- mtcr r5
lwz r7,_NIP(r1)
- lwz r2,GPR2(r1)
- lwz r1,GPR1(r1)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
+ lwz r8,_MSR(r1)
+ cmpwi r3,0
+ lwz r3,GPR3(r1)
+syscall_exit_finish:
mtspr SPRN_SRR0,r7
mtspr SPRN_SRR1,r8
- SYNC
- RFI
+
+ bne 3f
+ mtcr r5
+
+1: lwz r2,GPR2(r1)
+ lwz r1,GPR1(r1)
+ rfi
+#ifdef CONFIG_40x
+ b . /* Prevent prefetch past rfi */
+#endif
+
+3: mtcr r5
+ lwz r4,_CTR(r1)
+ lwz r5,_XER(r1)
+ REST_NVGPRS(r1)
+ mtctr r4
+ mtxer r5
+ lwz r0,GPR0(r1)
+ lwz r3,GPR3(r1)
+ REST_8GPRS(4,r1)
+ lwz r12,GPR12(r1)
+ b 1b
+
#ifdef CONFIG_44x
2: li r7,0
iccci r0,r0
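The rewritten syscall path above now does the heavy lifting in C: the assembly only builds the pt_regs frame, calls system_call_exception() with the six argument registers plus the original r0 and a pointer to the regs (r9 and r10 in the 32-bit ABI, as the calling-convention comment notes), and later calls syscall_exit_prepare() with the return value. Roughly, the C-side entry points in arch/powerpc/kernel/interrupt.c have this shape (paraphrased, qualifiers omitted):

	long system_call_exception(long r3, long r4, long r5, long r6,
				   long r7, long r8, unsigned long r0,
				   struct pt_regs *regs);

	unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs,
					   long scv);

A non-zero return from syscall_exit_prepare() sends the assembly down the slow path at label 3, which restores the non-volatile registers before the rfi.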
@@ -474,9 +150,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
b 1b
#endif /* CONFIG_44x */
-66: li r3,-ENOSYS
- b ret_from_syscall
-
.globl ret_from_fork
ret_from_fork:
REST_NVGPRS(r1)
@@ -488,175 +161,13 @@ ret_from_fork:
ret_from_kernel_thread:
REST_NVGPRS(r1)
bl schedule_tail
- mtlr r14
+ mtctr r14
mr r3,r15
PPC440EP_ERR42
- blrl
+ bctrl
li r3,0
b ret_from_syscall
-/* Traced system call support */
-syscall_dotrace:
- SAVE_NVGPRS(r1)
- li r0,0xc00
- stw r0,_TRAP(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_enter
- /*
- * Restore argument registers possibly just changed.
- * We use the return value of do_syscall_trace_enter
- * for call number to look up in the table (r0).
- */
- mr r0,r3
- lwz r3,GPR3(r1)
- lwz r4,GPR4(r1)
- lwz r5,GPR5(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- REST_NVGPRS(r1)
-
- cmplwi r0,NR_syscalls
- /* Return code is already in r3 thanks to do_syscall_trace_enter() */
- bge- ret_from_syscall
- b syscall_dotrace_cont
-
-syscall_exit_work:
- andi. r0,r9,_TIF_RESTOREALL
- beq+ 0f
- REST_NVGPRS(r1)
- b 2f
-0: cmplw 0,r3,r8
- blt+ 1f
- andi. r0,r9,_TIF_NOERROR
- bne- 1f
- lwz r11,_CCR(r1) /* Load CR */
- neg r3,r3
- oris r11,r11,0x1000 /* Set SO bit in CR */
- stw r11,_CCR(r1)
-
-1: stw r6,RESULT(r1) /* Save result */
- stw r3,GPR3(r1) /* Update return value */
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
- beq 4f
-
- /* Clear per-syscall TIF flags if any are set. */
-
- li r11,_TIF_PERSYSCALL_MASK
- addi r12,r2,TI_FLAGS
-3: lwarx r8,0,r12
- andc r8,r8,r11
-#ifdef CONFIG_IBM405_ERR77
- dcbt 0,r12
-#endif
- stwcx. r8,0,r12
- bne- 3b
-
-4: /* Anything which requires enabling interrupts? */
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
- beq ret_from_except
-
- /* Re-enable interrupts. There is no need to trace that with
- * lockdep as we are supposed to have IRQs on at this point
- */
- ori r10,r10,MSR_EE
- SYNC
- mtmsr r10
-
- /* Save NVGPRS if they're not saved already */
- lwz r4,_TRAP(r1)
- andi. r4,r4,1
- beq 5f
- SAVE_NVGPRS(r1)
- li r4,0xc00
- stw r4,_TRAP(r1)
-5:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_leave
- b ret_from_except_full
-
-/*
- * The fork/clone functions need to copy the full register set into
- * the child process. Therefore we need to save all the nonvolatile
- * registers (r13 - r31) before calling the C code.
- */
- .globl ppc_fork
-ppc_fork:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_fork
-
- .globl ppc_vfork
-ppc_vfork:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_vfork
-
- .globl ppc_clone
-ppc_clone:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_clone
-
- .globl ppc_clone3
-ppc_clone3:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_clone3
-
- .globl ppc_swapcontext
-ppc_swapcontext:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_swapcontext
-
-/*
- * Top-level page fault handling.
- * This is in assembler because if do_page_fault tells us that
- * it is a bad kernel page fault, we want to save the non-volatile
- * registers before calling bad_page_fault.
- */
- .globl handle_page_fault
-handle_page_fault:
- addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S_32
- andis. r0,r5,DSISR_DABRMATCH@h
- bne- handle_dabr_fault
-#endif
- bl do_page_fault
- cmpwi r3,0
- beq+ ret_from_except
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- clrrwi r0,r0,1
- stw r0,_TRAP(r1)
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- lwz r4,_DAR(r1)
- bl bad_page_fault
- b ret_from_except_full
-
-#ifdef CONFIG_PPC_BOOK3S_32
- /* We have a data breakpoint exception - handle it */
-handle_dabr_fault:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- clrrwi r0,r0,1
- stw r0,_TRAP(r1)
- bl do_break
- b ret_from_except_full
-#endif
-
/*
* This routine switches between two different tasks. The process
* state of one is saved on its kernel stack. Then the state
@@ -684,33 +195,10 @@ _GLOBAL(_switch)
/* r3-r12 are caller saved -- Cort */
SAVE_NVGPRS(r1)
stw r0,_NIP(r1) /* Return to switch caller */
- mfmsr r11
- li r0,MSR_FP /* Disable floating-point */
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
- oris r0,r0,MSR_VEC@h /* Disable altivec */
- mfspr r12,SPRN_VRSAVE /* save vrsave register value */
- stw r12,THREAD+THREAD_VRSAVE(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
-BEGIN_FTR_SECTION
- oris r0,r0,MSR_SPE@h /* Disable SPE */
- mfspr r12,SPRN_SPEFSCR /* save spefscr register value */
- stw r12,THREAD+THREAD_SPEFSCR(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_SPE)
-#endif /* CONFIG_SPE */
- and. r0,r0,r11 /* FP or altivec or SPE enabled? */
- beq+ 1f
- andc r11,r11,r0
- mtmsr r11
- isync
-1: stw r11,_MSR(r1)
mfcr r10
stw r10,_CCR(r1)
stw r1,KSP(r3) /* Set old stack pointer */
- kuap_check r2, r4
#ifdef CONFIG_SMP
/* We need a sync somewhere here to make sure that if the
* previous task gets rescheduled on another CPU, it sees all
@@ -727,19 +215,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
mr r3,r2
addi r2,r4,-THREAD /* Update current */
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
- lwz r0,THREAD+THREAD_VRSAVE(r2)
- mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
-BEGIN_FTR_SECTION
- lwz r0,THREAD+THREAD_SPEFSCR(r2)
- mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */
-END_FTR_SECTION_IFSET(CPU_FTR_SPE)
-#endif /* CONFIG_SPE */
-
lwz r0,_CCR(r1)
mtcrf 0xFF,r0
/* r3-r12 are destroyed -- Cort */
@@ -754,12 +229,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
fast_exception_return:
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
andi. r10,r9,MSR_RI /* check for recoverable interrupt */
- beq 1f /* if not, we've got problems */
+ beq 3f /* if not, we've got problems */
#endif
2: REST_4GPRS(3, r11)
lwz r10,_CCR(r11)
- REST_GPR(1, r11)
+ REST_2GPRS(1, r11)
mtcr r10
lwz r10,_LINK(r11)
mtlr r10
@@ -775,278 +250,153 @@ fast_exception_return:
REST_GPR(9, r11)
REST_GPR(12, r11)
lwz r11,GPR11(r11)
- SYNC
- RFI
-
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-/* check if the exception happened in a restartable section */
-1: lis r3,exc_exit_restart_end@ha
- addi r3,r3,exc_exit_restart_end@l
- cmplw r12,r3
-#ifdef CONFIG_PPC_BOOK3S_601
- bge 2b
-#else
- bge 3f
-#endif
- lis r4,exc_exit_restart@ha
- addi r4,r4,exc_exit_restart@l
- cmplw r12,r4
-#ifdef CONFIG_PPC_BOOK3S_601
- blt 2b
-#else
- blt 3f
+ rfi
+#ifdef CONFIG_40x
+ b . /* Prevent prefetch past rfi */
#endif
- lis r3,fee_restarts@ha
- tophys(r3,r3)
- lwz r5,fee_restarts@l(r3)
- addi r5,r5,1
- stw r5,fee_restarts@l(r3)
- mr r12,r4 /* restart at exc_exit_restart */
- b 2b
-
- .section .bss
- .align 2
-fee_restarts:
- .space 4
- .previous
+_ASM_NOKPROBE_SYMBOL(fast_exception_return)
/* aargh, a nonrecoverable interrupt, panic */
/* aargh, we don't know which trap this is */
-/* but the 601 doesn't implement the RI bit, so assume it's OK */
3:
li r10,-1
stw r10,_TRAP(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- lis r10,MSR_KERNEL@h
- ori r10,r10,MSR_KERNEL@l
- bl transfer_to_handler_full
- .long unrecoverable_exception
- .long ret_from_except
-#endif
-
- .globl ret_from_except_full
-ret_from_except_full:
- REST_NVGPRS(r1)
- /* fall through */
-
- .globl ret_from_except
-ret_from_except:
- /* Hard-disable interrupts so that current_thread_info()->flags
- * can't change between when we test it and when we return
- * from the interrupt. */
- /* Note: We don't bother telling lockdep about it */
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
- SYNC /* Some chip revs have problems here... */
- mtmsr r10 /* disable interrupts */
-
- lwz r3,_MSR(r1) /* Returning to user mode? */
- andi. r0,r3,MSR_PR
- beq resume_kernel
-
-user_exc_return: /* r10 contains MSR_KERNEL here */
- /* Check current_thread_info()->flags */
- lwz r9,TI_FLAGS(r2)
- andi. r0,r9,_TIF_USER_WORK_MASK
- bne do_work
+ prepare_transfer_to_handler
+ bl unrecoverable_exception
+ trap /* should not get here */
-restore_user:
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- /* Check whether this process has its own DBCR0 value. The internal
- debug mode bit tells us that dbcr0 should be loaded. */
- lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IDM@h
- bnel- load_dbcr0
-#endif
- ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
-#ifdef CONFIG_PPC_BOOK3S_32
- kuep_unlock r10, r11
-#endif
+ .globl interrupt_return
+interrupt_return:
+ lwz r4,_MSR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ andi. r0,r4,MSR_PR
+ beq .Lkernel_interrupt_return
+ bl interrupt_exit_user_prepare
+ cmpwi r3,0
+ bne- .Lrestore_nvgprs
- b restore
+.Lfast_user_interrupt_return:
+ lwz r11,_NIP(r1)
+ lwz r12,_MSR(r1)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
-/* N.B. the only way to get here is from the beq following ret_from_except. */
-resume_kernel:
- /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- lwz r8,TI_FLAGS(r2)
- andis. r0,r8,_TIF_EMULATE_STACK_STORE@h
- beq+ 1f
+BEGIN_FTR_SECTION
+ stwcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ lwarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+ lwz r3,_CCR(r1)
+ lwz r4,_LINK(r1)
+ lwz r5,_CTR(r1)
+ lwz r6,_XER(r1)
+ li r0,0
- lwz r3,GPR1(r1)
- subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
- mr r4,r1 /* src: current exception frame */
- mr r1,r3 /* Reroute the trampoline frame to r1 */
+ /*
+ * Leaving a stale exception_marker on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
+ */
+ stw r0,8(r1)
+ REST_4GPRS(7, r1)
+ REST_2GPRS(11, r1)
- /* Copy from the original to the trampoline. */
- li r5,INT_FRAME_SIZE/4 /* size: INT_FRAME_SIZE */
- li r6,0 /* start offset: 0 */
+ mtcr r3
+ mtlr r4
mtctr r5
-2: lwzx r0,r6,r4
- stwx r0,r6,r3
- addi r6,r6,4
- bdnz 2b
-
- /* Do real store operation to complete stwu */
- lwz r5,GPR1(r1)
- stw r8,0(r5)
+ mtspr SPRN_XER,r6
- /* Clear _TIF_EMULATE_STACK_STORE flag */
- lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r2,TI_FLAGS
-0: lwarx r8,0,r5
- andc r8,r8,r11
-#ifdef CONFIG_IBM405_ERR77
- dcbt 0,r5
-#endif
- stwcx. r8,0,r5
- bne- 0b
-1:
-
-#ifdef CONFIG_PREEMPTION
- /* check current_thread_info->preempt_count */
- lwz r0,TI_PREEMPT(r2)
- cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
- bne restore_kuap
- andi. r8,r8,_TIF_NEED_RESCHED
- beq+ restore_kuap
- lwz r3,_MSR(r1)
- andi. r0,r3,MSR_EE /* interrupts off? */
- beq restore_kuap /* don't schedule if so */
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Lockdep thinks irqs are enabled, we need to call
- * preempt_schedule_irq with IRQs off, so we inform lockdep
- * now that we -did- turn them off already
- */
- bl trace_hardirqs_off
-#endif
- bl preempt_schedule_irq
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* And now, to properly rebalance the above, we tell lockdep they
- * are being turned back on, which will happen when we return
- */
- bl trace_hardirqs_on
+ REST_4GPRS(2, r1)
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ rfi
+#ifdef CONFIG_40x
+ b . /* Prevent prefetch past rfi */
#endif
-#endif /* CONFIG_PREEMPTION */
-restore_kuap:
- kuap_restore r1, r2, r9, r10, r0
- /* interrupts are hard-disabled at this point */
-restore:
-#ifdef CONFIG_44x
-BEGIN_MMU_FTR_SECTION
- b 1f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
- lis r4,icache_44x_need_flush@ha
- lwz r5,icache_44x_need_flush@l(r4)
- cmplwi cr0,r5,0
- beq+ 1f
- li r6,0
- iccci r0,r0
- stw r6,icache_44x_need_flush@l(r4)
-1:
-#endif /* CONFIG_44x */
-
- lwz r9,_MSR(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Lockdep doesn't know about the fact that IRQs are temporarily turned
- * off in this assembly code while peeking at TI_FLAGS() and such. However
- * we need to inform it if the exception turned interrupts off, and we
- * are about to turn them back on.
- */
- andi. r10,r9,MSR_EE
- beq 1f
- stwu r1,-32(r1)
- mflr r0
- stw r0,4(r1)
- bl trace_hardirqs_on
- addi r1, r1, 32
- lwz r9,_MSR(r1)
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
+.Lrestore_nvgprs:
+ REST_NVGPRS(r1)
+ b .Lfast_user_interrupt_return
- lwz r0,GPR0(r1)
- lwz r2,GPR2(r1)
- REST_4GPRS(3, r1)
- REST_2GPRS(7, r1)
+.Lkernel_interrupt_return:
+ bl interrupt_exit_kernel_prepare
- lwz r10,_XER(r1)
- lwz r11,_CTR(r1)
- mtspr SPRN_XER,r10
- mtctr r11
+.Lfast_kernel_interrupt_return:
+ cmpwi cr1,r3,0
+ lwz r11,_NIP(r1)
+ lwz r12,_MSR(r1)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
- PPC405_ERR77(0,r1)
BEGIN_FTR_SECTION
- lwarx r11,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
- stwcx. r0,0,r1 /* to clear the reservation */
+ stwcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ lwarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ lwz r3,_LINK(r1)
+ lwz r4,_CTR(r1)
+ lwz r5,_XER(r1)
+ lwz r6,_CCR(r1)
+ li r0,0
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
- andi. r10,r9,MSR_RI /* check if this exception occurred */
- beql nonrecoverable /* at a bad place (MSR:RI = 0) */
+ REST_4GPRS(7, r1)
+ REST_2GPRS(11, r1)
- lwz r10,_CCR(r1)
- lwz r11,_LINK(r1)
- mtcrf 0xFF,r10
- mtlr r11
+ mtlr r3
+ mtctr r4
+ mtspr SPRN_XER,r5
- /* Clear the exception_marker on the stack to avoid confusing stacktrace */
- li r10, 0
- stw r10, 8(r1)
/*
- * Once we put values in SRR0 and SRR1, we are in a state
- * where exceptions are not recoverable, since taking an
- * exception will trash SRR0 and SRR1. Therefore we clear the
- * MSR:RI bit to indicate this. If we do take an exception,
- * we can't return to the point of the exception but we
- * can restart the exception exit path at the label
- * exc_exit_restart below. -- paulus
+ * Leaving a stale exception_marker on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
*/
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
- SYNC
- mtmsr r10 /* clear the RI bit */
- .globl exc_exit_restart
-exc_exit_restart:
- lwz r12,_NIP(r1)
- mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r9
- REST_4GPRS(9, r1)
- lwz r1,GPR1(r1)
- .globl exc_exit_restart_end
-exc_exit_restart_end:
- SYNC
- RFI
+ stw r0,8(r1)
-#else /* !(CONFIG_4xx || CONFIG_BOOKE) */
- /*
- * This is a bit different on 4xx/Book-E because it doesn't have
- * the RI bit in the MSR.
- * The TLB miss handler checks if we have interrupted
- * the exception exit path and restarts it if so
- * (well maybe one day it will... :).
+ REST_4GPRS(2, r1)
+
+ bne- cr1,1f /* emulate stack store */
+ mtcr r6
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ rfi
+#ifdef CONFIG_40x
+ b . /* Prevent prefetch past rfi */
+#endif
+
+1: /*
+ * Emulate stack store with update. New r1 value was already calculated
+ * and updated in our interrupt regs by emulate_loadstore, but we can't
+ * store the previous value of r1 to the stack before re-loading our
+ * registers from it, otherwise they could be clobbered. Use
+ * SPRG Scratch0 as temporary storage to hold the store
+ * data, as interrupts are disabled here so it won't be clobbered.
*/
- lwz r11,_LINK(r1)
- mtlr r11
- lwz r10,_CCR(r1)
- mtcrf 0xff,r10
- /* Clear the exception_marker on the stack to avoid confusing stacktrace */
- li r10, 0
- stw r10, 8(r1)
- REST_2GPRS(9, r1)
- .globl exc_exit_restart
-exc_exit_restart:
- lwz r11,_NIP(r1)
- lwz r12,_MSR(r1)
-exc_exit_start:
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
- REST_2GPRS(11, r1)
- lwz r1,GPR1(r1)
- .globl exc_exit_restart_end
-exc_exit_restart_end:
- PPC405_ERR77_SYNC
+ mtcr r6
+#ifdef CONFIG_BOOKE
+ mtspr SPRN_SPRG_WSCRATCH0, r9
+#else
+ mtspr SPRN_SPRG_SCRATCH0, r9
+#endif
+ addi r9,r1,INT_FRAME_SIZE /* get original r1 */
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ stw r9,0(r1) /* perform store component of stwu */
+#ifdef CONFIG_BOOKE
+ mfspr r9, SPRN_SPRG_RSCRATCH0
+#else
+ mfspr r9, SPRN_SPRG_SCRATCH0
+#endif
rfi
- b . /* prevent prefetch past rfi */
+#ifdef CONFIG_40x
+ b . /* Prevent prefetch past rfi */
+#endif
+_ASM_NOKPROBE_SYMBOL(interrupt_return)
+
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
/*
* Returning from a critical interrupt in user mode doesn't need
@@ -1077,8 +427,7 @@ exc_exit_restart_end:
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
- bne user_exc_return; \
+ bne interrupt_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
REST_4GPRS(3, r1); \
@@ -1087,7 +436,6 @@ exc_exit_restart_end:
lwz r11,_CTR(r1); \
mtspr SPRN_XER,r10; \
mtctr r11; \
- PPC405_ERR77(0,r1); \
stwcx. r0,0,r1; /* to clear the reservation */ \
lwz r11,_LINK(r1); \
mtlr r11; \
@@ -1107,7 +455,6 @@ exc_exit_restart_end:
lwz r10,GPR10(r1); \
lwz r11,GPR11(r1); \
lwz r1,GPR1(r1); \
- PPC405_ERR77_SYNC; \
exc_lvl_rfi; \
b .; /* prevent prefetch past exc_lvl_rfi */
@@ -1148,11 +495,6 @@ exc_exit_restart_end:
#ifdef CONFIG_40x
.globl ret_from_crit_exc
ret_from_crit_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lis r10,saved_ksp_limit@ha;
- lwz r10,saved_ksp_limit@l(r10);
- tovirt(r9,r9);
- stw r10,KSP_LIMIT(r9)
lis r9,crit_srr0@ha;
lwz r9,crit_srr0@l(r9);
lis r10,crit_srr1@ha;
@@ -1160,168 +502,36 @@ ret_from_crit_exc:
mtspr SPRN_SRR0,r9;
mtspr SPRN_SRR1,r10;
RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
#endif /* CONFIG_40x */
#ifdef CONFIG_BOOKE
.globl ret_from_crit_exc
ret_from_crit_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lwz r10,SAVED_KSP_LIMIT(r1)
- stw r10,KSP_LIMIT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
.globl ret_from_debug_exc
ret_from_debug_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lwz r10,SAVED_KSP_LIMIT(r1)
- stw r10,KSP_LIMIT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_xSRR(CSRR0,CSRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, PPC_RFDI)
+_ASM_NOKPROBE_SYMBOL(ret_from_debug_exc)
.globl ret_from_mcheck_exc
ret_from_mcheck_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lwz r10,SAVED_KSP_LIMIT(r1)
- stw r10,KSP_LIMIT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_xSRR(CSRR0,CSRR1);
RESTORE_xSRR(DSRR0,DSRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc)
#endif /* CONFIG_BOOKE */
-
-/*
- * Load the DBCR0 value for a task that is being ptraced,
- * having first saved away the global DBCR0. Note that r0
- * has the dbcr0 value to set upon entry to this.
- */
-load_dbcr0:
- mfmsr r10 /* first disable debug exceptions */
- rlwinm r10,r10,0,~MSR_DE
- mtmsr r10
- isync
- mfspr r10,SPRN_DBCR0
- lis r11,global_dbcr0@ha
- addi r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
- lwz r9,TASK_CPU(r2)
- slwi r9,r9,3
- add r11,r11,r9
-#endif
- stw r10,0(r11)
- mtspr SPRN_DBCR0,r0
- lwz r10,4(r11)
- addi r10,r10,1
- stw r10,4(r11)
- li r11,-1
- mtspr SPRN_DBSR,r11 /* clear all pending debug events */
- blr
-
- .section .bss
- .align 4
- .global global_dbcr0
-global_dbcr0:
- .space 8*NR_CPUS
- .previous
#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
-do_work: /* r10 contains MSR_KERNEL here */
- andi. r0,r9,_TIF_NEED_RESCHED
- beq do_user_signal
-
-do_resched: /* r10 contains MSR_KERNEL here */
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_on
- mfmsr r10
-#endif
- ori r10,r10,MSR_EE
- SYNC
- mtmsr r10 /* hard-enable interrupts */
- bl schedule
-recheck:
- /* Note: And we don't tell it we are disabling them again
- * either. Those disable/enable cycles used to peek at
- * TI_FLAGS aren't advertised.
- */
- LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
- SYNC
- mtmsr r10 /* disable interrupts */
- lwz r9,TI_FLAGS(r2)
- andi. r0,r9,_TIF_NEED_RESCHED
- bne- do_resched
- andi. r0,r9,_TIF_USER_WORK_MASK
- beq restore_user
-do_user_signal: /* r10 contains MSR_KERNEL here */
- ori r10,r10,MSR_EE
- SYNC
- mtmsr r10 /* hard-enable interrupts */
- /* save r13-r31 in the exception frame, if not already done */
- lwz r3,_TRAP(r1)
- andi. r0,r3,1
- beq 2f
- SAVE_NVGPRS(r1)
- rlwinm r3,r3,0,0,30
- stw r3,_TRAP(r1)
-2: addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r9
- bl do_notify_resume
- REST_NVGPRS(r1)
- b recheck
-
-/*
- * We come here when we are at the end of handling an exception
- * that occurred at a place where taking an exception will lose
- * state information, such as the contents of SRR0 and SRR1.
- */
-nonrecoverable:
- lis r10,exc_exit_restart_end@ha
- addi r10,r10,exc_exit_restart_end@l
- cmplw r12,r10
-#ifdef CONFIG_PPC_BOOK3S_601
- bgelr
-#else
- bge 3f
-#endif
- lis r11,exc_exit_restart@ha
- addi r11,r11,exc_exit_restart@l
- cmplw r12,r11
-#ifdef CONFIG_PPC_BOOK3S_601
- bltlr
-#else
- blt 3f
-#endif
- lis r10,ee_restarts@ha
- lwz r12,ee_restarts@l(r10)
- addi r12,r12,1
- stw r12,ee_restarts@l(r10)
- mr r12,r11 /* restart at exc_exit_restart */
- blr
-3: /* OK, we can't recover, kill this process */
- /* but the 601 doesn't implement the RI bit, so assume it's OK */
- lwz r3,_TRAP(r1)
- andi. r0,r3,1
- beq 5f
- SAVE_NVGPRS(r1)
- rlwinm r3,r3,0,0,30
- stw r3,_TRAP(r1)
-5: mfspr r2,SPRN_SPRG_THREAD
- addi r2,r2,-THREAD
- tovirt(r2,r2) /* set back r2 to current */
-4: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- /* shouldn't return */
- b 4b
-
- .section .bss
- .align 2
-ee_restarts:
- .space 4
- .previous
-
/*
* PROM code for specific machines follows. Put it
* here so it's easy to add arch-specific sections later.
@@ -1340,39 +550,33 @@ _GLOBAL(enter_rtas)
lis r6,1f@ha /* physical return address for rtas */
addi r6,r6,1f@l
tophys(r6,r6)
- tophys_novmstack r7, r1
lwz r8,RTASENTRY(r4)
lwz r4,RTASBASE(r4)
mfmsr r9
stw r9,8(r1)
LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
- SYNC /* disable interrupts so SRR0/1 */
- mtmsr r0 /* don't get trashed */
+ mtmsr r0 /* disable interrupts so SRR0/1 don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
mtlr r6
- stw r7, THREAD + RTAS_SP(r2)
+ stw r1, THREAD + RTAS_SP(r2)
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
- RFI
-1: tophys_novmstack r9, r1
-#ifdef CONFIG_VMAP_STACK
- li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */
- mtmsr r0
- isync
-#endif
- lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */
- lwz r9,8(r9) /* original msr value */
- addi r1,r1,INT_FRAME_SIZE
- li r0,0
- tophys_novmstack r7, r2
- stw r0, THREAD + RTAS_SP(r7)
+ rfi
+1:
+ lis r8, 1f@h
+ ori r8, r8, 1f@l
+ LOAD_REG_IMMEDIATE(r9,MSR_KERNEL)
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
- RFI /* return to caller */
-
- .globl machine_check_in_rtas
-machine_check_in_rtas:
- twi 31,0,0
- /* XXX load up BATs and panic */
-
+ rfi /* Reactivate MMU translation */
+1:
+ lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */
+ lwz r9,8(r1) /* original msr value */
+ addi r1,r1,INT_FRAME_SIZE
+ li r0,0
+ stw r0, THREAD + RTAS_SP(r2)
+ mtlr r8
+ mtmsr r9
+ blr /* return to caller */
+_ASM_NOKPROBE_SYMBOL(enter_rtas)
#endif /* CONFIG_PPC_RTAS */
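
For orientation: the rewritten 32-bit entry/exit paths above no longer dispatch the system call or do exit work in assembly; they branch into C helpers instead. Judging purely from the register setup at the call sites shown (r3-r8 syscall arguments, r9 = original r0, r10 = regs before system_call_exception; r4 = regs, r5 = 0 before syscall_exit_prepare; r3 = regs, r4 = saved MSR before the interrupt_exit_*_prepare calls), the helpers would have prototypes roughly like the sketch below. These signatures are inferred, not copied from the C side of the series, so treat them as assumptions.

    /* Sketch: prototypes inferred from the assembly call sites above. */
    struct pt_regs;

    /* r3..r8 = syscall args, r9 = original r0 (syscall number), r10 = regs */
    long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8,
                               unsigned long r0, struct pt_regs *regs);

    /* r3 = value returned by the handler, r4 = regs, r5 = 0 on this path;
     * a non-zero return makes the caller take the slow exit that restores
     * the non-volatile GPRs (the "bne 3f" after the cmpwi r3,0 above). */
    unsigned long syscall_exit_prepare(unsigned long ret, struct pt_regs *regs, long scv);

    /* r3 = regs, r4 = MSR saved in the frame; a non-zero return means extra
     * work: restore NVGPRS on the user path, or the emulated stack store
     * (the cr1 test) on the kernel path. */
    unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr);
    unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr);
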
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6ba675b0cf7d..70cff7b49e17 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -16,6 +16,7 @@
#include <linux/errno.h>
#include <linux/err.h>
+#include <asm/cache.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/page.h>
@@ -31,7 +32,6 @@
#include <asm/irqflags.h>
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
-#include <asm/tm.h>
#include <asm/ppc-opcode.h>
#include <asm/barrier.h>
#include <asm/export.h>
@@ -47,483 +47,17 @@
/*
* System calls.
*/
- .section ".toc","aw"
-SYS_CALL_TABLE:
- .tc sys_call_table[TC],sys_call_table
-
-COMPAT_SYS_CALL_TABLE:
- .tc compat_sys_call_table[TC],compat_sys_call_table
-
-/* This value is used to mark exception frames on the stack. */
-exception_marker:
- .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
-
.section ".text"
- .align 7
-
- .globl system_call_common
-system_call_common:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
- extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
- bne .Ltabort_syscall
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
-#endif
- mr r10,r1
- ld r1,PACAKSAVE(r13)
- std r10,0(r1)
- std r11,_NIP(r1)
- std r12,_MSR(r1)
- std r0,GPR0(r1)
- std r10,GPR1(r1)
-#ifdef CONFIG_PPC_FSL_BOOK3E
-START_BTB_FLUSH_SECTION
- BTB_FLUSH(r10)
-END_BTB_FLUSH_SECTION
-#endif
- ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
- std r2,GPR2(r1)
- std r3,GPR3(r1)
- mfcr r2
- std r4,GPR4(r1)
- std r5,GPR5(r1)
- std r6,GPR6(r1)
- std r7,GPR7(r1)
- std r8,GPR8(r1)
- li r11,0
- std r11,GPR9(r1)
- std r11,GPR10(r1)
- std r11,GPR11(r1)
- std r11,GPR12(r1)
- std r11,_XER(r1)
- std r11,_CTR(r1)
- std r9,GPR13(r1)
- mflr r10
- /*
- * This clears CR0.SO (bit 28), which is the error indication on
- * return from this system call.
- */
- rldimi r2,r11,28,(63-28)
- li r11,0xc01
- std r10,_LINK(r1)
- std r11,_TRAP(r1)
- std r3,ORIG_GPR3(r1)
- std r2,_CCR(r1)
- ld r2,PACATOC(r13)
- addi r9,r1,STACK_FRAME_OVERHEAD
- ld r11,exception_marker@toc(r2)
- std r11,-16(r9) /* "regshere" marker */
-
- kuap_check_amr r10, r11
-
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
-BEGIN_FW_FTR_SECTION
- /* see if there are any DTL entries to process */
- ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
- ld r11,PACA_DTL_RIDX(r13) /* get log read index */
- addi r10,r10,LPPACA_DTLIDX
- LDX_BE r10,0,r10 /* get log write index */
- cmpd r11,r10
- beq+ 33f
- bl accumulate_stolen_time
- REST_GPR(0,r1)
- REST_4GPRS(3,r1)
- REST_2GPRS(7,r1)
- addi r9,r1,STACK_FRAME_OVERHEAD
-33:
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
-
- /*
- * A syscall should always be called with interrupts enabled
- * so we just unconditionally hard-enable here. When some kind
- * of irq tracing is used, we additionally check that condition
- * is correct
- */
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
- lbz r10,PACAIRQSOFTMASK(r13)
-1: tdnei r10,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- li r11,MSR_RI
- ori r11,r11,MSR_EE
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
-
-system_call: /* label this so stack traces look sane */
- /* We do need to set SOFTE in the stack frame or the return
- * from interrupt will be painful
- */
- li r10,IRQS_ENABLED
- std r10,SOFTE(r1)
-
- ld r11, PACA_THREAD_INFO(r13)
- ld r10,TI_FLAGS(r11)
- andi. r11,r10,_TIF_SYSCALL_DOTRACE
- bne .Lsyscall_dotrace /* does not return */
- cmpldi 0,r0,NR_syscalls
- bge- .Lsyscall_enosys
-
-.Lsyscall:
-/*
- * Need to vector to 32 Bit or default sys_call_table here,
- * based on caller's run-mode / personality.
- */
- ld r11,SYS_CALL_TABLE@toc(2)
- andis. r10,r10,_TIF_32BIT@h
- beq 15f
- ld r11,COMPAT_SYS_CALL_TABLE@toc(2)
- clrldi r3,r3,32
- clrldi r4,r4,32
- clrldi r5,r5,32
- clrldi r6,r6,32
- clrldi r7,r7,32
- clrldi r8,r8,32
-15:
- slwi r0,r0,3
-
- barrier_nospec_asm
- /*
- * Prevent the load of the handler below (based on the user-passed
- * system call number) being speculatively executed until the test
- * against NR_syscalls and branch to .Lsyscall_enosys above has
- * committed.
- */
-
- ldx r12,r11,r0 /* Fetch system call handler [ptr] */
- mtctr r12
- bctrl /* Call handler */
-
- /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
-.Lsyscall_exit:
- std r3,RESULT(r1)
-
-#ifdef CONFIG_DEBUG_RSEQ
- /* Check whether the syscall is issued inside a restartable sequence */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl rseq_syscall
- ld r3,RESULT(r1)
-#endif
-
- ld r12, PACA_THREAD_INFO(r13)
-
- ld r8,_MSR(r1)
-
-/*
- * This is a few instructions into the actual syscall exit path (which actually
- * starts at .Lsyscall_exit) to cater to kprobe blacklisting and to reduce the
- * number of visible symbols for profiling purposes.
- *
- * We can probe from system_call until this point as MSR_RI is set. But once it
- * is cleared below, we won't be able to take a trap.
- *
- * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
- */
-system_call_exit:
- /*
- * Disable interrupts so current_thread_info()->flags can't change,
- * and so that we don't get interrupted after loading SRR0/1.
- *
- * Leave MSR_RI enabled for now, because with THREAD_INFO_IN_TASK we
- * could fault on the load of the TI_FLAGS below.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r11,MSR_RI
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
-
- ld r9,TI_FLAGS(r12)
- li r11,-MAX_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- .Lsyscall_exit_work
-
- andi. r0,r8,MSR_FP
- beq 2f
-#ifdef CONFIG_ALTIVEC
- andis. r0,r8,MSR_VEC@h
- bne 3f
-#endif
-2: addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_math
- ld r8,_MSR(r1)
- ld r3,RESULT(r1)
- li r11,-MAX_ERRNO
-
-3: cmpld r3,r11
- ld r5,_CCR(r1)
- bge- .Lsyscall_error
-.Lsyscall_error_cont:
- ld r7,_NIP(r1)
-BEGIN_FTR_SECTION
- stdcx. r0,0,r1 /* to clear the reservation */
-END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- andi. r6,r8,MSR_PR
- ld r4,_LINK(r1)
-
- kuap_check_amr r10, r11
-
-#ifdef CONFIG_PPC_BOOK3S
- /*
- * Clear MSR_RI, MSR_EE is already and remains disabled. We could do
- * this later, but testing shows that doing it here causes less slow
- * down than doing it closer to the rfid.
- */
- li r11,0
- mtmsrd r11,1
-#endif
-
- beq- 1f
- ACCOUNT_CPU_USER_EXIT(r13, r11, r12)
-
-BEGIN_FTR_SECTION
- HMT_MEDIUM_LOW
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- std r8, PACATMSCRATCH(r13)
-#endif
-
- /*
- * We don't need to restore AMR on the way back to userspace for KUAP.
- * The value of AMR only matters while we're in the kernel.
- */
- ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
- ld r2,GPR2(r1)
- ld r1,GPR1(r1)
- mtlr r4
- mtcr r5
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
- RFI_TO_USER
- b . /* prevent speculative execution */
-
-1: /* exit to kernel */
- kuap_restore_amr r2
-
- ld r2,GPR2(r1)
- ld r1,GPR1(r1)
- mtlr r4
- mtcr r5
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-.Lsyscall_error:
- oris r5,r5,0x1000 /* Set SO bit in CR */
- neg r3,r3
- std r5,_CCR(r1)
- b .Lsyscall_error_cont
-
-/* Traced system call support */
-.Lsyscall_dotrace:
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_enter
-
- /*
- * We use the return value of do_syscall_trace_enter() as the syscall
- * number. If the syscall was rejected for any reason do_syscall_trace_enter()
- * returns an invalid syscall number and the test below against
- * NR_syscalls will fail.
- */
- mr r0,r3
-
- /* Restore argument registers just clobbered and/or possibly changed. */
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r5,GPR5(r1)
- ld r6,GPR6(r1)
- ld r7,GPR7(r1)
- ld r8,GPR8(r1)
-
- /* Repopulate r9 and r10 for the syscall path */
- addi r9,r1,STACK_FRAME_OVERHEAD
- ld r10, PACA_THREAD_INFO(r13)
- ld r10,TI_FLAGS(r10)
-
- cmpldi r0,NR_syscalls
- blt+ .Lsyscall
-
- /* Return code is already in r3 thanks to do_syscall_trace_enter() */
- b .Lsyscall_exit
-
-
-.Lsyscall_enosys:
- li r3,-ENOSYS
- b .Lsyscall_exit
-
-.Lsyscall_exit_work:
- /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
- If TIF_NOERROR is set, just save r3 as it is. */
-
- andi. r0,r9,_TIF_RESTOREALL
- beq+ 0f
- REST_NVGPRS(r1)
- b 2f
-0: cmpld r3,r11 /* r11 is -MAX_ERRNO */
- blt+ 1f
- andi. r0,r9,_TIF_NOERROR
- bne- 1f
- ld r5,_CCR(r1)
- neg r3,r3
- oris r5,r5,0x1000 /* Set SO bit in CR */
- std r5,_CCR(r1)
-1: std r3,GPR3(r1)
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
- beq 4f
-
- /* Clear per-syscall TIF flags if any are set. */
-
- li r11,_TIF_PERSYSCALL_MASK
- addi r12,r12,TI_FLAGS
-3: ldarx r10,0,r12
- andc r10,r10,r11
- stdcx. r10,0,r12
- bne- 3b
- subi r12,r12,TI_FLAGS
-
-4: /* Anything else left to do? */
-BEGIN_FTR_SECTION
- lis r3,DEFAULT_PPR@highest /* Set default PPR */
- sldi r3,r3,32 /* bits 11-13 are used for ppr */
- std r3,_PPR(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
- beq ret_from_except_lite
-
- /* Re-enable interrupts */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- li r10,MSR_RI
- ori r10,r10,MSR_EE
- mtmsrd r10,1
-#endif /* CONFIG_PPC_BOOK3E */
-
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_leave
- b ret_from_except
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-.Ltabort_syscall:
- /* Firstly we need to enable TM in the kernel */
- mfmsr r10
- li r9, 1
- rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG
- mtmsrd r10, 0
-
- /* tabort, this dooms the transaction, nothing else */
- li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
- TABORT(R9)
-
- /*
- * Return directly to userspace. We have corrupted user register state,
- * but userspace will never see that register state. Execution will
- * resume after the tbegin of the aborted transaction with the
- * checkpointed register state.
- */
- li r9, MSR_RI
- andc r10, r10, r9
- mtmsrd r10, 1
- mtspr SPRN_SRR0, r11
- mtspr SPRN_SRR1, r12
- RFI_TO_USER
- b . /* prevent speculative execution */
-#endif
-_ASM_NOKPROBE_SYMBOL(system_call_common);
-_ASM_NOKPROBE_SYMBOL(system_call_exit);
-
-/* Save non-volatile GPRs, if not already saved. */
-_GLOBAL(save_nvgprs)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- beqlr-
- SAVE_NVGPRS(r1)
- clrrdi r0,r11,1
- std r0,_TRAP(r1)
- blr
-_ASM_NOKPROBE_SYMBOL(save_nvgprs);
-
-
-/*
- * The sigsuspend and rt_sigsuspend system calls can call do_signal
- * and thus put the process into the stopped state where we might
- * want to examine its user state with ptrace. Therefore we need
- * to save all the nonvolatile registers (r14 - r31) before calling
- * the C code. Similarly, fork, vfork and clone need the full
- * register state on the stack so that it can be copied to the child.
- */
-
-_GLOBAL(ppc_fork)
- bl save_nvgprs
- bl sys_fork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_vfork)
- bl save_nvgprs
- bl sys_vfork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone)
- bl save_nvgprs
- bl sys_clone
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone3)
- bl save_nvgprs
- bl sys_clone3
- b .Lsyscall_exit
-
-_GLOBAL(ppc32_swapcontext)
- bl save_nvgprs
- bl compat_sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc64_swapcontext)
- bl save_nvgprs
- bl sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc_switch_endian)
- bl save_nvgprs
- bl sys_switch_endian
- b .Lsyscall_exit
-
-_GLOBAL(ret_from_fork)
- bl schedule_tail
- REST_NVGPRS(r1)
- li r3,0
- b .Lsyscall_exit
-
-_GLOBAL(ret_from_kernel_thread)
- bl schedule_tail
- REST_NVGPRS(r1)
- mtlr r14
- mr r3,r15
-#ifdef PPC64_ELF_ABI_v2
- mr r12,r14
-#endif
- blrl
- li r3,0
- b .Lsyscall_exit
#ifdef CONFIG_PPC_BOOK3S_64
#define FLUSH_COUNT_CACHE \
1: nop; \
- patch_site 1b, patch__call_flush_count_cache
-
-
-#define BCCTR_FLUSH .long 0x4c400420
+ patch_site 1b, patch__call_flush_branch_caches1; \
+1: nop; \
+ patch_site 1b, patch__call_flush_branch_caches2; \
+1: nop; \
+ patch_site 1b, patch__call_flush_branch_caches3
.macro nops number
.rept \number
@@ -532,8 +66,8 @@ _GLOBAL(ret_from_kernel_thread)
.endm
.balign 32
-.global flush_count_cache
-flush_count_cache:
+.global flush_branch_caches
+flush_branch_caches:
/* Save LR into r9 */
mflr r9
@@ -555,7 +89,7 @@ flush_count_cache:
li r9,0x7fff
mtctr r9
- BCCTR_FLUSH
+ PPC_BCCTR_FLUSH
2: nop
patch_site 2b patch__flush_count_cache_return
@@ -564,7 +98,7 @@ flush_count_cache:
.rept 278
.balign 32
- BCCTR_FLUSH
+ PPC_BCCTR_FLUSH
nops 7
.endr
@@ -578,7 +112,7 @@ flush_count_cache:
* state of one is saved on its kernel stack. Then the state
* of the other is restored from its kernel stack. The memory
* management hardware is updated to the second process's state.
- * Finally, we can return to the second process, via ret_from_except.
+ * Finally, we can return to the second process, via interrupt_return.
* On entry, r3 points to the THREAD for the current task, r4
* points to the THREAD for the new task.
*
@@ -605,7 +139,7 @@ _GLOBAL(_switch)
kuap_check_amr r9, r10
- FLUSH_COUNT_CACHE
+ FLUSH_COUNT_CACHE /* Clobbers r9, ctr */
/*
* On SMP kernels, care must be taken because a task may be
@@ -618,7 +152,7 @@ _GLOBAL(_switch)
* kernel/sched/core.c).
*
* Uncacheable stores in the case of involuntary preemption must
- * be taken care of. The smp_mb__before_spin_lock() in __schedule()
+ * be taken care of. The smp_mb__after_spinlock() in __schedule()
* is implemented as hwsync on powerpc, which orders MMIO too. So
* long as there is an hwsync in the context switch path, it will
* be executed on the source CPU after the task has performed
@@ -730,409 +264,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
addi r1,r1,SWITCH_FRAME_SIZE
blr
- .align 7
-_GLOBAL(ret_from_except)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- bne ret_from_except_lite
- REST_NVGPRS(r1)
-
-_GLOBAL(ret_from_except_lite)
- /*
- * Disable interrupts so that current_thread_info()->flags
- * can't change between when we test it and when we return
- * from the interrupt.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r10,MSR_RI
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-
- ld r9, PACA_THREAD_INFO(r13)
- ld r3,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3E
- ld r10,PACACURRENT(r13)
-#endif /* CONFIG_PPC_BOOK3E */
- ld r4,TI_FLAGS(r9)
- andi. r3,r3,MSR_PR
- beq resume_kernel
-#ifdef CONFIG_PPC_BOOK3E
- lwz r3,(THREAD+THREAD_DBCR0)(r10)
-#endif /* CONFIG_PPC_BOOK3E */
-
- /* Check current_thread_info()->flags */
- andi. r0,r4,_TIF_USER_WORK_MASK
- bne 1f
-#ifdef CONFIG_PPC_BOOK3E
- /*
- * Check to see if the dbcr0 register is set up to debug.
- * Use the internal debug mode bit to do this.
- */
- andis. r0,r3,DBCR0_IDM@h
- beq restore
- mfmsr r0
- rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
- mtmsr r0
- mtspr SPRN_DBCR0,r3
- li r10, -1
- mtspr SPRN_DBSR,r10
- b restore
-#else
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_math
- b restore
-#endif
-1: andi. r0,r4,_TIF_NEED_RESCHED
- beq 2f
- bl restore_interrupts
- SCHEDULE_USER
- b ret_from_except_lite
-2:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
- bne 3f /* only restore TM if nothing else to do */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_tm_state
- b restore
-3:
-#endif
- bl save_nvgprs
- /*
- * Use a non volatile GPR to save and restore our thread_info flags
- * across the call to restore_interrupts.
- */
- mr r30,r4
- bl restore_interrupts
- mr r4,r30
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_notify_resume
- b ret_from_except
-
-resume_kernel:
- /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- andis. r8,r4,_TIF_EMULATE_STACK_STORE@h
- beq+ 1f
-
- addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
-
- ld r3,GPR1(r1)
- subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
- mr r4,r1 /* src: current exception frame */
- mr r1,r3 /* Reroute the trampoline frame to r1 */
-
- /* Copy from the original to the trampoline. */
- li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
- li r6,0 /* start offset: 0 */
- mtctr r5
-2: ldx r0,r6,r4
- stdx r0,r6,r3
- addi r6,r6,8
- bdnz 2b
-
- /* Do real store operation to complete stdu */
- ld r5,GPR1(r1)
- std r8,0(r5)
-
- /* Clear _TIF_EMULATE_STACK_STORE flag */
- lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r9,TI_FLAGS
-0: ldarx r4,0,r5
- andc r4,r4,r11
- stdcx. r4,0,r5
- bne- 0b
-1:
-
-#ifdef CONFIG_PREEMPTION
- /* Check if we need to preempt */
- andi. r0,r4,_TIF_NEED_RESCHED
- beq+ restore
- /* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
- cmpwi cr0,r8,0
- bne restore
- ld r0,SOFTE(r1)
- andi. r0,r0,IRQS_DISABLED
- bne restore
-
- /*
- * Here we are preempting the current task. We want to make
- * sure we are soft-disabled first and reconcile irq state.
- */
- RECONCILE_IRQ_STATE(r3,r4)
- bl preempt_schedule_irq
-
- /*
- * arch_local_irq_restore() from preempt_schedule_irq above may
- * enable hard interrupts, but we really should disable interrupts
- * when we return from the interrupt, so that we don't get
- * interrupted after loading SRR0/1.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r10,MSR_RI
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-#endif /* CONFIG_PREEMPTION */
-
- .globl fast_exc_return_irq
-fast_exc_return_irq:
-restore:
- /*
- * This is the main kernel exit path. First we check if we
- * are about to re-enable interrupts
- */
- ld r5,SOFTE(r1)
- lbz r6,PACAIRQSOFTMASK(r13)
- andi. r5,r5,IRQS_DISABLED
- bne .Lrestore_irq_off
-
- /* We are enabling, were we already enabled ? Yes, just return */
- andi. r6,r6,IRQS_DISABLED
- beq cr0,.Ldo_restore
-
- /*
- * We are about to soft-enable interrupts (we are hard disabled
- * at this point). We check if there's anything that needs to
- * be replayed first.
- */
- lbz r0,PACAIRQHAPPENED(r13)
- cmpwi cr0,r0,0
- bne- .Lrestore_check_irq_replay
-
- /*
- * Get here when nothing happened while soft-disabled, just
- * soft-enable and move-on. We will hard-enable as a side
- * effect of rfi
- */
-.Lrestore_no_replay:
- TRACE_ENABLE_INTS
- li r0,IRQS_ENABLED
- stb r0,PACAIRQSOFTMASK(r13);
-
- /*
- * Final return path. BookE is handled in a different file
- */
-.Ldo_restore:
-#ifdef CONFIG_PPC_BOOK3E
- b exception_return_book3e
-#else
- /*
- * Clear the reservation. If we know the CPU tracks the address of
- * the reservation then we can potentially save some cycles and use
- * a larx. On POWER6 and POWER7 this is significantly faster.
- */
-BEGIN_FTR_SECTION
- stdcx. r0,0,r1 /* to clear the reservation */
-FTR_SECTION_ELSE
- ldarx r4,0,r1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
- /*
- * Some code path such as load_up_fpu or altivec return directly
- * here. They run entirely hard disabled and do not alter the
- * interrupt state. They also don't use lwarx/stwcx. and thus
- * are known not to leave dangling reservations.
- */
- .globl fast_exception_return
-fast_exception_return:
- ld r3,_MSR(r1)
- ld r4,_CTR(r1)
- ld r0,_LINK(r1)
- mtctr r4
- mtlr r0
- ld r4,_XER(r1)
- mtspr SPRN_XER,r4
-
- kuap_check_amr r5, r6
-
- REST_8GPRS(5, r1)
-
- andi. r0,r3,MSR_RI
- beq- .Lunrecov_restore
-
- /*
- * Clear RI before restoring r13. If we are returning to
- * userspace and we take an exception after restoring r13,
- * we end up corrupting the userspace r13 value.
- */
- li r4,0
- mtmsrd r4,1
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* TM debug */
- std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */
-#endif
- /*
- * r13 is our per cpu area; only restore it if we are returning to
- * userspace, as the value stored in the stack frame may belong to
- * another CPU.
- */
- andi. r0,r3,MSR_PR
- beq 1f
-BEGIN_FTR_SECTION
- /* Restore PPR */
- ld r2,_PPR(r1)
- mtspr SPRN_PPR,r2
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
- REST_GPR(13, r1)
-
- /*
- * We don't need to restore AMR on the way back to userspace for KUAP.
- * The value of AMR only matters while we're in the kernel.
- */
- mtspr SPRN_SRR1,r3
-
- ld r2,_CCR(r1)
- mtcrf 0xFF,r2
- ld r2,_NIP(r1)
- mtspr SPRN_SRR0,r2
-
- ld r0,GPR0(r1)
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r1,GPR1(r1)
- RFI_TO_USER
- b . /* prevent speculative execution */
-
-1: mtspr SPRN_SRR1,r3
-
- ld r2,_CCR(r1)
- mtcrf 0xFF,r2
- ld r2,_NIP(r1)
- mtspr SPRN_SRR0,r2
-
- /*
- * Leaving a stale exception_marker on the stack can confuse
- * the reliable stack unwinder later on. Clear it.
- */
- li r2,0
- std r2,STACK_FRAME_OVERHEAD-16(r1)
-
- ld r0,GPR0(r1)
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
-
- kuap_restore_amr r4
-
- ld r4,GPR4(r1)
- ld r1,GPR1(r1)
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-#endif /* CONFIG_PPC_BOOK3E */
-
- /*
- * We are returning to a context with interrupts soft disabled.
- *
- * However, we may also be about to hard enable, so we need to
- * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
- * or that bit can get out of sync and bad things will happen
- */
-.Lrestore_irq_off:
- ld r3,_MSR(r1)
- lbz r7,PACAIRQHAPPENED(r13)
- andi. r0,r3,MSR_EE
- beq 1f
- rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
- stb r7,PACAIRQHAPPENED(r13)
-1:
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
- /* The interrupt should not have soft enabled. */
- lbz r7,PACAIRQSOFTMASK(r13)
-1: tdeqi r7,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
- b .Ldo_restore
-
- /*
- * Something did happen, check if a re-emit is needed
- * (this also clears paca->irq_happened)
- */
-.Lrestore_check_irq_replay:
- /* XXX: We could implement a fast path here where we check
- * for irq_happened being just 0x01, in which case we can
- * clear it and return. That means that we would potentially
- * miss a decrementer having wrapped all the way around.
- *
- * Still, this might be useful for things like hash_page
- */
- bl __check_irq_replay
- cmpwi cr0,r3,0
- beq .Lrestore_no_replay
-
- /*
- * We need to re-emit an interrupt. We do so by re-using our
- * existing exception frame. We first change the trap value,
- * but we need to ensure we preserve the low nibble of it
- */
- ld r4,_TRAP(r1)
- clrldi r4,r4,60
- or r4,r4,r3
- std r4,_TRAP(r1)
-
- /*
- * PACA_IRQ_HARD_DIS won't always be set here, so set it now
- * to reconcile the IRQ state. Tracing is already accounted for.
- */
- lbz r4,PACAIRQHAPPENED(r13)
- ori r4,r4,PACA_IRQ_HARD_DIS
- stb r4,PACAIRQHAPPENED(r13)
-
- /*
- * Then find the right handler and call it. Interrupts are
- * still soft-disabled and we keep them that way.
- */
- cmpwi cr0,r3,0x500
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl do_IRQ
- b ret_from_except
-1: cmpwi cr0,r3,0xf00
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl performance_monitor_exception
- b ret_from_except
-1: cmpwi cr0,r3,0xe60
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl handle_hmi_exception
- b ret_from_except
-1: cmpwi cr0,r3,0x900
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl timer_interrupt
- b ret_from_except
-#ifdef CONFIG_PPC_DOORBELL
-1:
-#ifdef CONFIG_PPC_BOOK3E
- cmpwi cr0,r3,0x280
-#else
- cmpwi cr0,r3,0xa00
-#endif /* CONFIG_PPC_BOOK3E */
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl doorbell_exception
-#endif /* CONFIG_PPC_DOORBELL */
-1: b ret_from_except /* What else to do here ? */
-
-.Lunrecov_restore:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b .Lunrecov_restore
-
-_ASM_NOKPROBE_SYMBOL(ret_from_except);
-_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
-_ASM_NOKPROBE_SYMBOL(resume_kernel);
-_ASM_NOKPROBE_SYMBOL(fast_exc_return_irq);
-_ASM_NOKPROBE_SYMBOL(restore);
-_ASM_NOKPROBE_SYMBOL(fast_exception_return);
-
-
#ifdef CONFIG_PPC_RTAS
/*
* On CHRP, the Run-Time Abstraction Services (RTAS) have to be
@@ -1178,7 +309,7 @@ _GLOBAL(enter_rtas)
*/
lbz r0,PACAIRQSOFTMASK(r13)
1: tdeqi r0,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+ EMIT_WARN_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
/* Hard-disable interrupts */
@@ -1316,7 +447,7 @@ _GLOBAL(enter_prom)
mtsrr1 r11
rfi
#else /* CONFIG_PPC_BOOK3E */
- LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
+ LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE)
andc r11,r11,r12
mtsrr1 r11
RFI_TO_KERNEL
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 9d32158ce36f..93b0f3ec8fb0 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -11,6 +11,7 @@
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include <asm/machdep.h>
+#include <asm/inst.h>
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
extern void epapr_ev_idle(void);
@@ -36,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
return -1;
for (i = 0; i < (len / 4); i++) {
- u32 inst = be32_to_cpu(insts[i]);
+ struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i]));
patch_instruction(epapr_hypercall_start + i, inst);
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
patch_instruction(epapr_ev_idle_start + i, inst);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index e4076e3c072d..711c66b76df1 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -24,6 +24,11 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
#include <asm/feature-fixups.h>
+#include <asm/context_tracking.h>
+
+/* 64e interrupt returns always use SRR registers */
+#define fast_interrupt_return fast_interrupt_return_srr
+#define interrupt_return interrupt_return_srr
/* XXX This will ultimately add space for a special exception save
* structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
@@ -62,9 +67,6 @@
ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
special_reg_save:
- lbz r9,PACAIRQHAPPENED(r13)
- RECONCILE_IRQ_STATE(r3,r4)
-
/*
* We only need (or have stack space) to save this stuff if
* we interrupted the kernel.
@@ -118,15 +120,11 @@ BEGIN_FTR_SECTION
mtspr SPRN_MAS5,r10
mtspr SPRN_MAS8,r10
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
- SPECIAL_EXC_STORE(r9,IRQHAPPENED)
-
mfspr r10,SPRN_DEAR
SPECIAL_EXC_STORE(r10,DEAR)
mfspr r10,SPRN_ESR
SPECIAL_EXC_STORE(r10,ESR)
- lbz r10,PACAIRQSOFTMASK(r13)
- SPECIAL_EXC_STORE(r10,SOFTE)
ld r10,_NIP(r1)
SPECIAL_EXC_STORE(r10,CSRR0)
ld r10,_MSR(r1)
@@ -138,7 +136,8 @@ ret_from_level_except:
ld r3,_MSR(r1)
andi. r3,r3,MSR_PR
beq 1f
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
1:
LOAD_REG_ADDR(r11,extlb_level_exc)
@@ -192,27 +191,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_MAS8,r10
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
- lbz r6,PACAIRQSOFTMASK(r13)
- ld r5,SOFTE(r1)
-
- /* Interrupts had better not already be enabled... */
- tweqi r6,IRQS_ENABLED
-
- andi. r6,r5,IRQS_DISABLED
- bne 1f
-
- TRACE_ENABLE_INTS
- stb r5,PACAIRQSOFTMASK(r13)
-1:
- /*
- * Restore PACAIRQHAPPENED rather than setting it based on
- * the return MSR[EE], since we could have interrupted
- * __check_irq_replay() or other inconsistent transitory
- * states that must remain that way.
- */
- SPECIAL_EXC_LOAD(r10,IRQHAPPENED)
- stb r10,PACAIRQHAPPENED(r13)
-
SPECIAL_EXC_LOAD(r10,DEAR)
mtspr SPRN_DEAR,r10
SPECIAL_EXC_LOAD(r10,ESR)
@@ -366,6 +344,12 @@ ret_from_mc_except:
andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \
bne masked_interrupt_book3e_##n
+/*
+ * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is
+ * called, because that does SAVE_NVGPRS which must see the original register
+ * values, otherwise the scratch values might be restored when exiting the
+ * interrupt.
+ */
#define PROLOG_ADDITION_2REGS_GEN(n) \
std r14,PACA_EXGEN+EX_R14(r13); \
std r15,PACA_EXGEN+EX_R15(r13)
@@ -397,7 +381,6 @@ exc_##n##_common: \
std r10,_NIP(r1); /* save SRR0 to stackframe */ \
std r11,_MSR(r1); /* save SRR1 to stackframe */ \
beq 2f; /* if from kernel mode */ \
- ACCOUNT_CPU_USER_ENTRY(r13,r10,r11);/* accounting (uses cr0+eq) */ \
2: ld r3,excf+EX_R10(r13); /* get back r10 */ \
ld r4,excf+EX_R11(r13); /* get back r11 */ \
mfspr r5,scratch; /* get back r13 */ \
@@ -417,14 +400,15 @@ exc_##n##_common: \
std r6,_LINK(r1); \
std r7,_CTR(r1); \
std r8,_XER(r1); \
- li r3,(n)+1; /* indicate partial regs in trap */ \
+ li r3,(n); /* regs.trap vector */ \
std r9,0(r1); /* store stack frame back link */ \
std r10,_CCR(r1); /* store orig CR in stackframe */ \
std r9,GPR1(r1); /* store stack frame back link */ \
std r11,SOFTE(r1); /* and save it to stackframe */ \
std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
std r3,_TRAP(r1); /* set trap number */ \
- std r0,RESULT(r1); /* clear regs->result */
+ std r0,RESULT(r1); /* clear regs->result */ \
+ SAVE_NVGPRS(r1);
#define EXCEPTION_COMMON(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
@@ -435,28 +419,6 @@ exc_##n##_common: \
#define EXCEPTION_COMMON_DBG(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG)
-/*
- * This is meant for exceptions that don't immediately hard-enable. We
- * set a bit in paca->irq_happened to ensure that a subsequent call to
- * arch_local_irq_restore() will properly hard-enable and avoid the
- * fast-path, and then reconcile irq state.
- */
-#define INTS_DISABLE RECONCILE_IRQ_STATE(r3,r4)
-
-/*
- * This is called by exceptions that don't use INTS_DISABLE (that did not
- * touch irq indicators in the PACA). This will restore MSR:EE to its
- * previous value
- *
- * XXX In the long run, we may want to open-code it in order to separate the
- * load from the wrtee, thus limiting the latency caused by the dependency
- * but at this point, I'll favor code clarity until we have a near to final
- * implementation
- */
-#define INTS_RESTORE_HARD \
- ld r11,_MSR(r1); \
- wrtee r11;
-
/* XXX FIXME: Restore r14/r15 when necessary */
#define BAD_STACK_TRAMPOLINE(n) \
exc_##n##_bad_stack: \
@@ -505,12 +467,11 @@ exc_##n##_bad_stack: \
START_EXCEPTION(label); \
NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
EXCEPTION_COMMON(trapnum) \
- INTS_DISABLE; \
ack(r8); \
CHECK_NAPPING(); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
bl hdlr; \
- b ret_from_except_lite;
+ b interrupt_return
/* This value is used to mark exception frames on the stack. */
.section ".toc","aw"
@@ -561,11 +522,10 @@ __end_interrupts:
CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x100)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
addi r3,r1,STACK_FRAME_OVERHEAD
- bl unknown_exception
+ bl unknown_nmi_exception
b ret_from_crit_except
/* Machine Check Interrupt */
@@ -573,7 +533,6 @@ __end_interrupts:
MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_MC(0x000)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -586,8 +545,11 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
+ std r14,_DEAR(r1)
+ std r15,_ESR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x300)
- INTS_DISABLE
b storage_fault_common
/* Instruction Storage Interrupt */
@@ -596,8 +558,11 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
li r15,0
mr r14,r10
+ std r14,_DEAR(r1)
+ std r15,_ESR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x400)
- INTS_DISABLE
b storage_fault_common
/* External Input Interrupt */
@@ -610,6 +575,10 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
+ std r14,_DEAR(r1)
+ std r15,_ESR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x600)
b alignment_more /* no room, go out of line */
@@ -618,14 +587,13 @@ __end_interrupts:
NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
PROLOG_ADDITION_1REG)
mfspr r14,SPRN_ESR
+ std r14,_ESR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
EXCEPTION_COMMON(0x700)
- INTS_DISABLE
- std r14,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- ld r14,PACA_EXGEN+EX_R14(r13)
- bl save_nvgprs
bl program_check_exception
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Floating Point Unavailable Interrupt */
START_EXCEPTION(fp_unavailable);
@@ -637,12 +605,10 @@ __end_interrupts:
andi. r0,r12,MSR_PR;
beq- 1f
bl load_up_fpu
- b fast_exception_return
-1: INTS_DISABLE
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+ b fast_interrupt_return
+1: addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
- b ret_from_except
+ b interrupt_return
/* Altivec Unavailable Interrupt */
START_EXCEPTION(altivec_unavailable);
@@ -656,15 +622,13 @@ BEGIN_FTR_SECTION
andi. r0,r12,MSR_PR;
beq- 1f
bl load_up_altivec
- b fast_exception_return
+ b fast_interrupt_return
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- INTS_DISABLE
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_exception
- b ret_from_except
+ b interrupt_return
/* AltiVec Assist */
START_EXCEPTION(altivec_assist);
@@ -672,17 +636,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
BOOKE_INTERRUPT_ALTIVEC_ASSIST,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x220)
- INTS_DISABLE
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
bl altivec_assist_exception
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ REST_NVGPRS(r1)
#else
bl unknown_exception
#endif
- b ret_from_except
+ b interrupt_return
/* Decrementer Interrupt */
@@ -698,14 +661,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x9f0)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_BOOKE_WDT
bl WatchdogException
#else
- bl unknown_exception
+ bl unknown_nmi_exception
#endif
b ret_from_crit_except
@@ -722,11 +684,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0xf20)
- INTS_DISABLE
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* Debug exception as a critical interrupt*/
START_EXCEPTION(debug_crit);
@@ -787,15 +747,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
* normal exception
*/
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON_CRIT(0xd00)
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r14
ld r14,PACA_EXCRIT+EX_R14(r13)
ld r15,PACA_EXCRIT+EX_R15(r13)
- bl save_nvgprs
+ EXCEPTION_COMMON_CRIT(0xd00)
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl DebugException
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
kernel_dbg_exc:
b . /* NYI */
@@ -859,26 +818,23 @@ kernel_dbg_exc:
* normal exception
*/
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON_DBG(0xd08)
- INTS_DISABLE
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r14
ld r14,PACA_EXDBG+EX_R14(r13)
ld r15,PACA_EXDBG+EX_R15(r13)
- bl save_nvgprs
+ EXCEPTION_COMMON_DBG(0xd08)
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl DebugException
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
START_EXCEPTION(perfmon);
NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x260)
- INTS_DISABLE
CHECK_NAPPING()
addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception
- b ret_from_except_lite
+ b interrupt_return
/* Doorbell interrupt */
MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
@@ -889,11 +845,10 @@ kernel_dbg_exc:
CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x2a0)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
addi r3,r1,STACK_FRAME_OVERHEAD
- bl unknown_exception
+ bl unknown_nmi_exception
b ret_from_crit_except
/*
@@ -905,21 +860,18 @@ kernel_dbg_exc:
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x2c0)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl save_nvgprs
- INTS_RESTORE_HARD
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* Guest Doorbell critical Interrupt */
START_EXCEPTION(guest_doorbell_crit);
CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x2e0)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
addi r3,r1,STACK_FRAME_OVERHEAD
- bl unknown_exception
+ bl unknown_nmi_exception
b ret_from_crit_except
/* Hypervisor call */
@@ -928,10 +880,8 @@ kernel_dbg_exc:
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x310)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl save_nvgprs
- INTS_RESTORE_HARD
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* Embedded Hypervisor privileged */
START_EXCEPTION(ehpriv);
@@ -939,10 +889,8 @@ kernel_dbg_exc:
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x320)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl save_nvgprs
- INTS_RESTORE_HARD
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* LRAT Error interrupt */
START_EXCEPTION(lrat_error);
@@ -950,10 +898,36 @@ kernel_dbg_exc:
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x340)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl save_nvgprs
- INTS_RESTORE_HARD
bl unknown_exception
- b ret_from_except
+ b interrupt_return
+
+.macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+ ld r11,PACATOC(r13)
+ ld r14,__start___restart_table@got(r11)
+ ld r15,__stop___restart_table@got(r11)
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table)
+ LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table)
+#endif
+300:
+ cmpd r14,r15
+ beq 302f
+ ld r11,0(r14)
+ cmpld r10,r11
+ blt 301f
+ ld r11,8(r14)
+ cmpld r10,r11
+ bge 301f
+ ld r11,16(r14)
+ b 303f
+301:
+ addi r14,r14,24
+ b 300b
+302:
+ li r11,0
+303:
+.endm
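A minimal C sketch of the lookup the SEARCH_RESTART_TABLE macro above performs (the struct and function names here are illustrative, not from the kernel): it is a linear scan over 24-byte entries between __start___restart_table and __stop___restart_table, returning the fixup address of the entry whose [start, end) range covers the given address, or 0 if none does.

	struct restart_entry {
		unsigned long start;	/* first address covered by the entry */
		unsigned long end;	/* first address past the covered range */
		unsigned long fixup;	/* address to restart execution at */
	};

	extern struct restart_entry __start___restart_table[];
	extern struct restart_entry __stop___restart_table[];

	/* Return the restart (fixup) address for addr, or 0 if no entry covers it. */
	static unsigned long search_restart_table(unsigned long addr)
	{
		struct restart_entry *e;

		for (e = __start___restart_table; e < __stop___restart_table; e++)
			if (addr >= e->start && addr < e->end)
				return e->fixup;
		return 0;
	}

The masked_interrupt_book3e path below uses the result to redirect SRR0 to the entry's restart address when a masked interrupt hits such a region.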
/*
* An interrupt came in while soft-disabled; We mark paca->irq_happened
@@ -963,6 +937,9 @@ kernel_dbg_exc:
*/
.macro masked_interrupt_book3e paca_irq full_mask
+ std r14,PACA_EXGEN+EX_R14(r13)
+ std r15,PACA_EXGEN+EX_R15(r13)
+
lbz r10,PACAIRQHAPPENED(r13)
.if \full_mask == 1
ori r10,r10,\paca_irq | PACA_IRQ_HARD_DIS
@@ -972,22 +949,29 @@ kernel_dbg_exc:
stb r10,PACAIRQHAPPENED(r13)
.if \full_mask == 1
- rldicl r10,r11,48,1 /* clear MSR_EE */
- rotldi r11,r10,16
+ xori r11,r11,MSR_EE /* clear MSR_EE */
mtspr SPRN_SRR1,r11
.endif
+ mfspr r10,SPRN_SRR0
+ SEARCH_RESTART_TABLE
+ cmpdi r11,0
+ beq 1f
+ mtspr SPRN_SRR0,r11 /* return to restart address */
+1:
+
lwz r11,PACA_EXGEN+EX_CR(r13)
mtcr r11
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
mfspr r13,SPRN_SPRG_GEN_SCRATCH
rfi
b .
.endm
masked_interrupt_book3e_0x500:
- // XXX When adding support for EPR, use PACA_IRQ_EE_EDGE
masked_interrupt_book3e PACA_IRQ_EE 1
masked_interrupt_book3e_0x900:
@@ -1003,126 +987,24 @@ masked_interrupt_book3e_0x2c0:
masked_interrupt_book3e PACA_IRQ_DBELL 0
/*
- * Called from arch_local_irq_enable when an interrupt needs
- * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280
- * to indicate the kind of interrupt. MSR:EE is already off.
- * We generate a stackframe like if a real interrupt had happened.
- *
- * Note: While MSR:EE is off, we need to make sure that _MSR
- * in the generated frame has EE set to 1 or the exception
- * handler will not properly re-enable them.
- */
-_GLOBAL(__replay_interrupt)
- /* We are going to jump to the exception common code which
- * will retrieve various register values from the PACA which
- * we don't give a damn about.
- */
- mflr r10
- mfmsr r11
- mfcr r4
- mtspr SPRN_SPRG_GEN_SCRATCH,r13;
- std r1,PACA_EXGEN+EX_R1(r13);
- stw r4,PACA_EXGEN+EX_CR(r13);
- ori r11,r11,MSR_EE
- subi r1,r1,INT_FRAME_SIZE;
- cmpwi cr0,r3,0x500
- beq exc_0x500_common
- cmpwi cr0,r3,0x900
- beq exc_0x900_common
- cmpwi cr0,r3,0x280
- beq exc_0x280_common
- blr
-
-
-/*
* This is called from 0x300 and 0x400 handlers after the prologs with
* r14 and r15 containing the fault address and error code, with the
* original values stashed away in the PACA
*/
storage_fault_common:
- std r14,_DAR(r1)
- std r15,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r14
- mr r5,r15
- ld r14,PACA_EXGEN+EX_R14(r13)
- ld r15,PACA_EXGEN+EX_R15(r13)
bl do_page_fault
- cmpdi r3,0
- bne- 1f
- b ret_from_except_lite
-1: bl save_nvgprs
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r4,_DAR(r1)
- bl bad_page_fault
- b ret_from_except
+ b interrupt_return
/*
* Alignment exception doesn't fit entirely in the 0x100 bytes so it
* continues here.
*/
alignment_more:
- std r14,_DAR(r1)
- std r15,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- ld r14,PACA_EXGEN+EX_R14(r13)
- ld r15,PACA_EXGEN+EX_R15(r13)
- bl save_nvgprs
- INTS_RESTORE_HARD
bl alignment_exception
- b ret_from_except
-
-/*
- * We branch here from entry_64.S for the last stage of the exception
- * return code path. MSR:EE is expected to be off at that point
- */
-_GLOBAL(exception_return_book3e)
- b 1f
-
-/* This is the return from load_up_fpu fast path which could do with
- * less GPR restores in fact, but for now we have a single return path
- */
- .globl fast_exception_return
-fast_exception_return:
- wrteei 0
-1: mr r0,r13
- ld r10,_MSR(r1)
- REST_4GPRS(2, r1)
- andi. r6,r10,MSR_PR
- REST_2GPRS(6, r1)
- beq 1f
- ACCOUNT_CPU_USER_EXIT(r13, r10, r11)
- ld r0,GPR13(r1)
-
-1: stdcx. r0,0,r1 /* to clear the reservation */
-
- ld r8,_CCR(r1)
- ld r9,_LINK(r1)
- ld r10,_CTR(r1)
- ld r11,_XER(r1)
- mtcr r8
- mtlr r9
- mtctr r10
- mtxer r11
- REST_2GPRS(8, r1)
- ld r10,GPR10(r1)
- ld r11,GPR11(r1)
- ld r12,GPR12(r1)
- mtspr SPRN_SPRG_GEN_SCRATCH,r0
-
- std r10,PACA_EXGEN+EX_R10(r13);
- std r11,PACA_EXGEN+EX_R11(r13);
- ld r10,_NIP(r1)
- ld r11,_MSR(r1)
- ld r0,GPR0(r1)
- ld r1,GPR1(r1)
- mtspr SPRN_SRR0,r10
- mtspr SPRN_SRR1,r11
- ld r10,PACA_EXGEN+EX_R10(r13)
- ld r11,PACA_EXGEN+EX_R11(r13)
- mfspr r13,SPRN_SPRG_GEN_SCRATCH
- rfi
+ REST_NVGPRS(r1)
+ b interrupt_return
/*
* Trampolines used when spotting a bad kernel stack pointer in
@@ -1175,8 +1057,8 @@ bad_stack_book3e:
std r11,_CCR(r1)
mfspr r10,SPRN_DEAR
mfspr r11,SPRN_ESR
- std r10,_DAR(r1)
- std r11,_DSISR(r1)
+ std r10,_DEAR(r1)
+ std r11,_ESR(r1)
std r0,GPR0(r1); /* save r0 in stackframe */ \
std r2,GPR2(r1); /* save r2 in stackframe */ \
SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
@@ -1245,7 +1127,7 @@ found_iprot:
* r3 = MAS0_TLBSEL (for the iprot array)
* r4 = SPRN_TLBnCFG
*/
- bl invstr /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
invstr: mflr r6 /* Make it accessible */
mfmsr r7
rlwinm r5,r7,27,31,31 /* extract MSR[IS] */
@@ -1314,7 +1196,7 @@ skpinv: addi r6,r6,1 /* Increment */
mfmsr r6
xori r6,r6,MSR_IS
mtspr SPRN_SRR1,r6
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r6
addi r6,r6,(2f - 1b)
mtspr SPRN_SRR0,r6
@@ -1374,7 +1256,7 @@ skpinv: addi r6,r6,1 /* Increment */
* r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
*/
/* Now we branch the new virtual address mapped by this entry */
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r6
addi r6,r6,(2f - 1b)
tovirt(r6,r6)
@@ -1443,7 +1325,12 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:
/* Now we branch the new virtual address mapped by this entry */
+#ifdef CONFIG_RELOCATABLE
+ ld r5,PACATOC(r13)
+ ld r3,1f@got(r5)
+#else
LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
+#endif
mtctr r3
bctr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ffc15f4f079d..eaf1f72131a1 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -21,28 +21,6 @@
#include <asm/feature-fixups.h>
#include <asm/kup.h>
-/* PACA save area offsets (exgen, exmc, etc) */
-#define EX_R9 0
-#define EX_R10 8
-#define EX_R11 16
-#define EX_R12 24
-#define EX_R13 32
-#define EX_DAR 40
-#define EX_DSISR 48
-#define EX_CCR 52
-#define EX_CFAR 56
-#define EX_PPR 64
-#if defined(CONFIG_RELOCATABLE)
-#define EX_CTR 72
-.if EX_SIZE != 10
- .error "EX_SIZE is wrong"
-.endif
-#else
-.if EX_SIZE != 9
- .error "EX_SIZE is wrong"
-.endif
-#endif
-
/*
* Following are fixed section helper macros.
*
@@ -50,7 +28,6 @@
* EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors
* TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these)
* TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use)
- * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated
* EXC_COMMON - After switching to virtual, relocated mode.
*/
@@ -80,13 +57,6 @@ name:
#define TRAMP_VIRT_BEGIN(name) \
FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define TRAMP_KVM_BEGIN(name) \
- TRAMP_VIRT_BEGIN(name)
-#else
-#define TRAMP_KVM_BEGIN(name)
-#endif
-
#define EXC_REAL_NONE(start, size) \
FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
@@ -119,67 +89,6 @@ name:
ori reg,reg,(ABS_ADDR(label))@l; \
addis reg,reg,(ABS_ADDR(label))@h
-/* Exception register prefixes */
-#define EXC_HV_OR_STD 2 /* depends on HVMODE */
-#define EXC_HV 1
-#define EXC_STD 0
-
-#if defined(CONFIG_RELOCATABLE)
-/*
- * If we support interrupts with relocation on AND we're a relocatable kernel,
- * we need to use CTR to get to the 2nd level handler. So, save/restore it
- * when required.
- */
-#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13)
-#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13)
-#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg
-#else
-/* ...else CTR is unused and in register. */
-#define SAVE_CTR(reg, area)
-#define GET_CTR(reg, area) mfctr reg
-#define RESTORE_CTR(reg, area)
-#endif
-
-/*
- * PPR save/restore macros used in exceptions-64s.S
- * Used for P7 or later processors
- */
-#define SAVE_PPR(area, ra) \
-BEGIN_FTR_SECTION_NESTED(940) \
- ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \
- std ra,_PPR(r1); \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
-
-#define RESTORE_PPR_PACA(area, ra) \
-BEGIN_FTR_SECTION_NESTED(941) \
- ld ra,area+EX_PPR(r13); \
- mtspr SPRN_PPR,ra; \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
-
-/*
- * Get an SPR into a register if the CPU has the given feature
- */
-#define OPT_GET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mfspr ra,spr; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Set an SPR from a register if the CPU has the given feature
- */
-#define OPT_SET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mtspr spr,ra; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Save a register to the PACA if the CPU has the given feature
- */
-#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- std ra,offset(r13); \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
/*
* Branch to label using its 0xC000 address. This results in instruction
* address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
@@ -193,195 +102,140 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr reg; \
bctr
-.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
- TRAMP_KVM_BEGIN(\name\()_kvm)
- KVM_HANDLER \vec, \hsrr, \area, \skip
-.endm
-
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
- * If hv is possible, interrupts come into to the hv version
- * of the kvmppc_interrupt code, which then jumps to the PR handler,
- * kvmppc_interrupt_pr, if the guest is a PR guest.
+ * Interrupt code generation macros
*/
-#define kvmppc_interrupt kvmppc_interrupt_hv
-#else
-#define kvmppc_interrupt kvmppc_interrupt_pr
-#endif
-
-.macro KVMTEST name, hsrr, n
- lbz r10,HSTATE_IN_GUEST(r13)
- cmpwi r10,0
- bne \name\()_kvm
-.endm
-
-.macro KVM_HANDLER vec, hsrr, area, skip
- .if \skip
- cmpwi r10,KVM_GUEST_MODE_SKIP
- beq 89f
- .else
-BEGIN_FTR_SECTION_NESTED(947)
- ld r10,\area+EX_CFAR(r13)
- std r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
+#define IVEC .L_IVEC_\name\() /* Interrupt vector address */
+#define IHSRR .L_IHSRR_\name\() /* Sets SRR or HSRR registers */
+#define IHSRR_IF_HVMODE .L_IHSRR_IF_HVMODE_\name\() /* HSRR if HV else SRR */
+#define IAREA .L_IAREA_\name\() /* PACA save area */
+#define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */
+#define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */
+#define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */
+#define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */
+#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */
+#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
+#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
+#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
+#define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */
+#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name
+#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
+#define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */
+#define __ISTACK(name) .L_ISTACK_ ## name
+#define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */
+
+#define INT_DEFINE_BEGIN(n) \
+.macro int_define_ ## n name
+
+#define INT_DEFINE_END(n) \
+.endm ; \
+int_define_ ## n n ; \
+do_define_int n
+
+.macro do_define_int name
+ .ifndef IVEC
+ .error "IVEC not defined"
.endif
-
-BEGIN_FTR_SECTION_NESTED(948)
- ld r10,\area+EX_PPR(r13)
- std r10,HSTATE_PPR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
- ld r10,\area+EX_R10(r13)
- std r12,HSTATE_SCRATCH0(r13)
- sldi r12,r9,32
- /* HSRR variants have the 0x2 bit added to their trap number */
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- ori r12,r12,(\vec + 0x2)
- FTR_SECTION_ELSE
- ori r12,r12,(\vec)
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- ori r12,r12,(\vec + 0x2)
- .else
- ori r12,r12,(\vec)
+ .ifndef IHSRR
+ IHSRR=0
.endif
-
-#ifdef CONFIG_RELOCATABLE
- /*
- * KVM requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
- * outside the head section. CONFIG_RELOCATABLE KVM expects CTR
- * to be saved in HSTATE_SCRATCH1.
- */
- mfctr r9
- std r9,HSTATE_SCRATCH1(r13)
- __LOAD_FAR_HANDLER(r9, kvmppc_interrupt)
- mtctr r9
- ld r9,\area+EX_R9(r13)
- bctr
-#else
- ld r9,\area+EX_R9(r13)
- b kvmppc_interrupt
-#endif
-
-
- .if \skip
-89: mtocrf 0x80,r9
- ld r9,\area+EX_R9(r13)
- ld r10,\area+EX_R10(r13)
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- b kvmppc_skip_Hinterrupt
- FTR_SECTION_ELSE
- b kvmppc_skip_interrupt
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- b kvmppc_skip_Hinterrupt
- .else
- b kvmppc_skip_interrupt
+ .ifndef IHSRR_IF_HVMODE
+ IHSRR_IF_HVMODE=0
.endif
+ .ifndef IAREA
+ IAREA=PACA_EXGEN
.endif
-.endm
-
-#else
-.macro KVMTEST name, hsrr, n
-.endm
-.macro KVM_HANDLER name, vec, hsrr, area, skip
-.endm
-#endif
-
-.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri
- ld r10,PACAKMSR(r13) /* get MSR value for kernel */
- .if ! \set_ri
- xori r10,r10,MSR_RI /* Clear MSR_RI */
+ .ifndef IVIRT
+ IVIRT=1
.endif
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- mtspr SPRN_HSRR1,r10
- FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- mtspr SPRN_SRR1,r10
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- mtspr SPRN_HSRR1,r10
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- mtspr SPRN_SRR1,r10
+ .ifndef IISIDE
+ IISIDE=0
.endif
- LOAD_HANDLER(r10, \label\())
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mtspr SPRN_HSRR0,r10
- HRFI_TO_KERNEL
- FTR_SECTION_ELSE
- mtspr SPRN_SRR0,r10
- RFI_TO_KERNEL
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mtspr SPRN_HSRR0,r10
- HRFI_TO_KERNEL
- .else
- mtspr SPRN_SRR0,r10
- RFI_TO_KERNEL
+ .ifndef IDAR
+ IDAR=0
.endif
- b . /* prevent speculative execution */
-.endm
-
-/* INT_SAVE_SRR_AND_JUMP works for real or virt, this is faster but virt only */
-.macro INT_VIRT_SAVE_SRR_AND_JUMP label, hsrr
-#ifdef CONFIG_RELOCATABLE
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
+ .ifndef IDSISR
+ IDSISR=0
.endif
- LOAD_HANDLER(r12, \label\())
- mtctr r12
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- FTR_SECTION_ELSE
- mfspr r12,SPRN_SRR1 /* and HSRR1 */
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .ifndef ISET_RI
+ ISET_RI=1
+ .endif
+ .ifndef IBRANCH_TO_COMMON
+ IBRANCH_TO_COMMON=1
+ .endif
+ .ifndef IREALMODE_COMMON
+ IREALMODE_COMMON=0
.else
- mfspr r12,SPRN_SRR1 /* and HSRR1 */
+ .if ! IBRANCH_TO_COMMON
+ .error "IREALMODE_COMMON=1 but IBRANCH_TO_COMMON=0"
+ .endif
.endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- bctr
-#else
- .if \hsrr == EXC_HV_OR_STD
+ .ifndef IMASK
+ IMASK=0
+ .endif
+ .ifndef IKVM_REAL
+ IKVM_REAL=0
+ .endif
+ .ifndef IKVM_VIRT
+ IKVM_VIRT=0
+ .endif
+ .ifndef ISTACK
+ ISTACK=1
+ .endif
+ .ifndef IKUAP
+ IKUAP=1
+ .endif
+.endm
+
+/*
+ * All interrupts which set HSRR registers, as well as SRESET and MCE and
+ * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
+ * so they all generally need to test whether they were taken in guest context.
+ *
+ * Note: SRESET and MCE may also be sent to the guest by the hypervisor, and be
+ * taken with MSR[HV]=0.
+ *
+ * Interrupts which set SRR registers (with the above exceptions) do not
+ * elevate to MSR[HV]=1 mode, though most can be taken when running with
+ * MSR[HV]=1 (e.g., bare metal kernel and userspace). So these interrupts do
+ * not need to test whether a guest is running because they get delivered to
+ * the guest directly, including nested HV KVM guests.
+ *
+ * The exception is PR KVM, where the guest runs with MSR[PR]=1 and the host
+ * runs with MSR[HV]=0, so the host takes all interrupts on behalf of the
+ * guest. PR KVM runs with LPCR[AIL]=0 which causes interrupts to always be
+ * delivered to the real-mode entry point, therefore such interrupts only test
+ * KVM in their real mode handlers, and only when PR KVM is possible.
+ *
+ * Interrupts that are taken in MSR[HV]=0 and escalate to MSR[HV]=1 are always
+ * delivered in real-mode when the MMU is in hash mode because the MMU
+ * registers are not set appropriately to translate host addresses. In nested
+ * radix mode these can be delivered in virt-mode as the host translations are
+ * used implicitly (see: effective LPID, effective PID).
+ */
+
+/*
+ * If an interrupt is taken while a guest is running, it is immediately routed
+ * to KVM to handle.
+ */
+
+.macro KVMTEST name handler
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,0
+ /* HSRR variants have the 0x2 bit added to their trap number */
+ .if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ li r10,(IVEC + 0x2)
FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
+ li r10,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .elseif IHSRR
+ li r10,(IVEC + 0x2)
.else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
+ li r10,(IVEC)
.endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- b \label
+ bne \handler
#endif
.endm
@@ -405,14 +259,41 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
* - Fall through and continue executing in real, unrelocated mode.
* This is done if early=2.
*/
-.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, ri=1, dar=0, dsisr=0, bitmask=0, kvm=0
+
+.macro GEN_BRANCH_TO_COMMON name, virt
+ .if IREALMODE_COMMON
+ LOAD_HANDLER(r10, \name\()_common)
+ mtctr r10
+ bctr
+ .else
+ .if \virt
+#ifndef CONFIG_RELOCATABLE
+ b \name\()_common_virt
+#else
+ LOAD_HANDLER(r10, \name\()_common_virt)
+ mtctr r10
+ bctr
+#endif
+ .else
+ LOAD_HANDLER(r10, \name\()_common_real)
+ mtctr r10
+ bctr
+ .endif
+ .endif
+.endm
+
+.macro GEN_INT_ENTRY name, virt, ool=0
SET_SCRATCH0(r13) /* save r13 */
GET_PACA(r13)
- std r9,\area\()+EX_R9(r13) /* save r9 */
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+ std r9,IAREA+EX_R9(r13) /* save r9 */
+BEGIN_FTR_SECTION
+ mfspr r9,SPRN_PPR
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
- std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
- OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+ std r10,IAREA+EX_R10(r13) /* save r10 - r12 */
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_CFAR
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.if \ool
.if !\virt
b tramp_real_\name
@@ -425,47 +306,18 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
.endif
- OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
- OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ std r9,IAREA+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+ std r10,IAREA+EX_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
INTERRUPT_TO_KERNEL
- SAVE_CTR(r10, \area\())
+ mfctr r10
+ std r10,IAREA+EX_CTR(r13)
mfcr r9
- .if \kvm
- KVMTEST \name \hsrr \vec
- .endif
- .if \bitmask
- lbz r10,PACAIRQSOFTMASK(r13)
- andi. r10,r10,\bitmask
- /* Associate vector numbers with bits in paca->irq_happened */
- .if \vec == 0x500 || \vec == 0xea0
- li r10,PACA_IRQ_EE
- .elseif \vec == 0x900
- li r10,PACA_IRQ_DEC
- .elseif \vec == 0xa00 || \vec == 0xe80
- li r10,PACA_IRQ_DBELL
- .elseif \vec == 0xe60
- li r10,PACA_IRQ_HMI
- .elseif \vec == 0xf00
- li r10,PACA_IRQ_PMI
- .else
- .abort "Bad maskable vector"
- .endif
-
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- bne masked_Hinterrupt
- FTR_SECTION_ELSE
- bne masked_interrupt
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- bne masked_Hinterrupt
- .else
- bne masked_interrupt
- .endif
- .endif
-
- std r11,\area\()+EX_R11(r13)
- std r12,\area\()+EX_R12(r13)
+ std r11,IAREA+EX_R11(r13)
+ std r12,IAREA+EX_R12(r13)
/*
* DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
@@ -473,51 +325,165 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
* not recoverable if they are live.
*/
GET_SCRATCH0(r10)
- std r10,\area\()+EX_R13(r13)
- .if \dar
- .if \hsrr
+ std r10,IAREA+EX_R13(r13)
+ .if IDAR && !IISIDE
+ .if IHSRR
mfspr r10,SPRN_HDAR
.else
mfspr r10,SPRN_DAR
.endif
- std r10,\area\()+EX_DAR(r13)
+ std r10,IAREA+EX_DAR(r13)
.endif
- .if \dsisr
- .if \hsrr
+ .if IDSISR && !IISIDE
+ .if IHSRR
mfspr r10,SPRN_HDSISR
.else
mfspr r10,SPRN_DSISR
.endif
- stw r10,\area\()+EX_DSISR(r13)
+ stw r10,IAREA+EX_DSISR(r13)
.endif
- .if \early == 2
- /* nothing more */
- .elseif \early
- mfctr r10 /* save ctr, even for !RELOCATABLE */
- BRANCH_TO_C000(r11, \name\()_early_common)
- .elseif !\virt
- INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
.else
- INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
.endif
+
+ .if IBRANCH_TO_COMMON
+ GEN_BRANCH_TO_COMMON \name \virt
+ .endif
+
.if \ool
.popsection
.endif
.endm
/*
- * On entry r13 points to the paca, r9-r13 are saved in the paca,
- * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
- * SRR1, and relocation is on.
+ * __GEN_COMMON_ENTRY is required to receive the branch from interrupt
+ * entry, except in the case of the real-mode handlers which require
+ * __GEN_REALMODE_COMMON_ENTRY.
*
- * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
- * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
+ * This switches to virtual mode and sets MSR[RI].
+ */
+.macro __GEN_COMMON_ENTRY name
+DEFINE_FIXED_SYMBOL(\name\()_common_real)
+\name\()_common_real:
+ .if IKVM_REAL
+ KVMTEST \name kvm_interrupt
+ .endif
+
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ /* MSR[RI] is clear iff using SRR regs */
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ xori r10,r10,MSR_RI
+ END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
+ .elseif ! IHSRR
+ xori r10,r10,MSR_RI
+ .endif
+ mtmsrd r10
+
+ .if IVIRT
+ .if IKVM_VIRT
+ b 1f /* skip the virt test coming from real */
+ .endif
+
+ .balign IFETCH_ALIGN_BYTES
+DEFINE_FIXED_SYMBOL(\name\()_common_virt)
+\name\()_common_virt:
+ .if IKVM_VIRT
+ KVMTEST \name kvm_interrupt
+1:
+ .endif
+ .endif /* IVIRT */
+.endm
+
+/*
+ * Don't switch to virt mode. Used for early MCE and HMI handlers that
+ * want to run in real mode.
*/
-.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr
- .if \stack
+.macro __GEN_REALMODE_COMMON_ENTRY name
+DEFINE_FIXED_SYMBOL(\name\()_common_real)
+\name\()_common_real:
+ .if IKVM_REAL
+ KVMTEST \name kvm_interrupt
+ .endif
+.endm
+
+.macro __GEN_COMMON_BODY name
+ .if IMASK
+ .if ! ISTACK
+ .error "No support for masked interrupt to use custom stack"
+ .endif
+
+ /* If coming from user, skip soft-mask tests. */
+ andi. r10,r12,MSR_PR
+ bne 3f
+
+ /*
+ * Kernel code running below __end_soft_masked may be
+ * implicitly soft-masked if it is within the regions
+ * in the soft mask table.
+ */
+ LOAD_HANDLER(r10, __end_soft_masked)
+ cmpld r11,r10
+ bge+ 1f
+
+ /* SEARCH_SOFT_MASK_TABLE clobbers r9,r10,r12 */
+ mtctr r12
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ SEARCH_SOFT_MASK_TABLE
+ cmpdi r12,0
+ mfctr r12 /* Restore r12 to SRR1 */
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ beq 1f /* Not in soft-mask table */
+ li r10,IMASK
+ b 2f /* In soft-mask table, always mask */
+
+ /* Test the soft mask state against our interrupt's bit */
+1: lbz r10,PACAIRQSOFTMASK(r13)
+2: andi. r10,r10,IMASK
+ /* Associate vector numbers with bits in paca->irq_happened */
+ .if IVEC == 0x500 || IVEC == 0xea0
+ li r10,PACA_IRQ_EE
+ .elseif IVEC == 0x900
+ li r10,PACA_IRQ_DEC
+ .elseif IVEC == 0xa00 || IVEC == 0xe80
+ li r10,PACA_IRQ_DBELL
+ .elseif IVEC == 0xe60
+ li r10,PACA_IRQ_HMI
+ .elseif IVEC == 0xf00
+ li r10,PACA_IRQ_PMI
+ .else
+ .abort "Bad maskable vector"
+ .endif
+
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ bne masked_Hinterrupt
+ FTR_SECTION_ELSE
+ bne masked_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ bne masked_Hinterrupt
+ .else
+ bne masked_interrupt
+ .endif
+ .endif
+
+ .if ISTACK
andi. r10,r12,MSR_PR /* See if coming from user */
- mr r10,r1 /* Save r1 */
+3: mr r10,r1 /* Save r1 */
subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
beq- 100f
ld r1,PACAKSAVE(r13) /* kernel stack to use */
@@ -532,54 +498,80 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r0,GPR0(r1) /* save r0 in stackframe */
std r10,GPR1(r1) /* save r1 in stackframe */
- .if \stack
- .if \kuap
+ /* Mark our [H]SRRs valid for return */
+ li r10,1
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ stb r10,PACAHSRR_VALID(r13)
+ FTR_SECTION_ELSE
+ stb r10,PACASRR_VALID(r13)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ stb r10,PACAHSRR_VALID(r13)
+ .else
+ stb r10,PACASRR_VALID(r13)
+ .endif
+
+ .if ISET_RI
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set MSR_RI */
+ .endif
+
+ .if ISTACK
+ .if IKUAP
kuap_save_amr_and_lock r9, r10, cr1, cr0
.endif
beq 101f /* if from kernel mode */
- ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
- SAVE_PPR(\area, r9)
+BEGIN_FTR_SECTION
+ ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */
+ std r9,_PPR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
101:
.else
- .if \kuap
+ .if IKUAP
kuap_save_amr_and_lock r9, r10, cr1
.endif
.endif
/* Save original regs values from save area to stack frame. */
- ld r9,\area+EX_R9(r13) /* move r9, r10 to stackframe */
- ld r10,\area+EX_R10(r13)
+ ld r9,IAREA+EX_R9(r13) /* move r9, r10 to stackframe */
+ ld r10,IAREA+EX_R10(r13)
std r9,GPR9(r1)
std r10,GPR10(r1)
- ld r9,\area+EX_R11(r13) /* move r11 - r13 to stackframe */
- ld r10,\area+EX_R12(r13)
- ld r11,\area+EX_R13(r13)
+ ld r9,IAREA+EX_R11(r13) /* move r11 - r13 to stackframe */
+ ld r10,IAREA+EX_R12(r13)
+ ld r11,IAREA+EX_R13(r13)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
- .if \dar
- .if \dar == 2
+
+ SAVE_NVGPRS(r1)
+
+ .if IDAR
+ .if IISIDE
ld r10,_NIP(r1)
.else
- ld r10,\area+EX_DAR(r13)
+ ld r10,IAREA+EX_DAR(r13)
.endif
std r10,_DAR(r1)
.endif
- .if \dsisr
- .if \dsisr == 2
+
+ .if IDSISR
+ .if IISIDE
ld r10,_MSR(r1)
lis r11,DSISR_SRR1_MATCH_64S@h
and r10,r10,r11
.else
- lwz r10,\area+EX_DSISR(r13)
+ lwz r10,IAREA+EX_DSISR(r13)
.endif
std r10,_DSISR(r1)
.endif
-BEGIN_FTR_SECTION_NESTED(66)
- ld r10,\area+EX_CFAR(r13)
+
+BEGIN_FTR_SECTION
+ ld r10,IAREA+EX_CFAR(r13)
std r10,ORIG_GPR3(r1)
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
- GET_CTR(r10, \area)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
std r2,GPR2(r1) /* save r2 in stackframe */
SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
@@ -591,36 +583,101 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
mfspr r11,SPRN_XER /* save XER in stackframe */
std r10,SOFTE(r1)
std r11,_XER(r1)
- li r9,(\vec)+1
+ li r9,IVEC
std r9,_TRAP(r1) /* set trap number */
li r10,0
ld r11,exception_marker@toc(r2)
std r10,RESULT(r1) /* clear regs->result */
std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
+.endm
- .if \stack
- ACCOUNT_STOLEN_TIME
- .endif
+/*
+ * On entry r13 points to the paca, r9-r13 are saved in the paca,
+ * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
+ * SRR1, and relocation is on.
+ *
+ * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
+ * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
+ */
+.macro GEN_COMMON name
+ __GEN_COMMON_ENTRY \name
+ __GEN_COMMON_BODY \name
+.endm
- .if \reconcile
- RECONCILE_IRQ_STATE(r10, r11)
- .endif
+.macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+ mr r12,r2
+ ld r2,PACATOC(r13)
+ LOAD_REG_ADDR(r9, __start___restart_table)
+ LOAD_REG_ADDR(r10, __stop___restart_table)
+ mr r2,r12
+#else
+ LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___restart_table)
+ LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___restart_table)
+#endif
+300:
+ cmpd r9,r10
+ beq 302f
+ ld r12,0(r9)
+ cmpld r11,r12
+ blt 301f
+ ld r12,8(r9)
+ cmpld r11,r12
+ bge 301f
+ ld r12,16(r9)
+ b 303f
+301:
+ addi r9,r9,24
+ b 300b
+302:
+ li r12,0
+303:
+.endm
+
+.macro SEARCH_SOFT_MASK_TABLE
+#ifdef CONFIG_RELOCATABLE
+ mr r12,r2
+ ld r2,PACATOC(r13)
+ LOAD_REG_ADDR(r9, __start___soft_mask_table)
+ LOAD_REG_ADDR(r10, __stop___soft_mask_table)
+ mr r2,r12
+#else
+ LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___soft_mask_table)
+ LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___soft_mask_table)
+#endif
+300:
+ cmpd r9,r10
+ beq 302f
+ ld r12,0(r9)
+ cmpld r11,r12
+ blt 301f
+ ld r12,8(r9)
+ cmpld r11,r12
+ bge 301f
+ li r12,1
+ b 303f
+301:
+ addi r9,r9,16
+ b 300b
+302:
+ li r12,0
+303:
.endm
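A similar minimal C sketch of what SEARCH_SOFT_MASK_TABLE computes (again, the struct and function names are illustrative, not the kernel's): each 16-byte entry describes an implicitly soft-masked [start, end) range, and the macro reports whether an address falls inside any of them.

	struct soft_mask_entry {
		unsigned long start;
		unsigned long end;
	};

	extern struct soft_mask_entry __start___soft_mask_table[];
	extern struct soft_mask_entry __stop___soft_mask_table[];

	/* Return 1 if nip lies in an implicitly soft-masked region, else 0. */
	static int in_soft_mask_table(unsigned long nip)
	{
		struct soft_mask_entry *e;

		for (e = __start___soft_mask_table; e < __stop___soft_mask_table; e++)
			if (nip >= e->start && nip < e->end)
				return 1;
		return 0;
	}

__GEN_COMMON_BODY above uses this result to force the IMASK bit on, so code covered by the table is masked as if PACAIRQSOFTMASK had that bit set.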
/*
* Restore all registers including H/SRR0/1 saved in a stack frame of a
* standard exception.
*/
-.macro EXCEPTION_RESTORE_REGS hsrr
+.macro EXCEPTION_RESTORE_REGS hsrr=0
/* Move original SRR0 and SRR1 into the respective regs */
ld r9,_MSR(r1)
- .if \hsrr == EXC_HV_OR_STD
- .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS"
- .endif
+ li r10,0
.if \hsrr
mtspr SPRN_HSRR1,r9
+ stb r10,PACAHSRR_VALID(r13)
.else
mtspr SPRN_SRR1,r9
+ stb r10,PACASRR_VALID(r13)
.endif
ld r9,_NIP(r1)
.if \hsrr
@@ -643,55 +700,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
ld r1,GPR1(r1)
.endm
-#define RUNLATCH_ON \
-BEGIN_FTR_SECTION \
- ld r3, PACA_THREAD_INFO(r13); \
- ld r4,TI_LOCAL_FLAGS(r3); \
- andi. r0,r4,_TLF_RUNLATCH; \
- beql ppc64_runlatch_on_trampoline; \
-END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
-
-/*
- * When the idle code in power4_idle puts the CPU into NAP mode,
- * it has to do so in a loop, and relies on the external interrupt
- * and decrementer interrupt entry code to get it out of the loop.
- * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
- * to signal that it is in the loop and needs help to get out.
- */
-#ifdef CONFIG_PPC_970_NAP
-#define FINISH_NAP \
-BEGIN_FTR_SECTION \
- ld r11, PACA_THREAD_INFO(r13); \
- ld r9,TI_LOCAL_FLAGS(r11); \
- andi. r10,r9,_TLF_NAPPING; \
- bnel power4_fixup_nap; \
-END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-#else
-#define FINISH_NAP
-#endif
-
-#define EXC_COMMON(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
- bl save_nvgprs; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b ret_from_except
-
-/*
- * Like EXC_COMMON, but for exceptions that can occur in the idle task and
- * therefore need the special idle handling (finish nap and runlatch)
- */
-#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
- FINISH_NAP; \
- RUNLATCH_ON; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b ret_from_except_lite
-
-
/*
* There are a few constraints to be concerned with.
* - Real mode exceptions code/data must be located at their physical location.
@@ -716,6 +724,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* guarantee they will be delivered virtually. Some conditions (see the ISA)
* cause exceptions to be delivered in real mode.
*
+ * The scv instructions are a special case. They get a 0x3000 offset applied.
+ * scv exceptions have unique reentrancy properties, see below.
+ *
* It's impossible to receive interrupts below 0x300 via AIL.
*
* KVM: None of the virtual exceptions are from the guest. Anything that
@@ -725,8 +736,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* We layout physical memory as follows:
* 0x0000 - 0x00ff : Secondary processor spin code
* 0x0100 - 0x18ff : Real mode pSeries interrupt vectors
- * 0x1900 - 0x3fff : Real mode trampolines
- * 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
+ * 0x1900 - 0x2fff : Real mode trampolines
+ * 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
* 0x5900 - 0x6fff : Relon mode trampolines
* 0x7000 - 0x7fff : FWNMI data area
* 0x8000 - .... : Common interrupt handlers, remaining early
@@ -737,8 +748,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* vectors there.
*/
OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900)
-OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000)
-OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900)
+OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x3000)
+OPEN_FIXED_SECTION(virt_vectors, 0x3000, 0x5900)
OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000)
#ifdef CONFIG_PPC_POWERNV
@@ -774,10 +785,132 @@ USE_FIXED_SECTION(real_vectors)
.globl __start_interrupts
__start_interrupts:
+/**
+ * Interrupt 0x3000 - System Call Vectored Interrupt (syscall).
+ * This is a synchronous interrupt invoked with the "scv" instruction. The
+ * system call does not alter the HV bit, so it is directed to the OS.
+ *
+ * Handling:
+ * scv instructions enter the kernel without changing EE, RI, ME, or HV.
+ * In particular, this means we can take a maskable interrupt at any point
+ * in the scv handler, which is unlike any other interrupt. This is solved
+ * by treating the instruction addresses in the handler as being soft-masked,
+ * by adding a SOFT_MASK_TABLE entry for them.
+ *
+ * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and
+ * ensure scv is never executed with relocation off, which means AIL-0
+ * should never happen.
+ *
+ * Before leaving the soft-masked text below __end_soft_masked, at least one of
+ * the following must be true:
+ * - MSR[PR]=1 (i.e., return to userspace)
+ * - MSR_EE|MSR_RI is clear (no reentrant exceptions)
+ * - Standard kernel environment is set up (stack, paca, etc)
+ *
+ * Call convention:
+ *
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
+ */
+EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
+ /* SCV 0 */
+ mr r9,r13
+ GET_PACA(r13)
+ mflr r11
+ mfctr r12
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+#ifdef CONFIG_RELOCATABLE
+ b system_call_vectored_tramp
+#else
+ b system_call_vectored_common
+#endif
+ nop
+
+ /* SCV 1 - 127 */
+ .rept 127
+ mr r9,r13
+ GET_PACA(r13)
+ mflr r11
+ mfctr r12
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+ li r0,-1 /* cause failure */
+#ifdef CONFIG_RELOCATABLE
+ b system_call_vectored_sigill_tramp
+#else
+ b system_call_vectored_sigill
+#endif
+ .endr
+EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000)
+
+// Treat scv vectors as soft-masked, see comment above.
+// Use absolute values rather than labels here, so they don't get relocated,
+// because this code runs unrelocated.
+SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000)
+
+#ifdef CONFIG_RELOCATABLE
+TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
+ __LOAD_HANDLER(r10, system_call_vectored_common)
+ mtctr r10
+ bctr
+
+TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp)
+ __LOAD_HANDLER(r10, system_call_vectored_sigill)
+ mtctr r10
+ bctr
+#endif
+
+
/* No virt vectors corresponding with 0x0..0x100 */
EXC_VIRT_NONE(0x4000, 0x100)
+/**
+ * Interrupt 0x100 - System Reset Interrupt (SRESET aka NMI).
+ * This is a non-maskable, asynchronous interrupt always taken in real-mode.
+ * It is caused by:
+ * - Wake from power-saving state, on powernv.
+ * - An NMI from another CPU, triggered by firmware or hypercall.
+ * - As crash/debug signal injected from BMC, firmware or hypervisor.
+ *
+ * Handling:
+ * Power-save wakeup is the only performance critical path, so this is
+ * determined quickly as possible first. In this case volatile registers
+ * can be discarded and SPRs like CFAR don't need to be read.
+ *
+ * If not a powersave wakeup, then it's run as a regular interrupt, however
+ * it uses its own stack and PACA save area to preserve the regular kernel
+ * environment for debugging.
+ *
+ * This interrupt is not maskable, so triggering it when MSR[RI] is clear,
+ * or SCRATCH0 is in use, etc. may cause a crash. It's also not entirely
+ * correct to switch to virtual mode to run the regular interrupt handler
+ * because it might be interrupted when the MMU is in a bad state (e.g., SLB
+ * is clear).
+ *
+ * FWNMI:
+ * PAPR specifies a "fwnmi" facility which sends the sreset to a different
+ * entry point with a different register set up. Some hypervisors will
+ * send the sreset to 0x100 in the guest if it is not fwnmi capable.
+ *
+ * KVM:
+ * Unlike most SRR interrupts, this may be taken by the host while executing
+ * in a guest, so a KVM test is required. KVM will pull the CPU out of guest
+ * mode and then raise the sreset.
+ */
+INT_DEFINE_BEGIN(system_reset)
+ IVEC=0x100
+ IAREA=PACA_EXNMI
+ IVIRT=0 /* no virt entry point */
+ /*
+ * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
+ * being used, so a nested NMI exception would corrupt it.
+ */
+ ISET_RI=0
+ ISTACK=0
+ IKVM_REAL=1
+INT_DEFINE_END(system_reset)
+
EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
/*
@@ -815,11 +948,8 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif
- INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1
+ GEN_INT_ENTRY system_reset, virt=0
/*
- * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
- * being used, so a nested NMI exception would corrupt it.
- *
* In theory, we should not enable relocation here if it was disabled
* in SRR1, because the MMU may not be configured to support it (e.g.,
* SLB may have been cleared). In practice, there should only be a few
@@ -828,7 +958,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
*/
EXC_REAL_END(system_reset, 0x100, 0x100)
EXC_VIRT_NONE(0x4100, 0x100)
-INT_KVM_HANDLER system_reset 0x100, EXC_STD, PACA_EXNMI, 0
#ifdef CONFIG_PPC_P7_NAP
TRAMP_REAL_BEGIN(system_reset_idle_wake)
@@ -843,12 +972,12 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
* Vectors for the FWNMI option. Share common code.
*/
TRAMP_REAL_BEGIN(system_reset_fwnmi)
- /* See comment at system_reset exception, don't turn on RI */
- INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0
+ GEN_INT_ENTRY system_reset, virt=0
#endif /* CONFIG_PPC_PSERIES */
EXC_COMMON_BEGIN(system_reset_common)
+ __GEN_COMMON_ENTRY system_reset
/*
* Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
* to recover, but nested NMI will notice in_nmi and not recover
@@ -864,21 +993,7 @@ EXC_COMMON_BEGIN(system_reset_common)
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
- INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0
- bl save_nvgprs
- /*
- * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
- * the right thing. We do not want to reconcile because that goes
- * through irq tracing which we don't want in NMI.
- *
- * Save PACAIRQHAPPENED because some code will do a hard disable
- * (e.g., xmon). So we want to restore this back to where it was
- * when we return. DAR is unused in the stack, so save it there.
- */
- li r10,IRQS_ALL_DISABLED
- stb r10,PACAIRQSOFTMASK(r13)
- lbz r10,PACAIRQHAPPENED(r13)
- std r10,_DAR(r1)
+ __GEN_COMMON_BODY system_reset
addi r3,r1,STACK_FRAME_OVERHEAD
bl system_reset_exception
@@ -894,36 +1009,92 @@ EXC_COMMON_BEGIN(system_reset_common)
subi r10,r10,1
sth r10,PACA_IN_NMI(r13)
- /*
- * Restore soft mask settings.
- */
- ld r10,_DAR(r1)
- stb r10,PACAIRQHAPPENED(r13)
- ld r10,SOFTE(r1)
- stb r10,PACAIRQSOFTMASK(r13)
-
- EXCEPTION_RESTORE_REGS EXC_STD
+ kuap_kernel_restore r9, r10
+ EXCEPTION_RESTORE_REGS
RFI_TO_USER_OR_KERNEL
-EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
- INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+/**
+ * Interrupt 0x200 - Machine Check Interrupt (MCE).
+ * This is a non-maskable interrupt always taken in real-mode. It can be
+ * synchronous or asynchronous, caused by hardware or software, and it may be
+ * taken in a power-saving state.
+ *
+ * Handling:
+ * Similarly to system reset, this uses its own stack and PACA save area,
+ * the difference is re-entrancy is allowed on the machine check stack.
+ *
+ * machine_check_early is run in real mode, and carefully decodes the
+ * machine check and tries to handle it (e.g., flush the SLB if there was an
+ * error detected there), determines if it was recoverable and logs the
+ * event.
+ *
+ * This early code does not "reconcile" irq soft-mask state like SRESET or
+ * regular interrupts do, so irqs_disabled() among other things may not work
+ * properly (irq disable/enable already doesn't work because irq tracing can
+ * not work in real mode).
+ *
+ * Then, depending on the execution context when the interrupt is taken, there
+ * are 3 main actions:
+ * - Executing in kernel mode. The event is queued with irq_work, which means
+ * it is handled when it is next safe to do so (i.e., the kernel has enabled
+ * interrupts), which could be immediately when the interrupt returns. This
+ * avoids nasty issues like switching to virtual mode when the MMU is in a
+ * bad state, or when executing OPAL code. (SRESET is exposed to such issues,
+ * but it has different priorities). Check to see if the CPU was in power
+ * save, and return via the wake up code if it was.
+ *
+ * - Executing in user mode. machine_check_exception is run like a normal
+ * interrupt handler, which processes the data generated by the early handler.
+ *
+ * - Executing in guest mode. The interrupt is run with its KVM test, and
+ * branches to KVM to deal with. KVM may queue the event for the host
+ * to report later.
+ *
+ * This interrupt is not maskable, so if it triggers when MSR[RI] is clear,
+ * or SCRATCH0 is in use, it may cause a crash.
+ *
+ * KVM:
+ * See SRESET.
+ */
+INT_DEFINE_BEGIN(machine_check_early)
+ IVEC=0x200
+ IAREA=PACA_EXMC
+ IVIRT=0 /* no virt entry point */
+ IREALMODE_COMMON=1
/*
* MSR_RI is not enabled, because PACA_EXMC is being used, so a
* nested machine check corrupts it. machine_check_common enables
* MSR_RI.
*/
+ ISET_RI=0
+ ISTACK=0
+ IDAR=1
+ IDSISR=1
+ IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+INT_DEFINE_END(machine_check_early)
+
+INT_DEFINE_BEGIN(machine_check)
+ IVEC=0x200
+ IAREA=PACA_EXMC
+ IVIRT=0 /* no virt entry point */
+ ISET_RI=0
+ IDAR=1
+ IDSISR=1
+ IKVM_REAL=1
+INT_DEFINE_END(machine_check)
+
+EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
+ GEN_INT_ENTRY machine_check_early, virt=0
EXC_REAL_END(machine_check, 0x200, 0x100)
EXC_VIRT_NONE(0x4200, 0x100)
#ifdef CONFIG_PPC_PSERIES
TRAMP_REAL_BEGIN(machine_check_fwnmi)
/* See comment at machine_check exception, don't turn on RI */
- INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+ GEN_INT_ENTRY machine_check_early, virt=0
#endif
-INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
-
#define MACHINE_CHECK_HANDLER_WINDUP \
/* Clear MSR_RI before setting SRR0 and SRR1. */\
li r9,0; \
@@ -932,12 +1103,10 @@ INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
lhz r12,PACA_IN_MCE(r13); \
subi r12,r12,1; \
sth r12,PACA_IN_MCE(r13); \
- EXCEPTION_RESTORE_REGS EXC_STD
+ EXCEPTION_RESTORE_REGS
EXC_COMMON_BEGIN(machine_check_early_common)
- mtctr r10 /* Restore ctr */
- mfspr r11,SPRN_SRR0
- mfspr r12,SPRN_SRR1
+ __GEN_REALMODE_COMMON_ENTRY machine_check_early
/*
* Switch to mc_emergency stack and handle re-entrancy (we limit
@@ -974,8 +1143,7 @@ EXC_COMMON_BEGIN(machine_check_early_common)
bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- /* We don't touch AMR here, we never go to virtual mode */
- INT_COMMON 0x200, PACA_EXMC, 0, 0, 0, 1, 1
+ __GEN_COMMON_BODY machine_check_early
BEGIN_FTR_SECTION
bl enable_machine_check
@@ -983,7 +1151,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
li r10,MSR_RI
mtmsrd r10,1
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_early
std r3,RESULT(r1) /* Save result */
@@ -1009,7 +1176,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
/*
* Check if we are coming from guest. If yes, then run the normal
* exception handler which will take the
- * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+ * machine_check_kvm->kvm_interrupt branch to deliver the MC event
* to guest.
*/
lbz r11,HSTATE_IN_GUEST(r13)
@@ -1063,23 +1230,22 @@ BEGIN_FTR_SECTION
mtspr SPRN_CFAR,r10
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
MACHINE_CHECK_HANDLER_WINDUP
- /* See comment at machine_check exception, don't turn on RI */
- INT_HANDLER machine_check, 0x200, area=PACA_EXMC, ri=0, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY machine_check, virt=0
EXC_COMMON_BEGIN(machine_check_common)
/*
* Machine check is different because we use a different
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
- INT_COMMON 0x200, PACA_EXMC, 1, 1, 1, 1, 1
- FINISH_NAP
+ GEN_COMMON machine_check
+
/* Enable MSR_RI when finished with PACA_EXMC */
li r10,MSR_RI
mtmsrd r10,1
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_exception
- b ret_from_except
+ bl machine_check_exception_async
+ b interrupt_return_srr
+
#ifdef CONFIG_PPC_P7_NAP
/*
@@ -1090,17 +1256,19 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
bl machine_check_queue_event
/*
- * We have not used any non-volatile GPRs here, and as a rule
- * most exception code including machine check does not.
- * Therefore PACA_NAPSTATELOST does not need to be set. Idle
- * wakeup will restore volatile registers.
+ * GPR-loss wakeups are relatively straightforward, because the
+ * idle sleep code has saved all non-volatile registers on its
+ * own stack, and r1 in PACAR1.
*
- * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce.
+ * For no-loss wakeups the r1 and lr registers used by the
+ * early machine check handler have to be restored first. r2 is
+ * the kernel TOC, so no need to restore it.
*
* Then decrement MCE nesting after finishing with the stack.
*/
ld r3,_MSR(r1)
ld r4,_LINK(r1)
+ ld r1,GPR1(r1)
lhz r11,PACA_IN_MCE(r13)
subi r11,r11,1
@@ -1109,7 +1277,7 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
mtlr r4
rlwinm r10,r3,47-31,30,31
cmpwi cr1,r10,2
- bltlr cr1 /* no state loss, return to idle caller */
+ bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */
b idle_return_gpr_loss
#endif
@@ -1131,7 +1299,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
andc r10,r10,r3
mtmsrd r10
- /* Invoke machine_check_exception to print MCE event and panic. */
+ lhz r12,PACA_IN_MCE(r13)
+ subi r12,r12,1
+ sth r12,PACA_IN_MCE(r13)
+
+ /*
+ * Invoke machine_check_exception to print MCE event and panic.
+ * This is the NMI version of the handler because we are called from
+ * the early handler which is a true NMI.
+ */
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
@@ -1144,148 +1320,345 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
b .
+/**
+ * Interrupt 0x300 - Data Storage Interrupt (DSI).
+ * This is a synchronous interrupt generated due to a data access exception,
+ * e.g., a load or store which does not have a valid page table entry with
+ * permissions. DAWR matches also fault here, as do RC updates, and minor misc
+ * errors e.g., copy/paste, AMO, certain invalid CI accesses, etc.
+ *
+ * Handling:
+ * - Hash MMU
+ * Go to do_hash_fault, which attempts to fill the HPT from an entry in the
+ * Linux page table. Hash faults can hit in kernel mode in a fairly
+ * arbitrary state (e.g., interrupts disabled, locks held) when accessing
+ * "non-bolted" regions, e.g., vmalloc space. However these should always be
+ * backed by Linux page table entries.
+ *
+ * If no entry is found the Linux page fault handler is invoked (by
+ * do_hash_fault). Linux page faults can happen in kernel mode due to user
+ * copy operations of course.
+ *
+ * KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest
+ * MMU context, which may cause a DSI in the host, which must go to the
+ * KVM handler. MSR[IR] is not enabled, so the real-mode handler will
+ * always be used regardless of AIL setting.
+ *
+ * - Radix MMU
+ * The hardware loads from the Linux page table directly, so a fault goes
+ * immediately to Linux page fault.
+ *
+ * Conditions like DAWR match are handled on the way in to Linux page fault.
+ */
+INT_DEFINE_BEGIN(data_access)
+ IVEC=0x300
+ IDAR=1
+ IDSISR=1
+ IKVM_REAL=1
+INT_DEFINE_END(data_access)
+
EXC_REAL_BEGIN(data_access, 0x300, 0x80)
- INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY data_access, virt=0
EXC_REAL_END(data_access, 0x300, 0x80)
EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
- INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
+ GEN_INT_ENTRY data_access, virt=1
EXC_VIRT_END(data_access, 0x4300, 0x80)
-INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON_BEGIN(data_access_common)
- /*
- * Here r13 points to the paca, r9 contains the saved CR,
- * SRR0 and SRR1 are saved in r11 and r12,
- * r9 - r13 are saved in paca->exgen.
- * EX_DAR and EX_DSISR have saved DAR/DSISR
- */
- INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
+ GEN_COMMON data_access
+ ld r4,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ andis. r0,r4,DSISR_DABRMATCH@h
+ bne- 1f
BEGIN_MMU_FTR_SECTION
- ld r6,_MSR(r1)
- li r3,0x300
- b do_hash_page /* Try to handle as hpte fault */
+ bl do_hash_fault
MMU_FTR_SECTION_ELSE
- b handle_page_fault
+ bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ b interrupt_return_srr
+
+1: bl do_break
+ /*
+ * do_break() may have changed the NV GPRS while handling a breakpoint.
+ * If so, we need to restore them with their updated values.
+ */
+ REST_NVGPRS(r1)
+ b interrupt_return_srr
+/**
+ * Interrupt 0x380 - Data Segment Interrupt (DSLB).
+ * This is a synchronous interrupt in response to an MMU fault missing SLB
+ * entry for HPT, or an address outside RPT translation range.
+ *
+ * Handling:
+ * - HPT:
+ * This refills the SLB, or reports an access fault similarly to a bad page
+ * fault. When coming from user-mode, the SLB handler may access any kernel
+ * data, though it may itself take a DSLB. When coming from kernel mode,
+ * recursive faults must be avoided so access is restricted to the kernel
+ * image text/data, kernel stack, and any data allocated below
+ * ppc64_bolted_size (first segment). The kernel handler must avoid stomping
+ * on user-handler data structures.
+ *
+ * KVM: Same as 0x300, DSLB must test for KVM guest.
+ */
+INT_DEFINE_BEGIN(data_access_slb)
+ IVEC=0x380
+ IDAR=1
+ IKVM_REAL=1
+INT_DEFINE_END(data_access_slb)
+
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
- INT_HANDLER data_access_slb, 0x380, ool=1, area=PACA_EXSLB, dar=1, kvm=1
+ GEN_INT_ENTRY data_access_slb, virt=0
EXC_REAL_END(data_access_slb, 0x380, 0x80)
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
- INT_HANDLER data_access_slb, 0x380, virt=1, area=PACA_EXSLB, dar=1
+ GEN_INT_ENTRY data_access_slb, virt=1
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
-INT_KVM_HANDLER data_access_slb, 0x380, EXC_STD, PACA_EXSLB, 1
EXC_COMMON_BEGIN(data_access_slb_common)
- INT_COMMON 0x380, PACA_EXSLB, 1, 1, 0, 1, 0
- ld r4,_DAR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ GEN_COMMON data_access_slb
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl do_slb_fault
cmpdi r3,0
bne- 1f
- b fast_exception_return
+ b fast_interrupt_return_srr
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- ld r4,_DAR(r1)
- ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
- b ret_from_except
+ b interrupt_return_srr
+/**
+ * Interrupt 0x400 - Instruction Storage Interrupt (ISI).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSI, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR), and status in SRR1 (rather than DSISR).
+ */
+INT_DEFINE_BEGIN(instruction_access)
+ IVEC=0x400
+ IISIDE=1
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_access)
+
EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
- INT_HANDLER instruction_access, 0x400, kvm=1
+ GEN_INT_ENTRY instruction_access, virt=0
EXC_REAL_END(instruction_access, 0x400, 0x80)
EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
- INT_HANDLER instruction_access, 0x400, virt=1
+ GEN_INT_ENTRY instruction_access, virt=1
EXC_VIRT_END(instruction_access, 0x4400, 0x80)
-INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(instruction_access_common)
- INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
+ GEN_COMMON instruction_access
+ addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
- ld r6,_MSR(r1)
- li r3,0x400
- b do_hash_page /* Try to handle as hpte fault */
+ bl do_hash_fault
MMU_FTR_SECTION_ELSE
- b handle_page_fault
+ bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ b interrupt_return_srr
+/**
+ * Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSLB, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR).
+ */
+INT_DEFINE_BEGIN(instruction_access_slb)
+ IVEC=0x480
+ IISIDE=1
+ IDAR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_access_slb)
+
EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
- INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1
+ GEN_INT_ENTRY instruction_access_slb, virt=0
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
- INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB
+ GEN_INT_ENTRY instruction_access_slb, virt=1
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
-INT_KVM_HANDLER instruction_access_slb, 0x480, EXC_STD, PACA_EXSLB, 0
EXC_COMMON_BEGIN(instruction_access_slb_common)
- INT_COMMON 0x480, PACA_EXSLB, 1, 1, 0, 2, 0
- ld r4,_DAR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ GEN_COMMON instruction_access_slb
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl do_slb_fault
cmpdi r3,0
bne- 1f
- b fast_exception_return
+ b fast_interrupt_return_srr
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- ld r4,_DAR(r1)
- ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
- b ret_from_except
+ b interrupt_return_srr
+
+
+/**
+ * Interrupt 0x500 - External Interrupt.
+ * This is an asynchronous maskable interrupt in response to an "external
+ * exception" from the interrupt controller or hypervisor (e.g., device
+ * interrupt). It is maskable in hardware by clearing MSR[EE], and
+ * soft-maskable with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * When running in HV mode, Linux sets up the LPCR[LPES] bit such that
+ * interrupts are delivered with the HSRR registers while guests use the
+ * SRRs, which requires IHSRR_IF_HVMODE.
+ *
+ * On bare metal POWER9 and later, Linux sets the LPCR[HVICE] bit such that
+ * external interrupts are delivered as Hypervisor Virtualization Interrupts
+ * rather than External Interrupts.
+ *
+ * Handling:
+ * This calls into the Linux IRQ handler. NVGPRs are not saved, to reduce
+ * overhead: because the interrupt is asynchronous, the register state at the
+ * time it was taken is not particularly important.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ */
+INT_DEFINE_BEGIN(hardware_interrupt)
+ IVEC=0x500
+ IHSRR_IF_HVMODE=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(hardware_interrupt)
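
The soft-mask bookkeeping described in the comment above (note the masked
interrupt, clear MSR[EE] in the interrupted context, replay once the mask is
lifted) can be sketched in plain C. The names and flag values below are
illustrative stand-ins, not the kernel's definitions:

	#include <stdint.h>

	/* Illustrative values only; the kernel defines its own PACA_IRQ_* flags. */
	#define IRQS_ENABLED		0x00
	#define IRQS_DISABLED		0x01
	#define PACA_IRQ_EE		0x04	/* external interrupt deferred */
	#define PACA_IRQ_DEC		0x08	/* decrementer deferred */

	static struct {
		uint8_t irq_soft_mask;	/* what local_irq_disable() sets */
		uint8_t irq_happened;	/* what the masked handler records */
	} paca;

	/* Low-level masked handler: remember the interrupt for later replay. */
	static void note_masked_interrupt(uint8_t flag)
	{
		paca.irq_happened |= flag;
		/* the real handler also clears MSR[EE] for level-triggered sources */
	}

	/* arch_local_irq_enable() analogue: lift the mask and replay. */
	static void unmask_and_replay(void (*replay)(uint8_t flag))
	{
		paca.irq_soft_mask = IRQS_ENABLED;
		while (paca.irq_happened) {
			uint8_t flag = paca.irq_happened & -paca.irq_happened;

			paca.irq_happened &= ~flag;
			replay(flag);	/* run the deferred handler now */
		}
	}
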
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
- INT_HANDLER hardware_interrupt, 0x500, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY hardware_interrupt, virt=0
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
- INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY hardware_interrupt, virt=1
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
-INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
+EXC_COMMON_BEGIN(hardware_interrupt_common)
+ GEN_COMMON hardware_interrupt
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_IRQ
+ BEGIN_FTR_SECTION
+ b interrupt_return_hsrr
+ FTR_SECTION_ELSE
+ b interrupt_return_srr
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+/**
+ * Interrupt 0x600 - Alignment Interrupt
+ * This is a synchronous interrupt in response to data alignment fault.
+ */
+INT_DEFINE_BEGIN(alignment)
+ IVEC=0x600
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(alignment)
+
EXC_REAL_BEGIN(alignment, 0x600, 0x100)
- INT_HANDLER alignment, 0x600, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY alignment, virt=0
EXC_REAL_END(alignment, 0x600, 0x100)
EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
- INT_HANDLER alignment, 0x600, virt=1, dar=1, dsisr=1
+ GEN_INT_ENTRY alignment, virt=1
EXC_VIRT_END(alignment, 0x4600, 0x100)
-INT_KVM_HANDLER alignment, 0x600, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(alignment_common)
- INT_COMMON 0x600, PACA_EXGEN, 1, 1, 1, 1, 1
- bl save_nvgprs
+ GEN_COMMON alignment
addi r3,r1,STACK_FRAME_OVERHEAD
bl alignment_exception
- b ret_from_except
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ b interrupt_return_srr
+/**
+ * Interrupt 0x700 - Program Interrupt (program check).
+ * This is a synchronous interrupt in response to various instruction faults:
+ * traps, privilege errors, TM errors, floating point exceptions.
+ *
+ * Handling:
+ * This interrupt may use the "emergency stack" in some cases when being taken
+ * from kernel context, which complicates handling.
+ */
+INT_DEFINE_BEGIN(program_check)
+ IVEC=0x700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(program_check)
+
EXC_REAL_BEGIN(program_check, 0x700, 0x100)
- INT_HANDLER program_check, 0x700, kvm=1
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+ /*
+ * There's a short window during boot where although the kernel is
+ * running little endian, any exceptions will cause the CPU to switch
+ * back to big endian. For example a WARN() boils down to a trap
+ * instruction, which will cause a program check, and we end up here but
+ * with the CPU in big endian mode. The first instruction of the program
+ * check handler (in GEN_INT_ENTRY below) is an mtsprg, which when
+ * executed in the wrong endian is an lhzu with a ~3GB displacement from
+ * r3. The content of r3 is random, so that is a load from some random
+ * location, and depending on the system can easily lead to a checkstop,
+ * or an infinitely recursive page fault.
+ *
+ * So to handle that case we have a trampoline here that can detect we
+ * are in the wrong endian and flip us back to the correct endian. We
+ * can't flip MSR[LE] using mtmsr, so we have to use rfid. That requires
+ * backing up SRR0/1 as well as a GPR. To do that we use SPRG0/2/3, as
+ * SPRG1 is already used for the paca. SPRG3 is user readable, but this
+ * trampoline is only active very early in boot, and SPRG3 will be
+ * reinitialised in vdso_getcpu_init() before userspace starts.
+ */
+BEGIN_FTR_SECTION
+ tdi 0,0,0x48 // Trap never, or in reverse endian: b . + 8
+ b 1f // Skip trampoline if endian is correct
+ .long 0xa643707d // mtsprg 0, r11 Backup r11
+ .long 0xa6027a7d // mfsrr0 r11
+ .long 0xa643727d // mtsprg 2, r11 Backup SRR0 in SPRG2
+ .long 0xa6027b7d // mfsrr1 r11
+ .long 0xa643737d // mtsprg 3, r11 Backup SRR1 in SPRG3
+ .long 0xa600607d // mfmsr r11
+ .long 0x01006b69 // xori r11, r11, 1 Invert MSR[LE]
+ .long 0xa6037b7d // mtsrr1 r11
+ .long 0x34076039 // li r11, 0x734
+ .long 0xa6037a7d // mtsrr0 r11
+ .long 0x2400004c // rfid
+ mfsprg r11, 3
+ mtsrr1 r11 // Restore SRR1
+ mfsprg r11, 2
+ mtsrr0 r11 // Restore SRR0
+ mfsprg r11, 0 // Restore r11
+1:
+END_FTR_SECTION(0, 1) // nop out after boot
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
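
The hand-encoded .long constants above are just the intended instructions with
their four bytes reversed, so that a CPU fetching them in the opposite endian
decodes them correctly. A small host-side check of the first one (mtsprg 0,r11,
i.e. mtspr SPRG0,r11) shows the relationship:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* mtspr SPRG0,r11 as a big-endian instruction word */
		uint32_t insn = 0x7d7043a6;

		/* Stored byte-reversed it reads back correctly when fetched in
		 * the other endian - this is the .long 0xa643707d above. */
		uint32_t stored = __builtin_bswap32(insn);

		printf("insn %08x stored as .long 0x%08x\n", insn, stored);
		return stored != 0xa643707d;
	}
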
+
+ GEN_INT_ENTRY program_check, virt=0
EXC_REAL_END(program_check, 0x700, 0x100)
EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
- INT_HANDLER program_check, 0x700, virt=1
+ GEN_INT_ENTRY program_check, virt=1
EXC_VIRT_END(program_check, 0x4700, 0x100)
-INT_KVM_HANDLER program_check, 0x700, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(program_check_common)
+ __GEN_COMMON_ENTRY program_check
+
/*
* It's possible to receive a TM Bad Thing type program check with
* userspace register values (in particular r1), but with SRR1 reporting
@@ -1296,43 +1669,61 @@ EXC_COMMON_BEGIN(program_check_common)
*/
andi. r10,r12,MSR_PR
- bne 2f /* If userspace, go normal path */
+ bne .Lnormal_stack /* If userspace, go normal path */
andis. r10,r12,(SRR1_PROGTM)@h
- bne 1f /* If TM, emergency */
+ bne .Lemergency_stack /* If TM, emergency */
cmpdi r1,-INT_FRAME_SIZE /* check if r1 is in userspace */
- blt 2f /* normal path if not */
+ blt .Lnormal_stack /* normal path if not */
/* Use the emergency stack */
-1: andi. r10,r12,MSR_PR /* Set CR0 correctly for label */
+.Lemergency_stack:
+ andi. r10,r12,MSR_PR /* Set CR0 correctly for label */
/* 3 in EXCEPTION_PROLOG_COMMON */
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- INT_COMMON 0x700, PACA_EXGEN, 0, 1, 1, 0, 0
- b 3f
-2:
- INT_COMMON 0x700, PACA_EXGEN, 1, 1, 1, 0, 0
-3:
- bl save_nvgprs
+ __ISTACK(program_check)=0
+ __GEN_COMMON_BODY program_check
+ b .Ldo_program_check
+
+.Lnormal_stack:
+ __ISTACK(program_check)=1
+ __GEN_COMMON_BODY program_check
+
+.Ldo_program_check:
addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
- b ret_from_except
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ b interrupt_return_srr
+
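
The stack selection above reduces to a simple predicate: use the emergency
stack only when the trap came from the kernel and either SRR1 reports a TM Bad
Thing or r1 does not look like a kernel address. A rough C rendering of that
decision (INT_FRAME_SIZE here is a stand-in value; the shape of the test is
what matters):

	#include <stdbool.h>

	#define INT_FRAME_SIZE	512	/* stand-in, not the real value */

	static bool use_emergency_stack(bool from_user, bool tm_bad_thing,
					unsigned long r1)
	{
		if (from_user)		/* MSR_PR set: normal kernel stack */
			return false;
		if (tm_bad_thing)	/* SRR1[PROGTM]: r1 may hold user values */
			return true;
		/* Kernel addresses compare as large negative values, so a
		 * genuine kernel r1 sits below -INT_FRAME_SIZE. */
		return (long)r1 >= -(long)INT_FRAME_SIZE;
	}
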
+/*
+ * Interrupt 0x800 - Floating-Point Unavailable Interrupt.
+ * This is a synchronous interrupt in response to executing an fp instruction
+ * with MSR[FP]=0.
+ *
+ * Handling:
+ * This will load FP registers and enable the FP bit if coming from userspace,
+ * otherwise report a bad kernel use of FP.
+ */
+INT_DEFINE_BEGIN(fp_unavailable)
+ IVEC=0x800
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(fp_unavailable)
EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
- INT_HANDLER fp_unavailable, 0x800, kvm=1
+ GEN_INT_ENTRY fp_unavailable, virt=0
EXC_REAL_END(fp_unavailable, 0x800, 0x100)
EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)
- INT_HANDLER fp_unavailable, 0x800, virt=1
+ GEN_INT_ENTRY fp_unavailable, virt=1
EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
-INT_KVM_HANDLER fp_unavailable, 0x800, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(fp_unavailable_common)
- INT_COMMON 0x800, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON fp_unavailable
bne 1f /* if from user, just load it up */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
0: trap
@@ -1348,64 +1739,156 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
bl load_up_fpu
- b fast_exception_return
+ b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl fp_unavailable_tm
- b ret_from_except
+ b interrupt_return_srr
#endif
+/**
+ * Interrupt 0x900 - Decrementer Interrupt.
+ * This is an asynchronous interrupt in response to a decrementer exception
+ * (e.g., DEC has wrapped below zero). It is maskable in hardware by clearing
+ * MSR[EE], and soft-maskable with IRQS_DISABLED mask (i.e.,
+ * local_irq_disable()).
+ *
+ * Handling:
+ * This calls into the Linux timer handler. NVGPRs are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and bump the decrementer to a high value, leaving MSR[EE] enabled
+ * in the interrupted context.
+ * If PPC_WATCHDOG is configured, the soft masked handler will actually set
+ * things back up to run soft_nmi_interrupt as a regular interrupt handler
+ * on the emergency stack.
+ */
+INT_DEFINE_BEGIN(decrementer)
+ IVEC=0x900
+ IMASK=IRQS_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(decrementer)
+
EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
- INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY decrementer, virt=0
EXC_REAL_END(decrementer, 0x900, 0x80)
EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
- INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED
+ GEN_INT_ENTRY decrementer, virt=1
EXC_VIRT_END(decrementer, 0x4900, 0x80)
-INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
+EXC_COMMON_BEGIN(decrementer_common)
+ GEN_COMMON decrementer
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl timer_interrupt
+ b interrupt_return_srr
+/**
+ * Interrupt 0x980 - Hypervisor Decrementer Interrupt.
+ * This is an asynchronous interrupt, similar to 0x900 but for the HDEC
+ * register.
+ *
+ * Handling:
+ * Linux does not use this outside KVM, where it is used to keep a host timer
+ * while the guest is given control of DEC. It should normally be caught by
+ * the KVM test and routed there.
+ */
+INT_DEFINE_BEGIN(hdecrementer)
+ IVEC=0x980
+ IHSRR=1
+ ISTACK=0
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(hdecrementer)
+
EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
- INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY hdecrementer, virt=0
EXC_REAL_END(hdecrementer, 0x980, 0x80)
EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
- INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY hdecrementer, virt=1
EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
-INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
+EXC_COMMON_BEGIN(hdecrementer_common)
+ __GEN_COMMON_ENTRY hdecrementer
+ /*
+ * Hypervisor decrementer interrupts not caught by the KVM test
+ * shouldn't occur but are sometimes left pending on exit from a KVM
+ * guest. We don't need to do anything to clear them, as they are
+ * edge-triggered.
+ *
+ * Be careful to avoid touching the kernel stack.
+ */
+ li r10,0
+ stb r10,PACAHSRR_VALID(r13)
+ ld r10,PACA_EXGEN+EX_CTR(r13)
+ mtctr r10
+ mtcrf 0x80,r9
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ HRFI_TO_KERNEL
+
+/**
+ * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsndp doorbell.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * Guests may use this for IPIs between threads in a core if the
+ * hypervisor supports it. NVGPRs are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, leaving MSR[EE] enabled in the interrupted context because the
+ * doorbells are edge-triggered.
+ */
+INT_DEFINE_BEGIN(doorbell_super)
+ IVEC=0xa00
+ IMASK=IRQS_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(doorbell_super)
EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
- INT_HANDLER doorbell_super, 0xa00, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY doorbell_super, virt=0
EXC_REAL_END(doorbell_super, 0xa00, 0x100)
EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
- INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED
+ GEN_INT_ENTRY doorbell_super, virt=1
EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
-INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(doorbell_super_common)
+ GEN_COMMON doorbell_super
+ addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
-EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
+ bl doorbell_exception
#else
-EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception)
+ bl unknown_async_exception
#endif
+ b interrupt_return_srr
EXC_REAL_NONE(0xb00, 0x100)
EXC_VIRT_NONE(0x4b00, 0x100)
-/*
- * system call / hypercall (0xc00, 0x4c00)
- *
- * The system call exception is invoked with "sc 0" and does not alter HV bit.
- *
- * The hypercall is invoked with "sc 1" and sets HV=1.
+/**
+ * Interrupt 0xc00 - System Call Interrupt (syscall, hcall).
+ * This is a synchronous interrupt invoked with the "sc" instruction. The
+ * system call is invoked with "sc 0" and does not alter the HV bit, so it
+ * is directed to the currently running OS. The hypercall is invoked with
+ * "sc 1" and sets HV=1, so it elevates to the hypervisor.
*
* In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to
* 0x4c00 virtual mode.
*
+ * Handling:
+ * If the KVM test fires then it was due to a hypercall and is accordingly
+ * routed to KVM. Otherwise this executes a normal Linux system call.
+ *
* Call convention:
*
* syscall and hypercalls register conventions are documented in
@@ -1417,6 +1900,12 @@ EXC_VIRT_NONE(0x4b00, 0x100)
* without saving, though xer is not a good idea to use, as hardware may
* interpret some bits so it may be costly to change them.
*/
+INT_DEFINE_BEGIN(system_call)
+ IVEC=0xc00
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(system_call)
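
As a concrete illustration of the "sc 0" convention noted above (number in r0,
arguments from r3 up, result back in r3 with cr0.SO signalling an error), a
user-space stub on powerpc64 might look like the following glibc-style sketch;
it is not part of this patch:

	#include <sys/syscall.h>
	#include <unistd.h>

	static long sc0(long nr, long a0, long a1, long a2)
	{
		register long r0 asm("r0") = nr;
		register long r3 asm("r3") = a0;
		register long r4 asm("r4") = a1;
		register long r5 asm("r5") = a2;

		asm volatile("sc 0\n\t"		/* system call, HV unchanged */
			     "bns+ 1f\n\t"	/* cr0.SO clear: r3 is the result */
			     "neg %0,%0\n"	/* cr0.SO set: r3 holds the errno */
			     "1:"
			     : "+r"(r3), "+r"(r0), "+r"(r4), "+r"(r5)
			     :
			     : "cr0", "ctr", "xer", "memory",
			       "r6", "r7", "r8", "r9", "r10", "r11", "r12");
		return r3;
	}

	int main(void)
	{
		static const char msg[] = "hello via sc 0\n";

		return sc0(SYS_write, STDOUT_FILENO, (long)msg, sizeof(msg) - 1) < 0;
	}
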
+
.macro SYSTEM_CALL virt
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
@@ -1431,7 +1920,7 @@ EXC_VIRT_NONE(0x4b00, 0x100)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
- KVMTEST system_call EXC_STD 0xc00 /* uses r10, branch to system_call_kvm */
+ KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
mfctr r9
#else
mr r9,r13
@@ -1453,15 +1942,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
HMT_MEDIUM
.if ! \virt
- __LOAD_HANDLER(r10, system_call_common)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
+ __LOAD_HANDLER(r10, system_call_common_real)
+ mtctr r10
+ bctr
.else
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
#ifdef CONFIG_RELOCATABLE
__LOAD_HANDLER(r10, system_call_common)
mtctr r10
@@ -1490,108 +1974,213 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
EXC_VIRT_END(system_call, 0x4c00, 0x100)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
- /*
- * This is a hcall, so register convention is as above, with these
- * differences:
- * r13 = PACA
- * ctr = orig r13
- * orig r10 saved in PACA
- */
-TRAMP_KVM_BEGIN(system_call_kvm)
+TRAMP_REAL_BEGIN(kvm_hcall)
+ std r9,PACA_EXGEN+EX_R9(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfcr r9
+ mfctr r10
+ std r10,PACA_EXGEN+EX_R13(r13)
+ li r10,0
+ std r10,PACA_EXGEN+EX_CFAR(r13)
+ std r10,PACA_EXGEN+EX_CTR(r13)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
* guest state (it is the guest's PPR value).
*/
- OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_PPR
+ std r10,PACA_EXGEN+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
HMT_MEDIUM
- OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR)
- mfctr r10
- SET_SCRATCH0(r10)
- std r9,PACA_EXGEN+EX_R9(r13)
- mfcr r9
- KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0
+
+#ifdef CONFIG_RELOCATABLE
+ /*
+	 * Requires __LOAD_FAR_HANDLER because kvmppc_hcall lives
+ * outside the head section.
+ */
+ __LOAD_FAR_HANDLER(r10, kvmppc_hcall)
+ mtctr r10
+ bctr
+#else
+ b kvmppc_hcall
+#endif
#endif
+/**
+ * Interrupt 0xd00 - Trace Interrupt.
+ * This is a synchronous interrupt in response to instruction step or
+ * breakpoint faults.
+ */
+INT_DEFINE_BEGIN(single_step)
+ IVEC=0xd00
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(single_step)
EXC_REAL_BEGIN(single_step, 0xd00, 0x100)
- INT_HANDLER single_step, 0xd00, kvm=1
+ GEN_INT_ENTRY single_step, virt=0
EXC_REAL_END(single_step, 0xd00, 0x100)
EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
- INT_HANDLER single_step, 0xd00, virt=1
+ GEN_INT_ENTRY single_step, virt=1
EXC_VIRT_END(single_step, 0x4d00, 0x100)
-INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON(single_step_common, 0xd00, single_step_exception)
+EXC_COMMON_BEGIN(single_step_common)
+ GEN_COMMON single_step
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl single_step_exception
+ b interrupt_return_srr
+/**
+ * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest data access.
+ *
+ * Handling:
+ * This should always get routed to KVM. In radix MMU mode, this is caused
+ * by a guest nested radix access that can't be performed due to the
+ * partition scope page table. In hash mode, this can be caused by guests
+ * running with translation disabled (virtual real mode) or with VPM enabled.
+ * KVM will update the page table structures or disallow the access.
+ */
+INT_DEFINE_BEGIN(h_data_storage)
+ IVEC=0xe00
+ IHSRR=1
+ IDAR=1
+ IDSISR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_data_storage)
+
EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20)
- INT_HANDLER h_data_storage, 0xe00, ool=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY h_data_storage, virt=0, ool=1
EXC_REAL_END(h_data_storage, 0xe00, 0x20)
EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
- INT_HANDLER h_data_storage, 0xe00, ool=1, virt=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY h_data_storage, virt=1, ool=1
EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
-INT_KVM_HANDLER h_data_storage, 0xe00, EXC_HV, PACA_EXGEN, 1
EXC_COMMON_BEGIN(h_data_storage_common)
- INT_COMMON 0xe00, PACA_EXGEN, 1, 1, 1, 1, 1
- bl save_nvgprs
+ GEN_COMMON h_data_storage
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
- ld r4,_DAR(r1)
- li r5,SIGSEGV
- bl bad_page_fault
+ bl do_bad_page_fault_segv
MMU_FTR_SECTION_ELSE
bl unknown_exception
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
- b ret_from_except
+ b interrupt_return_hsrr
+
+/**
+ * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest instruction fetch, similar to HDSI.
+ */
+INT_DEFINE_BEGIN(h_instr_storage)
+ IVEC=0xe20
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_instr_storage)
EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20)
- INT_HANDLER h_instr_storage, 0xe20, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_instr_storage, virt=0, ool=1
EXC_REAL_END(h_instr_storage, 0xe20, 0x20)
EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
- INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_instr_storage, virt=1, ool=1
EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
-INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
+EXC_COMMON_BEGIN(h_instr_storage_common)
+ GEN_COMMON h_instr_storage
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b interrupt_return_hsrr
+
+/**
+ * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
+ */
+INT_DEFINE_BEGIN(emulation_assist)
+ IVEC=0xe40
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(emulation_assist)
EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20)
- INT_HANDLER emulation_assist, 0xe40, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY emulation_assist, virt=0, ool=1
EXC_REAL_END(emulation_assist, 0xe40, 0x20)
EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
- INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY emulation_assist, virt=1, ool=1
EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
-INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
+EXC_COMMON_BEGIN(emulation_assist_common)
+ GEN_COMMON emulation_assist
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl emulation_assist_interrupt
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ b interrupt_return_hsrr
-/*
- * hmi_exception trampoline is a special case. It jumps to hmi_exception_early
- * first, and then eventaully from there to the trampoline to get into virtual
- * mode.
+/**
+ * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
+ * This is an asynchronous interrupt caused by a Hypervisor Maintenance
+ * Exception. It is always taken in real mode but uses HSRR registers
+ * unlike SRESET and MCE.
+ *
+ * It is maskable in hardware by clearing MSR[EE], and partially soft-maskable
+ * with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * This is a special case: it is handled similarly to machine checks, with an
+ * initial real-mode handler that is not soft-masked and attempts to fix the
+ * problem, followed by a regular handler which is soft-maskable and reports
+ * the problem.
+ *
+ * The emergency stack is used for the early real mode handler.
+ *
+ * XXX: unclear why MCE and HMI schemes could not be made common, e.g.,
+ * either use soft-masking for the MCE, or use irq_work for the HMI.
+ *
+ * KVM:
+ * Unlike MCE, this calls into KVM without calling the real mode handler
+ * first.
*/
+INT_DEFINE_BEGIN(hmi_exception_early)
+ IVEC=0xe60
+ IHSRR=1
+ IREALMODE_COMMON=1
+ ISTACK=0
+ IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+ IKVM_REAL=1
+INT_DEFINE_END(hmi_exception_early)
+
+INT_DEFINE_BEGIN(hmi_exception)
+ IVEC=0xe60
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+INT_DEFINE_END(hmi_exception)
+
EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20)
- INT_HANDLER hmi_exception, 0xe60, ool=1, early=1, hsrr=EXC_HV, ri=0, kvm=1
+ GEN_INT_ENTRY hmi_exception_early, virt=0, ool=1
EXC_REAL_END(hmi_exception, 0xe60, 0x20)
EXC_VIRT_NONE(0x4e60, 0x20)
-INT_KVM_HANDLER hmi_exception, 0xe60, EXC_HV, PACA_EXGEN, 0
+
EXC_COMMON_BEGIN(hmi_exception_early_common)
- mtctr r10 /* Restore ctr */
- mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
+ __GEN_REALMODE_COMMON_ENTRY hmi_exception_early
+
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- /* We don't touch AMR here, we never go to virtual mode */
- INT_COMMON 0xe60, PACA_EXGEN, 0, 0, 0, 0, 0
+ __GEN_COMMON_BODY hmi_exception_early
addi r3,r1,STACK_FRAME_OVERHEAD
bl hmi_exception_realmode
cmpdi cr0,r3,0
bne 1f
- EXCEPTION_RESTORE_REGS EXC_HV
+ EXCEPTION_RESTORE_REGS hsrr=1
HRFI_TO_USER_OR_KERNEL
1:
@@ -1599,41 +2188,70 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
* Go to virtual mode and pull the HMI event information from
* firmware.
*/
- EXCEPTION_RESTORE_REGS EXC_HV
- INT_HANDLER hmi_exception, 0xe60, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ EXCEPTION_RESTORE_REGS hsrr=1
+ GEN_INT_ENTRY hmi_exception, virt=0
EXC_COMMON_BEGIN(hmi_exception_common)
- INT_COMMON 0xe60, PACA_EXGEN, 1, 1, 1, 0, 0
- FINISH_NAP
- RUNLATCH_ON
- bl save_nvgprs
+ GEN_COMMON hmi_exception
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
- b ret_from_except
+ b interrupt_return_hsrr
+
+/**
+ * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsnd doorbell.
+ * Similar to the 0xa00 doorbell but for host rather than guest.
+ */
+INT_DEFINE_BEGIN(h_doorbell)
+ IVEC=0xe80
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_doorbell)
EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
- INT_HANDLER h_doorbell, 0xe80, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_doorbell, virt=0, ool=1
EXC_REAL_END(h_doorbell, 0xe80, 0x20)
EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
- INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_doorbell, virt=1, ool=1
EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
-INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(h_doorbell_common)
+ GEN_COMMON h_doorbell
+ addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
-EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
+ bl doorbell_exception
#else
-EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
+ bl unknown_async_exception
#endif
+ b interrupt_return_hsrr
+
+/**
+ * Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
+ * This is an asynchronous interrupt in response to an "external exception".
+ * Similar to 0x500 but for host only.
+ */
+INT_DEFINE_BEGIN(h_virt_irq)
+ IVEC=0xea0
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_virt_irq)
EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
- INT_HANDLER h_virt_irq, 0xea0, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_virt_irq, virt=0, ool=1
EXC_REAL_END(h_virt_irq, 0xea0, 0x20)
EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
- INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_virt_irq, virt=1, ool=1
EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
-INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
+EXC_COMMON_BEGIN(h_virt_irq_common)
+ GEN_COMMON h_virt_irq
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_IRQ
+ b interrupt_return_hsrr
EXC_REAL_NONE(0xec0, 0x20)
@@ -1642,25 +2260,64 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
+/*
+ * Interrupt 0xf00 - Performance Monitor Interrupt (PMI, PMU).
+ * This is an asynchronous interrupt in response to a PMU exception.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_PMI_DISABLED mask (NOTE: NOT local_irq_disable()).
+ *
+ * Handling:
+ * This calls into the perf subsystem.
+ *
+ * Like the watchdog soft-nmi, it appears to Linux as an NMI interrupt, in that
+ * it runs under local_irq_disable. However, it may be soft-masked in
+ * powerpc-specific code.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ */
+INT_DEFINE_BEGIN(performance_monitor)
+ IVEC=0xf00
+ IMASK=IRQS_PMI_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(performance_monitor)
+
EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
- INT_HANDLER performance_monitor, 0xf00, ool=1, bitmask=IRQS_PMI_DISABLED, kvm=1
+ GEN_INT_ENTRY performance_monitor, virt=0, ool=1
EXC_REAL_END(performance_monitor, 0xf00, 0x20)
EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
- INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED
+ GEN_INT_ENTRY performance_monitor, virt=1, ool=1
EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
-INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
+EXC_COMMON_BEGIN(performance_monitor_common)
+ GEN_COMMON performance_monitor
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl performance_monitor_exception
+ b interrupt_return_srr
+
+/**
+ * Interrupt 0xf20 - Vector Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a vector (or altivec) instruction with MSR[VEC]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(altivec_unavailable)
+ IVEC=0xf20
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(altivec_unavailable)
EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
- INT_HANDLER altivec_unavailable, 0xf20, ool=1, kvm=1
+ GEN_INT_ENTRY altivec_unavailable, virt=0, ool=1
EXC_REAL_END(altivec_unavailable, 0xf20, 0x20)
EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20)
- INT_HANDLER altivec_unavailable, 0xf20, ool=1, virt=1
+ GEN_INT_ENTRY altivec_unavailable, virt=1, ool=1
EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20)
-INT_KVM_HANDLER altivec_unavailable, 0xf20, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(altivec_unavailable_common)
- INT_COMMON 0xf20, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON altivec_unavailable
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
beq 1f
@@ -1674,34 +2331,42 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
#endif
bl load_up_altivec
- b fast_exception_return
+ b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_tm
- b ret_from_except
+ b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_exception
- b ret_from_except
+ b interrupt_return_srr
+/**
+ * Interrupt 0xf40 - VSX Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a VSX instruction with MSR[VSX]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(vsx_unavailable)
+ IVEC=0xf40
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(vsx_unavailable)
+
EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
- INT_HANDLER vsx_unavailable, 0xf40, ool=1, kvm=1
+ GEN_INT_ENTRY vsx_unavailable, virt=0, ool=1
EXC_REAL_END(vsx_unavailable, 0xf40, 0x20)
EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20)
- INT_HANDLER vsx_unavailable, 0xf40, ool=1, virt=1
+ GEN_INT_ENTRY vsx_unavailable, virt=1, ool=1
EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20)
-INT_KVM_HANDLER vsx_unavailable, 0xf40, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(vsx_unavailable_common)
- INT_COMMON 0xf40, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON vsx_unavailable
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
beq 1f
@@ -1717,40 +2382,72 @@ BEGIN_FTR_SECTION
b load_up_vsx
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_tm
- b ret_from_except
+ b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_exception
- b ret_from_except
+ b interrupt_return_srr
+
+/**
+ * Interrupt 0xf60 - Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can be
+ * resolved by the OS (e.g., FSCR, MSR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(facility_unavailable)
+ IVEC=0xf60
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(facility_unavailable)
EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20)
- INT_HANDLER facility_unavailable, 0xf60, ool=1, kvm=1
+ GEN_INT_ENTRY facility_unavailable, virt=0, ool=1
EXC_REAL_END(facility_unavailable, 0xf60, 0x20)
EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
- INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1
+ GEN_INT_ENTRY facility_unavailable, virt=1, ool=1
EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
-INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
+EXC_COMMON_BEGIN(facility_unavailable_common)
+ GEN_COMMON facility_unavailable
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl facility_unavailable_exception
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ b interrupt_return_srr
+/**
+ * Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can only
+ * be resolved in HV mode (e.g., HFSCR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(h_facility_unavailable)
+ IVEC=0xf80
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_facility_unavailable)
+
EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20)
- INT_HANDLER h_facility_unavailable, 0xf80, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_facility_unavailable, virt=0, ool=1
EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20)
EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
- INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_facility_unavailable, virt=1, ool=1
EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
-INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
+EXC_COMMON_BEGIN(h_facility_unavailable_common)
+ GEN_COMMON h_facility_unavailable
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl facility_unavailable_exception
+ REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
+ b interrupt_return_hsrr
EXC_REAL_NONE(0xfa0, 0x20)
@@ -1766,56 +2463,94 @@ EXC_REAL_NONE(0x1100, 0x100)
EXC_VIRT_NONE(0x5100, 0x100)
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_system_error)
+ IVEC=0x1200
+ IHSRR=1
+INT_DEFINE_END(cbe_system_error)
+
EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
- INT_HANDLER cbe_system_error, 0x1200, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_system_error, virt=0
EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
-INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception)
+EXC_COMMON_BEGIN(cbe_system_error_common)
+ GEN_COMMON cbe_system_error
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl cbe_system_error_exception
+ b interrupt_return_hsrr
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
#endif
+/**
+ * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt.
+ * This interrupt was removed from the ISA before 2.01, which is the earliest
+ * 64-bit BookS ISA supported; however, the G5 / 970 implements it as a
+ * non-architected feature available through the support processor
+ * interface.
+ */
+INT_DEFINE_BEGIN(instruction_breakpoint)
+ IVEC=0x1300
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_breakpoint)
EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100)
- INT_HANDLER instruction_breakpoint, 0x1300, kvm=1
+ GEN_INT_ENTRY instruction_breakpoint, virt=0
EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100)
EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
- INT_HANDLER instruction_breakpoint, 0x1300, virt=1
+ GEN_INT_ENTRY instruction_breakpoint, virt=1
EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
-INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1
-EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception)
+EXC_COMMON_BEGIN(instruction_breakpoint_common)
+ GEN_COMMON instruction_breakpoint
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl instruction_breakpoint_exception
+ b interrupt_return_srr
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
-EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
- INT_HANDLER denorm_exception_hv, 0x1500, early=2, hsrr=EXC_HV
+/**
+ * Interrupt 0x1500 - Soft Patch Interrupt
+ *
+ * Handling:
+ * This is an implementation specific interrupt which can be used for a
+ * range of exceptions.
+ *
+ * This interrupt handler is unique in that it runs the denormal assist
+ * code even for guests (and even in guest context) without going to KVM,
+ * for speed. POWER9 does not raise denorm exceptions, so this special case
+ * could be phased out in the future.
+ */
+INT_DEFINE_BEGIN(denorm_exception)
+ IVEC=0x1500
+ IHSRR=1
+ IBRANCH_TO_COMMON=0
+ IKVM_REAL=1
+INT_DEFINE_END(denorm_exception)
+
+EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100)
+ GEN_INT_ENTRY denorm_exception, virt=0
#ifdef CONFIG_PPC_DENORMALISATION
- mfspr r10,SPRN_HSRR1
- andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
#endif
- KVMTEST denorm_exception_hv, EXC_HV 0x1500
- INT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV, 1
-EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
-
+ GEN_BRANCH_TO_COMMON denorm_exception, virt=0
+EXC_REAL_END(denorm_exception, 0x1500, 0x100)
#ifdef CONFIG_PPC_DENORMALISATION
EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
- INT_HANDLER denorm_exception, 0x1500, 0, 2, 1, EXC_HV, PACA_EXGEN, 1, 0, 0, 0, 0
- mfspr r10,SPRN_HSRR1
- andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ GEN_INT_ENTRY denorm_exception, virt=1
+ andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
- INT_VIRT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV
+ GEN_BRANCH_TO_COMMON denorm_exception, virt=1
EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
#else
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-INT_KVM_HANDLER denorm_exception_hv, 0x1500, EXC_HV, PACA_EXGEN, 0
-
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
BEGIN_FTR_SECTION
@@ -1872,11 +2607,16 @@ denorm_done:
mtspr SPRN_HSRR0,r11
mtcrf 0x80,r9
ld r9,PACA_EXGEN+EX_R9(r13)
- RESTORE_PPR_PACA(PACA_EXGEN, r10)
+BEGIN_FTR_SECTION
+ ld r10,PACA_EXGEN+EX_PPR(r13)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
BEGIN_FTR_SECTION
ld r10,PACA_EXGEN+EX_CFAR(r13)
mtspr SPRN_CFAR,r10
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ li r10,0
+ stb r10,PACAHSRR_VALID(r13)
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
ld r12,PACA_EXGEN+EX_R12(r13)
@@ -1885,43 +2625,76 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
b .
#endif
-EXC_COMMON(denorm_common, 0x1500, unknown_exception)
+EXC_COMMON_BEGIN(denorm_exception_common)
+ GEN_COMMON denorm_exception
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b interrupt_return_hsrr
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_maintenance)
+ IVEC=0x1600
+ IHSRR=1
+INT_DEFINE_END(cbe_maintenance)
+
EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
- INT_HANDLER cbe_maintenance, 0x1600, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_maintenance, virt=0
EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
-INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception)
+EXC_COMMON_BEGIN(cbe_maintenance_common)
+ GEN_COMMON cbe_maintenance
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl cbe_maintenance_exception
+ b interrupt_return_hsrr
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
#endif
+INT_DEFINE_BEGIN(altivec_assist)
+ IVEC=0x1700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(altivec_assist)
+
EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100)
- INT_HANDLER altivec_assist, 0x1700, kvm=1
+ GEN_INT_ENTRY altivec_assist, virt=0
EXC_REAL_END(altivec_assist, 0x1700, 0x100)
EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
- INT_HANDLER altivec_assist, 0x1700, virt=1
+ GEN_INT_ENTRY altivec_assist, virt=1
EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
-INT_KVM_HANDLER altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(altivec_assist_common)
+ GEN_COMMON altivec_assist
+ addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_ALTIVEC
-EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception)
+ bl altivec_assist_exception
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
#else
-EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception)
+ bl unknown_exception
#endif
+ b interrupt_return_srr
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_thermal)
+ IVEC=0x1800
+ IHSRR=1
+INT_DEFINE_END(cbe_thermal)
+
EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
- INT_HANDLER cbe_thermal, 0x1800, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_thermal, virt=0
EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
-INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception)
+EXC_COMMON_BEGIN(cbe_thermal_common)
+ GEN_COMMON cbe_thermal
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl cbe_thermal_exception
+ b interrupt_return_hsrr
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
@@ -1930,14 +2703,10 @@ EXC_VIRT_NONE(0x5800, 0x100)
#ifdef CONFIG_PPC_WATCHDOG
-#define MASKED_DEC_HANDLER_LABEL 3f
-
-#define MASKED_DEC_HANDLER(_H) \
-3: /* soft-nmi */ \
- std r12,PACA_EXGEN+EX_R12(r13); \
- GET_SCRATCH0(r10); \
- std r10,PACA_EXGEN+EX_R13(r13); \
- INT_SAVE_SRR_AND_JUMP soft_nmi_common, _H, 1
+INT_DEFINE_BEGIN(soft_nmi)
+ IVEC=0x900
+ ISTACK=0
+INT_DEFINE_END(soft_nmi)
/*
* Branch to soft_nmi_interrupt using the emergency stack. The emergency
@@ -1952,15 +2721,20 @@ EXC_COMMON_BEGIN(soft_nmi_common)
mr r10,r1
ld r1,PACAEMERGSP(r13)
subi r1,r1,INT_FRAME_SIZE
- INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0
- bl save_nvgprs
+ __GEN_COMMON_BODY soft_nmi
+
addi r3,r1,STACK_FRAME_OVERHEAD
bl soft_nmi_interrupt
- b ret_from_except
-#else /* CONFIG_PPC_WATCHDOG */
-#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
-#define MASKED_DEC_HANDLER(_H)
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r9,0
+ mtmsrd r9,1
+
+ kuap_kernel_restore r9, r10
+
+ EXCEPTION_RESTORE_REGS hsrr=0
+ RFI_TO_KERNEL
+
#endif /* CONFIG_PPC_WATCHDOG */
/*
@@ -1973,49 +2747,75 @@ EXC_COMMON_BEGIN(soft_nmi_common)
* - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
-.macro MASKED_INTERRUPT hsrr
+.macro MASKED_INTERRUPT hsrr=0
.if \hsrr
masked_Hinterrupt:
.else
masked_interrupt:
.endif
- std r11,PACA_EXGEN+EX_R11(r13)
- lbz r11,PACAIRQHAPPENED(r13)
- or r11,r11,r10
- stb r11,PACAIRQHAPPENED(r13)
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ lbz r9,PACAIRQHAPPENED(r13)
+ or r9,r9,r10
+ stb r9,PACAIRQHAPPENED(r13)
+
+ .if ! \hsrr
cmpwi r10,PACA_IRQ_DEC
bne 1f
- lis r10,0x7fff
- ori r10,r10,0xffff
- mtspr SPRN_DEC,r10
- b MASKED_DEC_HANDLER_LABEL
+ LOAD_REG_IMMEDIATE(r9, 0x7fffffff)
+ mtspr SPRN_DEC,r9
+#ifdef CONFIG_PPC_WATCHDOG
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ b soft_nmi_common
+#else
+ b 2f
+#endif
+ .endif
+
1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK
beq 2f
+ xori r12,r12,MSR_EE /* clear MSR_EE */
.if \hsrr
- mfspr r10,SPRN_HSRR1
- xori r10,r10,MSR_EE /* clear MSR_EE */
- mtspr SPRN_HSRR1,r10
+ mtspr SPRN_HSRR1,r12
.else
- mfspr r10,SPRN_SRR1
- xori r10,r10,MSR_EE /* clear MSR_EE */
- mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR1,r12
.endif
- ori r11,r11,PACA_IRQ_HARD_DIS
- stb r11,PACAIRQHAPPENED(r13)
+ ori r9,r9,PACA_IRQ_HARD_DIS
+ stb r9,PACAIRQHAPPENED(r13)
2: /* done */
+ li r9,0
+ .if \hsrr
+ stb r9,PACAHSRR_VALID(r13)
+ .else
+ stb r9,PACASRR_VALID(r13)
+ .endif
+
+ SEARCH_RESTART_TABLE
+ cmpdi r12,0
+ beq 3f
+ .if \hsrr
+ mtspr SPRN_HSRR0,r12
+ .else
+ mtspr SPRN_SRR0,r12
+ .endif
+3:
+
+ ld r9,PACA_EXGEN+EX_CTR(r13)
+ mtctr r9
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
mtcrf 0x80,r9
std r1,PACAR1(r13)
ld r9,PACA_EXGEN+EX_R9(r13)
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
- /* returns to kernel where r13 must be set up, so don't restore it */
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ /* May return to masked low address where r13 is not set up */
.if \hsrr
HRFI_TO_KERNEL
.else
RFI_TO_KERNEL
.endif
b .
- MASKED_DEC_HANDLER(\hsrr\())
.endm
TRAMP_REAL_BEGIN(stf_barrier_fallback)
@@ -2031,15 +2831,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback)
.endr
blr
-TRAMP_REAL_BEGIN(rfi_flush_fallback)
- SET_SCRATCH0(r13);
- GET_PACA(r13);
- std r1,PACA_EXRFI+EX_R12(r13)
- ld r1,PACAKSAVE(r13)
- std r9,PACA_EXRFI+EX_R9(r13)
- std r10,PACA_EXRFI+EX_R10(r13)
- std r11,PACA_EXRFI+EX_R11(r13)
- mfctr r9
+/* Clobbers r10, r11, ctr */
+.macro L1D_DISPLACEMENT_FLUSH
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
@@ -2050,7 +2843,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
sync
/*
- * The load adresses are at staggered offsets within cachelines,
+ * The load addresses are at staggered offsets within cachelines,
* which suits some pipelines better (on others it should not
* hurt).
*/
@@ -2065,7 +2858,49 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
ld r11,(0x80 + 8)*7(r10)
addi r10,r10,0x80*8
bdnz 1b
+.endm
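
The fallback flush above works by displacement: it walks a dedicated per-CPU
buffer doing one load per 128-byte cache line so that every L1D set and way is
refilled with harmless data. A C model of the loop structure (the real macro
keeps everything in registers and is unrolled 8x, with the load offsets
staggered within each line):

	#include <stddef.h>

	static void l1d_displacement_flush(const volatile char *fallback_area,
					   size_t l1d_flush_size)
	{
		size_t iters = l1d_flush_size >> (7 + 3);	/* / (128 * 8) */
		const volatile char *p = fallback_area;

		for (size_t i = 0; i < iters; i++) {
			for (int line = 0; line < 8; line++)
				(void)p[(0x80 + 8) * line];	/* one load per line */
			p += 0x80 * 8;
		}
	}
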
+
+TRAMP_REAL_BEGIN(entry_flush_fallback)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ blr
+/*
+ * The SCV entry flush happens with interrupts enabled, so it must disable
+ * them to prevent EXRFI being clobbered by NMIs (e.g., soft_nmi_common). r10
+ * (containing LR) does not need to be preserved here because scv entry
+ * puts 0 in the pt_regs, CTR can be clobbered for the same reason.
+ */
+TRAMP_REAL_BEGIN(scv_entry_flush_fallback)
+ li r10,0
+ mtmsrd r10,1
+ lbz r10,PACAIRQHAPPENED(r13)
+ ori r10,r10,PACA_IRQ_HARD_DIS
+ stb r10,PACAIRQHAPPENED(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ L1D_DISPLACEMENT_FLUSH
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ li r10,MSR_RI
+ mtmsrd r10,1
+ blr
+
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r1,PACA_EXRFI+EX_R12(r13)
+ ld r1,PACAKSAVE(r13)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
@@ -2083,6 +2918,22 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
mfctr r9
+ L1D_DISPLACEMENT_FLUSH
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ ld r1,PACA_EXRFI+EX_R12(r13)
+ GET_SCRATCH0(r13);
+ hrfid
+
+TRAMP_REAL_BEGIN(rfscv_flush_fallback)
+ /* system call volatile */
+ mr r7,r13
+ GET_PACA(r13);
+ mr r8,r1
+ ld r1,PACAKSAVE(r13)
+ mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
@@ -2110,57 +2961,38 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
bdnz 1b
mtctr r9
- ld r9,PACA_EXRFI+EX_R9(r13)
- ld r10,PACA_EXRFI+EX_R10(r13)
- ld r11,PACA_EXRFI+EX_R11(r13)
- ld r1,PACA_EXRFI+EX_R12(r13)
- GET_SCRATCH0(r13);
- hrfid
+ li r9,0
+ li r10,0
+ li r11,0
+ mr r1,r8
+ mr r13,r7
+ RFSCV
-/*
- * Real mode exceptions actually use this too, but alternate
- * instruction code patches (which end up in the common .text area)
- * cannot reach these if they are put there.
- */
-USE_FIXED_SECTION(virt_trampolines)
- MASKED_INTERRUPT EXC_STD
- MASKED_INTERRUPT EXC_HV
+USE_TEXT_SECTION()
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
+kvm_interrupt:
/*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
+	 * The conditional branch in KVMTEST can't reach all the way,
+	 * so make a stub.
*/
- mfspr r13, SPRN_SRR0
- addi r13, r13, 4
- mtspr SPRN_SRR0, r13
- GET_SCRATCH0(r13)
- RFI_TO_KERNEL
- b .
-
-TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
- /*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
- */
- mfspr r13, SPRN_HSRR0
- addi r13, r13, 4
- mtspr SPRN_HSRR0, r13
- GET_SCRATCH0(r13)
- HRFI_TO_KERNEL
- b .
+ b kvmppc_interrupt
#endif
-/*
- * Ensure that any handlers that get invoked from the exception prologs
- * above are below the first 64KB (0x10000) of the kernel image because
- * the prologs assemble the addresses of these handlers using the
- * LOAD_HANDLER macro, which uses an ori instruction.
- */
+_GLOBAL(do_uaccess_flush)
+ UACCESS_FLUSH_FIXUP_SECTION
+ nop
+ nop
+ nop
+ blr
+ L1D_DISPLACEMENT_FLUSH
+ blr
+_ASM_NOKPROBE_SYMBOL(do_uaccess_flush)
+EXPORT_SYMBOL(do_uaccess_flush)
-/*** Common interrupt handlers ***/
+MASKED_INTERRUPT
+MASKED_INTERRUPT hsrr=1
/*
* Relocation-on interrupts: A subset of the interrupts can be delivered
@@ -2178,11 +3010,12 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
* come here.
*/
-EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline)
- b __ppc64_runlatch_on
-
USE_FIXED_SECTION(virt_trampolines)
/*
+ * All code below __end_soft_masked is treated as soft-masked. If
+ * any code runs here with MSR[EE]=1, it must then cope with a pending
+ * soft interrupt being raised (i.e., by ensuring it is replayed).
+ *
* The __end_interrupts marker must be past the out-of-line (OOL)
* handlers, so that they are copied to real address 0x100 when running
* a relocatable kernel. This ensures they can be reached from the short
@@ -2194,24 +3027,6 @@ USE_FIXED_SECTION(virt_trampolines)
__end_interrupts:
DEFINE_FIXED_SYMBOL(__end_interrupts)
-#ifdef CONFIG_PPC_970_NAP
- /*
- * Called by exception entry code if _TLF_NAPPING was set, this clears
- * the NAPPING flag, and redirects the exception exit to
- * power4_fixup_nap_return.
- */
- .globl power4_fixup_nap
-EXC_COMMON_BEGIN(power4_fixup_nap)
- andc r9,r9,r10
- std r9,TI_LOCAL_FLAGS(r11)
- LOAD_REG_ADDR(r10, power4_idle_nap_return)
- std r10,_NIP(r1)
- blr
-
-power4_idle_nap_return:
- blr
-#endif
-
CLOSE_FIXED_SECTION(real_vectors);
CLOSE_FIXED_SECTION(real_trampolines);
CLOSE_FIXED_SECTION(virt_vectors);
@@ -2247,161 +3062,3 @@ disable_machine_check:
RFI_TO_KERNEL
1: mtlr r0
blr
-
-/*
- * Hash table stuff
- */
- .balign IFETCH_ALIGN_BYTES
-do_hash_page:
-#ifdef CONFIG_PPC_BOOK3S_64
- lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
- ori r0,r0,DSISR_BAD_FAULT_64S@l
- and. r0,r5,r0 /* weird error? */
- bne- handle_page_fault /* if not, try to insert a HPTE */
- ld r11, PACA_THREAD_INFO(r13)
- lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
- andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
- bne 77f /* then don't call hash_page now */
-
- /*
- * r3 contains the trap number
- * r4 contains the faulting address
- * r5 contains dsisr
- * r6 msr
- *
- * at return r3 = 0 for success, 1 for page fault, negative for error
- */
- bl __hash_page /* build HPTE if possible */
- cmpdi r3,0 /* see if __hash_page succeeded */
-
- /* Success */
- beq fast_exc_return_irq /* Return from exception on success */
-
- /* Error */
- blt- 13f
-
- /* Reload DAR/DSISR into r4/r5 for the DABR check below */
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-/* Here we have a page fault that hash_page can't handle. */
-handle_page_fault:
-11: andis. r0,r5,DSISR_DABRMATCH@h
- bne- handle_dabr_fault
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_page_fault
- cmpdi r3,0
- beq+ ret_from_except_lite
- bl save_nvgprs
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r4,_DAR(r1)
- bl bad_page_fault
- b ret_from_except
-
-/* We have a data breakpoint exception - handle it */
-handle_dabr_fault:
- bl save_nvgprs
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_break
- /*
- * do_break() may have changed the NV GPRS while handling a breakpoint.
- * If so, we need to restore them with their updated values. Don't use
- * ret_from_except_lite here.
- */
- b ret_from_except
-
-
-#ifdef CONFIG_PPC_BOOK3S_64
-/* We have a page fault that hash_page could handle but HV refused
- * the PTE insertion
- */
-13: bl save_nvgprs
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r4,_DAR(r1)
- bl low_hash_fault
- b ret_from_except
-#endif
-
-/*
- * We come here as a result of a DSI at a point where we don't want
- * to call hash_page, such as when we are accessing memory (possibly
- * user memory) inside a PMU interrupt that occurred while interrupts
- * were soft-disabled. We want to invoke the exception handler for
- * the access, or panic if there isn't a handler.
- */
-77: bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- li r5,SIGSEGV
- bl bad_page_fault
- b ret_from_except
-
-/*
- * When doorbell is triggered from system reset wakeup, the message is
- * not cleared, so it would fire again when EE is enabled.
- *
- * When coming from local_irq_enable, there may be the same problem if
- * we were hard disabled.
- *
- * Execute msgclr to clear pending exceptions before handling it.
- */
-h_doorbell_common_msgclr:
- LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
- PPC_MSGCLR(3)
- b h_doorbell_common
-
-doorbell_super_common_msgclr:
- LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
- PPC_MSGCLRP(3)
- b doorbell_super_common
-
-/*
- * Called from arch_local_irq_enable when an interrupt needs
- * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
- * which kind of interrupt. MSR:EE is already off. We generate a
- * stackframe like if a real interrupt had happened.
- *
- * Note: While MSR:EE is off, we need to make sure that _MSR
- * in the generated frame has EE set to 1 or the exception
- * handler will not properly re-enable them.
- *
- * Note that we don't specify LR as the NIP (return address) for
- * the interrupt because that would unbalance the return branch
- * predictor.
- */
-_GLOBAL(__replay_interrupt)
- /* We are going to jump to the exception common code which
- * will retrieve various register values from the PACA which
- * we don't give a damn about, so we don't bother storing them.
- */
- mfmsr r12
- LOAD_REG_ADDR(r11, replay_interrupt_return)
- mfcr r9
- ori r12,r12,MSR_EE
- cmpwi r3,0x900
- beq decrementer_common
- cmpwi r3,0x500
-BEGIN_FTR_SECTION
- beq h_virt_irq_common
-FTR_SECTION_ELSE
- beq hardware_interrupt_common
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
- cmpwi r3,0xf00
- beq performance_monitor_common
-BEGIN_FTR_SECTION
- cmpwi r3,0xa00
- beq h_doorbell_common_msgclr
- cmpwi r3,0xe60
- beq hmi_exception_common
-FTR_SECTION_ELSE
- cmpwi r3,0xa00
- beq doorbell_super_common_msgclr
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-replay_interrupt_return:
- blr
-
-_ASM_NOKPROBE_SYMBOL(__replay_interrupt)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index ff0114aeba9b..b7ceb041743c 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -24,22 +24,44 @@
#include <linux/slab.h>
#include <linux/cma.h>
#include <linux/hugetlb.h>
+#include <linux/debugfs.h>
-#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/fadump.h>
#include <asm/fadump-internal.h>
#include <asm/setup.h>
+#include <asm/interrupt.h>
+
+/*
+ * The CPU that acquired the lock to trigger the fadump crash should
+ * wait for the other CPUs to enter.
+ *
+ * The timeout is in milliseconds.
+ */
+#define CRASH_TIMEOUT 500
static struct fw_dump fw_dump;
static void __init fadump_reserve_crash_area(u64 base);
#ifndef CONFIG_PRESERVE_FA_DUMP
+
+static struct kobject *fadump_kobj;
+
+static atomic_t cpus_in_fadump;
static DEFINE_MUTEX(fadump_mutex);
-struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
-struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 };
+
+static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };
+
+#define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */
+#define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \
+ sizeof(struct fadump_memory_range))
+static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
+static struct fadump_mrange_info
+reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true };
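(For readability, here is how the positional initializer above maps onto the struct fields, written with designated initializers. This is a sketch only, with the field order inferred from the existing initializers and fadump-internal.h; it is not part of the patch.)

static struct fadump_mrange_info reserved_mrange_info = {
	.name		= "reserved",		/* RNG_NAME_SZ-sized label */
	.mem_ranges	= rngs,			/* statically allocated backing array */
	.mem_ranges_sz	= RESERVED_RNGS_SZ,	/* bytes backing the array */
	.mem_range_cnt	= 0,			/* ranges currently recorded */
	.max_mem_ranges	= RESERVED_RNGS_CNT,	/* capacity in entries */
	.is_static	= true,			/* never kfree()d or reallocated */
};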
+
+static void __init early_init_dt_scan_reserved_ranges(unsigned long node);
#ifdef CONFIG_CMA
static struct cma *fadump_cma;
@@ -57,7 +79,7 @@ static struct cma *fadump_cma;
* But for some reason even if it fails we still have the memory reservation
* with us and we can still continue doing fadump.
*/
-int __init fadump_cma_init(void)
+static int __init fadump_cma_init(void)
{
unsigned long long base, size;
int rc;
@@ -108,6 +130,11 @@ static int __init fadump_cma_init(void) { return 1; }
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
int depth, void *data)
{
+ if (depth == 0) {
+ early_init_dt_scan_reserved_ranges(node);
+ return 0;
+ }
+
if (depth != 1)
return 0;
@@ -164,13 +191,13 @@ int is_fadump_active(void)
*/
static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
- struct memblock_region *reg;
+ phys_addr_t reg_start, reg_end;
bool ret = false;
- u64 start, end;
+ u64 i, start, end;
- for_each_memblock(memory, reg) {
- start = max_t(u64, d_start, reg->base);
- end = min_t(u64, d_end, (reg->base + reg->size));
+ for_each_mem_range(i, &reg_start, &reg_end) {
+ start = max_t(u64, d_start, reg_start);
+ end = min_t(u64, d_end, reg_end);
if (d_start < end) {
/* Memory hole from d_start to start */
if (start > d_start)
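(The hunks in this file replace the removed for_each_memblock() walks with the for_each_mem_range() iterator. Below is a minimal sketch of the new idiom, assuming only the generic memblock API; it is illustrative and not taken from the patch.)

#include <linux/memblock.h>

/* Sum all memory known to memblock using the iterator the patch switches to. */
static u64 __init total_system_memory(void)
{
	phys_addr_t start, end;
	u64 i, total = 0;

	/* Yields half-open physical ranges [start, end) from memblock.memory. */
	for_each_mem_range(i, &start, &end)
		total += end - start;

	return total;
}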
@@ -265,7 +292,7 @@ static void fadump_show_config(void)
* that is required for a kernel to boot successfully.
*
*/
-static inline u64 fadump_calculate_reserve_size(void)
+static __init u64 fadump_calculate_reserve_size(void)
{
u64 base, size, bootmem_min;
int ret;
@@ -395,44 +422,106 @@ static int __init add_boot_mem_regions(unsigned long mstart,
static int __init fadump_get_boot_mem_regions(void)
{
- unsigned long base, size, cur_size, hole_size, last_end;
+ unsigned long size, cur_size, hole_size, last_end;
unsigned long mem_size = fw_dump.boot_memory_size;
- struct memblock_region *reg;
+ phys_addr_t reg_start, reg_end;
int ret = 1;
+ u64 i;
fw_dump.boot_mem_regs_cnt = 0;
last_end = 0;
hole_size = 0;
cur_size = 0;
- for_each_memblock(memory, reg) {
- base = reg->base;
- size = reg->size;
- hole_size += (base - last_end);
+ for_each_mem_range(i, &reg_start, &reg_end) {
+ size = reg_end - reg_start;
+ hole_size += (reg_start - last_end);
if ((cur_size + size) >= mem_size) {
size = (mem_size - cur_size);
- ret = add_boot_mem_regions(base, size);
+ ret = add_boot_mem_regions(reg_start, size);
break;
}
mem_size -= size;
cur_size += size;
- ret = add_boot_mem_regions(base, size);
+ ret = add_boot_mem_regions(reg_start, size);
if (!ret)
break;
- last_end = base + size;
+ last_end = reg_end;
}
fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
return ret;
}
+/*
+ * Returns true if the given range overlaps with the reserved memory ranges,
+ * starting the search at index *idx; on overlap, *idx is updated to the
+ * index of the overlapping range. Returns false otherwise.
+ */
+static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx)
+{
+ bool ret = false;
+ int i;
+
+ for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
+ u64 rbase = reserved_mrange_info.mem_ranges[i].base;
+ u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;
+
+ if (end <= rbase)
+ break;
+
+ if ((end > rbase) && (base < rend)) {
+ *idx = i;
+ ret = true;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Locate a suitable memory area to reserve memory for FADump. While at it,
+ * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
+ */
+static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
+{
+ struct fadump_memory_range *mrngs;
+ phys_addr_t mstart, mend;
+ int idx = 0;
+ u64 i, ret = 0;
+
+ mrngs = reserved_mrange_info.mem_ranges;
+ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+ &mstart, &mend, NULL) {
+ pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
+ i, mstart, mend, base);
+
+ if (mstart > base)
+ base = PAGE_ALIGN(mstart);
+
+ while ((mend > base) && ((mend - base) >= size)) {
+ if (!overlaps_reserved_ranges(base, base+size, &idx)) {
+ ret = base;
+ goto out;
+ }
+
+ base = mrngs[idx].base + mrngs[idx].size;
+ base = PAGE_ALIGN(base);
+ }
+ }
+
+out:
+ return ret;
+}
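(fadump_locate_reserve_mem() walks the free memblock ranges bottom-up and rejects any candidate window that intersects a firmware reserved-range. The intersection test it relies on is the standard half-open interval check, shown here in isolation; the example values are hypothetical.)

/* Two half-open ranges [base, end) and [rbase, rend) overlap iff: */
static bool ranges_overlap(u64 base, u64 end, u64 rbase, u64 rend)
{
	return (end > rbase) && (base < rend);
}

/*
 * e.g. ranges_overlap(0x1000, 0x2000, 0x1800, 0x3000) -> true
 *      ranges_overlap(0x1000, 0x2000, 0x2000, 0x3000) -> false (only touching)
 */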
+
int __init fadump_reserve_mem(void)
{
- u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE;
- bool is_memblock_bottom_up = memblock_bottom_up();
+ u64 base, size, mem_boundary, bootmem_min;
int ret = 1;
if (!fw_dump.fadump_enabled)
@@ -453,9 +542,9 @@ int __init fadump_reserve_mem(void)
PAGE_ALIGN(fadump_calculate_reserve_size());
#ifdef CONFIG_CMA
if (!fw_dump.nocma) {
- align = FADUMP_CMA_ALIGNMENT;
fw_dump.boot_memory_size =
- ALIGN(fw_dump.boot_memory_size, align);
+ ALIGN(fw_dump.boot_memory_size,
+ FADUMP_CMA_ALIGNMENT);
}
#endif
@@ -523,13 +612,9 @@ int __init fadump_reserve_mem(void)
* Reserve memory at an offset closer to bottom of the RAM to
* minimize the impact of memory hot-remove operation.
*/
- memblock_set_bottom_up(true);
- base = memblock_find_in_range(base, mem_boundary, size, align);
-
- /* Restore the previous allocation mode */
- memblock_set_bottom_up(is_memblock_bottom_up);
+ base = fadump_locate_reserve_mem(base, size);
- if (!base) {
+ if (!base || (base + size > mem_boundary)) {
pr_err("Failed to find memory chunk for reservation!\n");
goto error_out;
}
@@ -594,8 +679,11 @@ early_param("fadump_reserve_mem", early_fadump_reserve_mem);
void crash_fadump(struct pt_regs *regs, const char *str)
{
+ unsigned int msecs;
struct fadump_crash_info_header *fdh = NULL;
int old_cpu, this_cpu;
+ /* Do not include first CPU */
+ unsigned int ncpus = num_online_cpus() - 1;
if (!should_fadump_crash())
return;
@@ -611,6 +699,8 @@ void crash_fadump(struct pt_regs *regs, const char *str)
old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
if (old_cpu != -1) {
+ atomic_inc(&cpus_in_fadump);
+
/*
* We can't loop here indefinitely. Wait as long as fadump
* is in force. If we race with fadump un-registration this
@@ -634,6 +724,16 @@ void crash_fadump(struct pt_regs *regs, const char *str)
fdh->online_mask = *cpu_online_mask;
+ /*
+ * If we came in via system reset, wait a while for the secondary
+ * CPUs to enter.
+ */
+ if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) {
+ msecs = CRASH_TIMEOUT;
+ while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
+ mdelay(1);
+ }
+
fw_dump.ops->fadump_trigger(fdh, str);
}
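(The crash path above lets the CPU that wins the crashing_cpu race wait up to CRASH_TIMEOUT milliseconds for the other online CPUs, counted via cpus_in_fadump, before triggering the dump. A stripped-down sketch of that wait pattern follows; it is not the patch code itself.)

#include <linux/atomic.h>
#include <linux/delay.h>

static void wait_for_secondary_cpus(atomic_t *entered, unsigned int ncpus)
{
	unsigned int msecs = 500;	/* CRASH_TIMEOUT */

	/* Busy-wait: we are crashing, interrupts are off, sleeping is not an option. */
	while (atomic_read(entered) < ncpus && --msecs > 0)
		mdelay(1);
}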
@@ -654,10 +754,8 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
void fadump_update_elfcore_header(char *bufp)
{
- struct elfhdr *elf;
struct elf_phdr *phdr;
- elf = (struct elfhdr *)bufp;
bufp += sizeof(struct elfhdr);
/* First note is a place holder for cpu notes info. */
@@ -726,10 +824,14 @@ void fadump_free_cpu_notes_buf(void)
static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
{
+ if (mrange_info->is_static) {
+ mrange_info->mem_range_cnt = 0;
+ return;
+ }
+
kfree(mrange_info->mem_ranges);
- mrange_info->mem_ranges = NULL;
- mrange_info->mem_ranges_sz = 0;
- mrange_info->max_mem_ranges = 0;
+ memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0,
+ (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ));
}
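(The memset above relies on the name[RNG_NAME_SZ] array being the first member of struct fadump_mrange_info: it clears every field that follows the label while leaving the label itself intact. A roughly equivalent, more explicit formulation is sketched below, assuming mem_ranges immediately follows name; the helper name is hypothetical.)

static void fadump_reset_mrange_info(struct fadump_mrange_info *mrange_info)
{
	/* Zero every field that follows the leading name[RNG_NAME_SZ] label. */
	memset(&mrange_info->mem_ranges, 0,
	       sizeof(*mrange_info) - offsetof(struct fadump_mrange_info, mem_ranges));
}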
/*
@@ -786,6 +888,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
int ret;
+ if (mrange_info->is_static) {
+ pr_err("Reached array size limit for %s memory ranges\n",
+ mrange_info->name);
+ return -ENOSPC;
+ }
+
ret = fadump_alloc_mem_ranges(mrange_info);
if (ret)
return ret;
@@ -875,9 +983,8 @@ static int fadump_init_elfcore_header(char *bufp)
*/
static int fadump_setup_crash_memory_ranges(void)
{
- struct memblock_region *reg;
- u64 start, end;
- int i, ret;
+ u64 i, start, end;
+ int ret;
pr_debug("Setup crash memory ranges.\n");
crash_mrange_info.mem_range_cnt = 0;
@@ -895,10 +1002,7 @@ static int fadump_setup_crash_memory_ranges(void)
return ret;
}
- for_each_memblock(memory, reg) {
- start = (u64)reg->base;
- end = start + (u64)reg->size;
-
+ for_each_mem_range(i, &start, &end) {
/*
* skip the memory chunk that is already added
* (0 through boot_memory_top).
@@ -1132,14 +1236,17 @@ static void fadump_free_reserved_memory(unsigned long start_pfn,
*/
static void fadump_release_reserved_area(u64 start, u64 end)
{
+ unsigned long reg_spfn, reg_epfn;
u64 tstart, tend, spfn, epfn;
- struct memblock_region *reg;
+ int i;
spfn = PHYS_PFN(start);
epfn = PHYS_PFN(end);
- for_each_memblock(memory, reg) {
- tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
- tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
+
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &reg_spfn, &reg_epfn, NULL) {
+ tstart = max_t(u64, spfn, reg_spfn);
+ tend = min_t(u64, epfn, reg_epfn);
+
if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend);
@@ -1202,20 +1309,19 @@ static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
* Scan reserved-ranges to consider them while reserving/releasing
* memory for FADump.
*/
-static inline int fadump_scan_reserved_mem_ranges(void)
+static void __init early_init_dt_scan_reserved_ranges(unsigned long node)
{
- struct device_node *root;
const __be32 *prop;
int len, ret = -1;
unsigned long i;
- root = of_find_node_by_path("/");
- if (!root)
- return ret;
+ /* reserved-ranges already scanned */
+ if (reserved_mrange_info.mem_range_cnt != 0)
+ return;
- prop = of_get_property(root, "reserved-ranges", &len);
+ prop = of_get_flat_dt_prop(node, "reserved-ranges", &len);
if (!prop)
- return ret;
+ return;
/*
* Each reserved range is an (address,size) pair, 2 cells each,
@@ -1237,7 +1343,8 @@ static inline int fadump_scan_reserved_mem_ranges(void)
}
}
- return ret;
+ /* Compact reserved ranges */
+ sort_and_merge_mem_ranges(&reserved_mrange_info);
}
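(The loop body elided by the hunk above parses the property as its comment describes: each reserved range is four 32-bit cells, an (address, size) pair of two cells each. A hedged sketch of that parsing, assuming the standard of_read_number() helper and with error handling omitted:)

static void __init scan_reserved_ranges_sketch(const __be32 *prop, int len)
{
	unsigned long i;

	/* 4 cells (16 bytes) per (address, size) entry. */
	for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
		u64 base = of_read_number(prop + (i * 4), 2);
		u64 size = of_read_number(prop + (i * 4) + 2, 2);

		if (size)
			fadump_add_mem_range(&reserved_mrange_info,
					     base, base + size);
	}
}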
/*
@@ -1251,32 +1358,21 @@ static void fadump_release_memory(u64 begin, u64 end)
u64 ra_start, ra_end, tstart;
int i, ret;
- fadump_scan_reserved_mem_ranges();
-
ra_start = fw_dump.reserve_dump_area_start;
ra_end = ra_start + fw_dump.reserve_dump_area_size;
/*
- * Add reserved dump area to reserved ranges list
- * and exclude all these ranges while releasing memory.
+ * If the reserved ranges array limit is hit, overwrite the last reserved
+ * memory range with reserved dump area to ensure it is excluded from
+ * the memory being released (reused for next FADump registration).
*/
- ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
- if (ret != 0) {
- /*
- * Not enough memory to setup reserved ranges but the system is
- * running shortage of memory. So, release all the memory except
- * Reserved dump area (reused for next fadump registration).
- */
- if (begin < ra_end && end > ra_start) {
- if (begin < ra_start)
- fadump_release_reserved_area(begin, ra_start);
- if (end > ra_end)
- fadump_release_reserved_area(ra_end, end);
- } else
- fadump_release_reserved_area(begin, end);
+ if (reserved_mrange_info.mem_range_cnt ==
+ reserved_mrange_info.max_mem_ranges)
+ reserved_mrange_info.mem_range_cnt--;
+ ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
+ if (ret != 0)
return;
- }
/* Get the reserved ranges list in order first. */
sort_and_merge_mem_ranges(&reserved_mrange_info);
@@ -1323,9 +1419,9 @@ static void fadump_invalidate_release_mem(void)
fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
-static ssize_t fadump_release_memory_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
+static ssize_t release_mem_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
{
int input = -1;
@@ -1350,23 +1446,40 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj,
return count;
}
-static ssize_t fadump_enabled_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+/* Release the reserved memory and disable the FADump */
+static void unregister_fadump(void)
+{
+ fadump_cleanup();
+ fadump_release_memory(fw_dump.reserve_dump_area_start,
+ fw_dump.reserve_dump_area_size);
+ fw_dump.fadump_enabled = 0;
+ kobject_put(fadump_kobj);
+}
+
+static ssize_t enabled_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
}
-static ssize_t fadump_register_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+static ssize_t mem_reserved_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size);
+}
+
+static ssize_t registered_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", fw_dump.dump_registered);
}
-static ssize_t fadump_register_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
+static ssize_t registered_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
{
int ret = 0;
int input = -1;
@@ -1418,45 +1531,82 @@ static int fadump_region_show(struct seq_file *m, void *private)
return 0;
}
-static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
- 0200, NULL,
- fadump_release_memory_store);
-static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
- 0444, fadump_enabled_show,
- NULL);
-static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
- 0644, fadump_register_show,
- fadump_register_store);
+static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
+static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
+static struct kobj_attribute register_attr = __ATTR_RW(registered);
+static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);
+
+static struct attribute *fadump_attrs[] = {
+ &enable_attr.attr,
+ &register_attr.attr,
+ &mem_reserved_attr.attr,
+ NULL,
+};
+
+ATTRIBUTE_GROUPS(fadump);
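(The callbacks above were renamed, e.g. fadump_register_show() to registered_show(), because the __ATTR_RO/__ATTR_RW/__ATTR_WO macros derive both the sysfs file name and the callback names from the attribute name. Roughly, the expansion looks like this sketch:)

/* __ATTR_RW(registered) is roughly equivalent to: */
static struct kobj_attribute register_attr = {
	.attr	= { .name = "registered", .mode = 0644 },
	.show	= registered_show,
	.store	= registered_store,
};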
DEFINE_SHOW_ATTRIBUTE(fadump_region);
static void fadump_init_files(void)
{
- struct dentry *debugfs_file;
int rc = 0;
- rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
- if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_enabled (%d)\n", rc);
+ fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
+ if (!fadump_kobj) {
+ pr_err("failed to create fadump kobject\n");
+ return;
+ }
- rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
- if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_registered (%d)\n", rc);
+ debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
+ &fadump_region_fops);
- debugfs_file = debugfs_create_file("fadump_region", 0444,
- powerpc_debugfs_root, NULL,
- &fadump_region_fops);
- if (!debugfs_file)
- printk(KERN_ERR "fadump: unable to create debugfs file"
- " fadump_region\n");
+ if (fw_dump.dump_active) {
+ rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
+ if (rc)
+ pr_err("unable to create release_mem sysfs file (%d)\n",
+ rc);
+ }
+
+ rc = sysfs_create_groups(fadump_kobj, fadump_groups);
+ if (rc) {
+ pr_err("sysfs group creation failed (%d), unregistering FADump",
+ rc);
+ unregister_fadump();
+ return;
+ }
+
+ /*
+ * The FADump sysfs files have moved from kernel_kobj to fadump_kobj, so
+ * create symlinks at the old location to maintain backward compatibility.
+ *
+ * - fadump_enabled -> fadump/enabled
+ * - fadump_registered -> fadump/registered
+ * - fadump_release_mem -> fadump/release_mem
+ */
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+ "enabled", "fadump_enabled");
+ if (rc) {
+ pr_err("unable to create fadump_enabled symlink (%d)", rc);
+ return;
+ }
+
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+ "registered",
+ "fadump_registered");
+ if (rc) {
+ pr_err("unable to create fadump_registered symlink (%d)", rc);
+ sysfs_remove_link(kernel_kobj, "fadump_enabled");
+ return;
+ }
if (fw_dump.dump_active) {
- rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
+ fadump_kobj,
+ "release_mem",
+ "fadump_release_mem");
if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_release_mem (%d)\n", rc);
+ pr_err("unable to create fadump_release_mem symlink (%d)",
+ rc);
}
return;
}
@@ -1531,12 +1681,10 @@ int __init fadump_reserve_mem(void)
/* Preserve everything above the base address */
static void __init fadump_reserve_crash_area(u64 base)
{
- struct memblock_region *reg;
- u64 mstart, msize;
+ u64 i, mstart, mend, msize;
- for_each_memblock(memory, reg) {
- mstart = reg->base;
- msize = reg->size;
+ for_each_mem_range(i, &mstart, &mend) {
+ msize = mend - mstart;
if ((mstart + msize) < base)
continue;
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index cc4a5e3f51f1..20328f72f9f2 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -11,8 +11,31 @@
#include <linux/export.h>
#include <linux/cache.h>
+#include <linux/of.h>
#include <asm/firmware.h>
+#include <asm/kvm_guest.h>
+#ifdef CONFIG_PPC64
unsigned long powerpc_firmware_features __read_mostly;
EXPORT_SYMBOL_GPL(powerpc_firmware_features);
+#endif
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+DEFINE_STATIC_KEY_FALSE(kvm_guest);
+int __init check_kvm_guest(void)
+{
+ struct device_node *hyper_node;
+
+ hyper_node = of_find_node_by_path("/hypervisor");
+ if (!hyper_node)
+ return 0;
+
+ if (of_device_is_compatible(hyper_node, "linux,kvm"))
+ static_branch_enable(&kvm_guest);
+
+ of_node_put(hyper_node);
+ return 0;
+}
+core_initcall(check_kvm_guest); // before kvm_guest_init()
+#endif
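(check_kvm_guest() only flips the kvm_guest static key; consumers are expected to test it through a cheap static-branch helper along the lines of the sketch below, assumed to live in asm/kvm_guest.h rather than being shown in this patch.)

DECLARE_STATIC_KEY_FALSE(kvm_guest);

static inline bool is_kvm_guest(void)
{
	/* Patched to a single taken/not-taken branch once the key is enabled. */
	return static_branch_unlikely(&kvm_guest);
}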
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 3235a8da6af7..ba4afe3b5a9c 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -12,7 +12,6 @@
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
@@ -88,15 +87,11 @@ BEGIN_FTR_SECTION
oris r5,r5,MSR_VSX@h
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- SYNC
MTMSRD(r5) /* enable use of fpu now */
isync
/* enable use of FP after return */
#ifdef CONFIG_PPC32
- mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
-#ifdef CONFIG_VMAP_STACK
- tovirt(r5, r5)
-#endif
+ addi r5,r2,THREAD
lwz r4,THREAD_FPEXC_MODE(r5)
ori r9,r9,MSR_FP /* enable FP for current */
or r9,r9,r4
@@ -107,10 +102,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
ori r12,r12,MSR_FP
or r12,r12,r4
std r12,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+#endif
#endif
- /* Don't care if r4 overflows, this is desired behaviour */
- lbz r4,THREAD_LOAD_FP(r5)
- addi r4,r4,1
+ li r4,1
stb r4,THREAD_LOAD_FP(r5)
addi r10,r5,THREAD_FPSTATE
lfd fr0,FPSTATE_FPSCR(r10)
@@ -119,6 +116,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
/* restore registers and return */
/* we haven't used ctr or xer or lr */
blr
+_ASM_NOKPROBE_SYMBOL(load_up_fpu)
/*
* save_fpu(tsk)
@@ -136,18 +134,3 @@ _GLOBAL(save_fpu)
mffs fr0
stfd fr0,FPSTATE_FPSCR(r6)
blr
-
-/*
- * These are used in the alignment trap handler when emulating
- * single-precision loads and stores.
- */
-
-_GLOBAL(cvt_fd)
- lfs 0,0(r3)
- stfd 0,0(r4)
- blr
-
-_GLOBAL(cvt_df)
- lfd 0,0(r3)
- stfs 0,0(r4)
- blr
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index 8bccce6544b5..dedc17fac8f8 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* 1. Find the index of the entry we're executing in */
- bl invstr /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
invstr: mflr r6 /* Make it accessible */
mfmsr r7
rlwinm r4,r7,27,31,31 /* extract MSR[IS] */
@@ -85,7 +85,7 @@ skpinv: addi r6,r6,1 /* Increment */
addi r6,r6,10
slw r6,r8,r6 /* convert to mask */
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r7
mfspr r8,SPRN_MAS3
@@ -117,7 +117,7 @@ skpinv: addi r6,r6,1 /* Increment */
xori r6,r4,1
slwi r6,r6,5 /* setup new context with other address space */
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r9
rlwimi r7,r9,0,20,31
addi r7,r7,(2f - 1b)
@@ -207,7 +207,7 @@ next_tlb_setup:
lis r7,MSR_KERNEL@h
ori r7,r7,MSR_KERNEL@l
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r9
rlwimi r6,r9,0,20,31
addi r6,r6,(2f - 1b)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 9db162f79fe6..349c4a820231 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -10,84 +10,82 @@
* We assume sprg3 has the physical address of the current
* task's thread_struct.
*/
-.macro EXCEPTION_PROLOG handle_dar_dsisr=0
+.macro EXCEPTION_PROLOG trapno name handle_dar_dsisr=0
EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr
EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2 handle_dar_dsisr=\handle_dar_dsisr
+ EXCEPTION_PROLOG_2 \trapno \name handle_dar_dsisr=\handle_dar_dsisr
.endm
.macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0
mtspr SPRN_SPRG_SCRATCH0,r10
mtspr SPRN_SPRG_SCRATCH1,r11
-#ifdef CONFIG_VMAP_STACK
mfspr r10, SPRN_SPRG_THREAD
.if \handle_dar_dsisr
+#ifdef CONFIG_40x
+ mfspr r11, SPRN_DEAR
+#else
mfspr r11, SPRN_DAR
+#endif
stw r11, DAR(r10)
+#ifdef CONFIG_40x
+ mfspr r11, SPRN_ESR
+#else
mfspr r11, SPRN_DSISR
+#endif
stw r11, DSISR(r10)
.endif
mfspr r11, SPRN_SRR0
stw r11, SRR0(r10)
-#endif
mfspr r11, SPRN_SRR1 /* check whether user or kernel */
-#ifdef CONFIG_VMAP_STACK
stw r11, SRR1(r10)
-#endif
mfcr r10
andi. r11, r11, MSR_PR
.endm
-.macro EXCEPTION_PROLOG_1 for_rtas=0
-#ifdef CONFIG_VMAP_STACK
- .ifeq \for_rtas
- li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
- mtmsr r11
- isync
- .endif
- subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */
-#else
- tophys(r11,r1) /* use tophys(r1) if kernel */
- subi r11, r11, INT_FRAME_SIZE /* alloc exc. frame */
-#endif
+.macro EXCEPTION_PROLOG_1
+ mtspr SPRN_SPRG_SCRATCH2,r1
+ subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */
beq 1f
- mfspr r11,SPRN_SPRG_THREAD
- tovirt_vmstack r11, r11
- lwz r11,TASK_STACK-THREAD(r11)
- addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
- tophys_novmstack r11, r11
+ mfspr r1,SPRN_SPRG_THREAD
+ lwz r1,TASK_STACK-THREAD(r1)
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
1:
#ifdef CONFIG_VMAP_STACK
- mtcrf 0x7f, r11
- bt 32 - THREAD_ALIGN_SHIFT, stack_overflow
+ mtcrf 0x3f, r1
+ bt 32 - THREAD_ALIGN_SHIFT, vmap_stack_overflow
#endif
.endm
-.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
-BEGIN_MMU_FTR_SECTION
- mtcr r10
-FTR_SECTION_ELSE
- stw r10, _CCR(r11)
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
-#else
- stw r10,_CCR(r11) /* save registers */
+.macro EXCEPTION_PROLOG_2 trapno name handle_dar_dsisr=0
+#ifdef CONFIG_PPC_8xx
+ .if \handle_dar_dsisr
+ li r11, RPN_PATTERN
+ mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */
+ .endif
#endif
- mfspr r10, SPRN_SPRG_SCRATCH0
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~MSR_RI) /* re-enable MMU */
+ mtspr SPRN_SRR1, r11
+ lis r11, 1f@h
+ ori r11, r11, 1f@l
+ mtspr SPRN_SRR0, r11
+ mfspr r11, SPRN_SPRG_SCRATCH2
+ rfi
+
+ .text
+\name\()_virt:
+1:
+ stw r11,GPR1(r1)
+ stw r11,0(r1)
+ mr r11, r1
+ stw r10,_CCR(r11) /* save registers */
stw r12,GPR12(r11)
stw r9,GPR9(r11)
- stw r10,GPR10(r11)
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
-BEGIN_MMU_FTR_SECTION
- mfcr r10
- stw r10, _CCR(r11)
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-#endif
+ mfspr r10,SPRN_SPRG_SCRATCH0
mfspr r12,SPRN_SPRG_SCRATCH1
+ stw r10,GPR10(r11)
stw r12,GPR11(r11)
mflr r10
stw r10,_LINK(r11)
-#ifdef CONFIG_VMAP_STACK
mfspr r12, SPRN_SPRG_THREAD
tovirt(r12, r12)
.if \handle_dar_dsisr
@@ -97,185 +95,71 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
stw r10, _DSISR(r11)
.endif
lwz r9, SRR1(r12)
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S)
-BEGIN_MMU_FTR_SECTION
- andi. r10, r9, MSR_PR
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-#endif
lwz r12, SRR0(r12)
-#else
- mfspr r12,SPRN_SRR0
- mfspr r9,SPRN_SRR1
-#endif
- stw r1,GPR1(r11)
- stw r1,0(r11)
- tovirt_novmstack r1, r11 /* set new kernel sp */
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#elif defined(CONFIG_PPC_8xx)
+ mtspr SPRN_EID, r2 /* Set MSR_RI */
#else
-#ifdef CONFIG_VMAP_STACK
- li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */
-#else
- li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */
-#endif
+ li r10, MSR_KERNEL /* can take exceptions */
mtmsr r10 /* (except for mach check in rtas) */
#endif
- stw r0,GPR0(r11)
- lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
- addi r10,r10,STACK_FRAME_REGS_MARKER@l
- stw r10,8(r11)
- SAVE_4GPRS(3, r11)
- SAVE_2GPRS(7, r11)
+ COMMON_EXCEPTION_PROLOG_END \trapno
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
.endm
-.macro SYSCALL_ENTRY trapno
- mfspr r12,SPRN_SPRG_THREAD
-#ifdef CONFIG_VMAP_STACK
- mfspr r9, SPRN_SRR0
- mfspr r11, SPRN_SRR1
- stw r9, SRR0(r12)
- stw r11, SRR1(r12)
-#endif
- mfcr r10
- lwz r11,TASK_STACK-THREAD(r12)
- rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
- addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
-#ifdef CONFIG_VMAP_STACK
- li r9, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
- mtmsr r9
- isync
-#endif
- tovirt_vmstack r12, r12
- tophys_novmstack r11, r11
- mflr r9
- stw r10,_CCR(r11) /* save registers */
- stw r9, _LINK(r11)
-#ifdef CONFIG_VMAP_STACK
- lwz r10, SRR0(r12)
- lwz r9, SRR1(r12)
-#else
- mfspr r10,SPRN_SRR0
- mfspr r9,SPRN_SRR1
-#endif
- stw r1,GPR1(r11)
- stw r1,0(r11)
- tovirt_novmstack r1, r11 /* set new kernel sp */
- stw r10,_NIP(r11)
-#ifdef CONFIG_40x
- rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
-#else
-#ifdef CONFIG_VMAP_STACK
- LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~MSR_IR) /* can take exceptions */
-#else
- LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
-#endif
- mtmsr r10 /* (except for mach check in rtas) */
-#endif
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+ stw r0,GPR0(r1)
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
- stw r2,GPR2(r11)
addi r10,r10,STACK_FRAME_REGS_MARKER@l
- stw r9,_MSR(r11)
- li r2, \trapno + 1
- stw r10,8(r11)
- stw r2,_TRAP(r11)
- SAVE_GPR(0, r11)
- SAVE_4GPRS(3, r11)
- SAVE_2GPRS(7, r11)
- addi r11,r1,STACK_FRAME_OVERHEAD
- addi r2,r12,-THREAD
- stw r11,PT_REGS(r12)
-#if defined(CONFIG_40x)
- /* Check to see if the dbcr0 register is set up to debug. Use the
- internal debug mode bit to do this. */
- lwz r12,THREAD_DBCR0(r12)
- andis. r12,r12,DBCR0_IDM@h
-#endif
- ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
-#if defined(CONFIG_40x)
- beq+ 3f
- /* From user and task is ptraced - load up global dbcr0 */
- li r12,-1 /* clear all pending debug events */
- mtspr SPRN_DBSR,r12
- lis r11,global_dbcr0@ha
- tophys(r11,r11)
- addi r11,r11,global_dbcr0@l
- lwz r12,0(r11)
- mtspr SPRN_DBCR0,r12
- lwz r12,4(r11)
- addi r12,r12,-1
- stw r12,4(r11)
-#endif
-
-3:
- tovirt_novmstack r2, r2 /* set r2 to current */
- lis r11, transfer_to_syscall@h
- ori r11, r11, transfer_to_syscall@l
-#ifdef CONFIG_TRACE_IRQFLAGS
- /*
- * If MSR is changing we need to keep interrupts disabled at this point
- * otherwise we might risk taking an interrupt before we tell lockdep
- * they are enabled.
- */
- LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)
- rlwimi r10, r9, 0, MSR_EE
-#else
- LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
-#endif
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
- mtspr SPRN_SRR1,r10
- mtspr SPRN_SRR0,r11
- SYNC
- RFI /* jump to handler, enable MMU */
+ stw r10,8(r1)
+ li r10, \trapno
+ stw r10,_TRAP(r1)
+ SAVE_4GPRS(3, r1)
+ SAVE_2GPRS(7, r1)
+ SAVE_NVGPRS(r1)
+ stw r2,GPR2(r1)
+ stw r12,_NIP(r1)
+ stw r9,_MSR(r1)
+ mfctr r10
+ mfspr r2,SPRN_SPRG_THREAD
+ stw r10,_CTR(r1)
+ tovirt(r2, r2)
+ mfspr r10,SPRN_XER
+ addi r2, r2, -THREAD
+ stw r10,_XER(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
.endm
-.macro save_dar_dsisr_on_stack reg1, reg2, sp
-#ifndef CONFIG_VMAP_STACK
- mfspr \reg1, SPRN_DAR
- mfspr \reg2, SPRN_DSISR
- stw \reg1, _DAR(\sp)
- stw \reg2, _DSISR(\sp)
+.macro prepare_transfer_to_handler
+#ifdef CONFIG_PPC_BOOK3S_32
+ andi. r12,r9,MSR_PR
+ bne 777f
+ bl prepare_transfer_to_handler
+777:
#endif
.endm
-.macro get_and_save_dar_dsisr_on_stack reg1, reg2, sp
-#ifdef CONFIG_VMAP_STACK
- lwz \reg1, _DAR(\sp)
- lwz \reg2, _DSISR(\sp)
-#else
- save_dar_dsisr_on_stack \reg1, \reg2, \sp
-#endif
-.endm
-
-.macro tovirt_vmstack dst, src
-#ifdef CONFIG_VMAP_STACK
- tovirt(\dst, \src)
-#else
- .ifnc \dst, \src
- mr \dst, \src
- .endif
-#endif
-.endm
-
-.macro tovirt_novmstack dst, src
-#ifndef CONFIG_VMAP_STACK
- tovirt(\dst, \src)
-#else
- .ifnc \dst, \src
- mr \dst, \src
- .endif
-#endif
-.endm
-
-.macro tophys_novmstack dst, src
-#ifndef CONFIG_VMAP_STACK
- tophys(\dst, \src)
-#else
- .ifnc \dst, \src
- mr \dst, \src
- .endif
-#endif
+.macro SYSCALL_ENTRY trapno
+ mfspr r9, SPRN_SRR1
+ mfspr r12, SPRN_SRR0
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */
+ lis r10, 1f@h
+ ori r10, r10, 1f@l
+ mtspr SPRN_SRR1, r11
+ mtspr SPRN_SRR0, r10
+ mfspr r10,SPRN_SPRG_THREAD
+ mr r11, r1
+ lwz r1,TASK_STACK-THREAD(r10)
+ tovirt(r10, r10)
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+ rfi
+1:
+ stw r12,_NIP(r1)
+ mfcr r12
+ rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
+ stw r12,_CCR(r1)
+ b transfer_to_syscall /* jump to handler */
.endm
/*
@@ -291,61 +175,43 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
*/
#ifdef CONFIG_PPC_BOOK3S
#define START_EXCEPTION(n, label) \
+ __HEAD; \
. = n; \
DO_KVM n; \
label:
#else
#define START_EXCEPTION(n, label) \
+ __HEAD; \
. = n; \
label:
#endif
-#define EXCEPTION(n, label, hdlr, xfer) \
+#define EXCEPTION(n, label, hdlr) \
START_EXCEPTION(n, label) \
- EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- LOAD_REG_IMMEDIATE(r10, msr); \
- bl tfer; \
- .long hdlr; \
- .long ret
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
- ret_from_except)
+ EXCEPTION_PROLOG n label; \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b interrupt_return
.macro vmap_stack_overflow_exception
-#ifdef CONFIG_VMAP_STACK
+ __HEAD
+vmap_stack_overflow:
#ifdef CONFIG_SMP
- mfspr r11, SPRN_SPRG_THREAD
- tovirt(r11, r11)
- lwz r11, TASK_CPU - THREAD(r11)
- slwi r11, r11, 3
- addis r11, r11, emergency_ctx@ha
+ mfspr r1, SPRN_SPRG_THREAD
+ lwz r1, TASK_CPU - THREAD(r1)
+ slwi r1, r1, 3
+ addis r1, r1, emergency_ctx-PAGE_OFFSET@ha
#else
- lis r11, emergency_ctx@ha
-#endif
- lwz r11, emergency_ctx@l(r11)
- cmpwi cr1, r11, 0
- bne cr1, 1f
- lis r11, init_thread_union@ha
- addi r11, r11, init_thread_union@l
-1: addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
- EXCEPTION_PROLOG_2
- SAVE_NVGPRS(r11)
- addi r3, r1, STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0, stack_overflow_exception)
-#endif
+ lis r1, emergency_ctx-PAGE_OFFSET@ha
+#endif
+ lwz r1, emergency_ctx-PAGE_OFFSET@l(r1)
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+ EXCEPTION_PROLOG_2 0 vmap_stack_overflow
+ prepare_transfer_to_handler
+ bl stack_overflow_exception
+ b interrupt_return
.endm
#endif /* __HEAD_32_H__ */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 9bb663977e84..7d72ee5ab387 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -26,17 +26,16 @@
*/
#include <linux/init.h>
+#include <linux/pgtable.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/export.h>
-#include <asm/asm-405.h>
#include "head_32.h"
@@ -73,7 +72,6 @@ turn_on_mmu:
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
- SYNC
rfi /* enables MMU */
b . /* prevent prefetch past rfi */
@@ -91,7 +89,11 @@ _ENTRY(crit_srr0)
.space 4
_ENTRY(crit_srr1)
.space 4
-_ENTRY(saved_ksp_limit)
+_ENTRY(crit_r1)
+ .space 4
+_ENTRY(crit_dear)
+ .space 4
+_ENTRY(crit_esr)
.space 4
/*
@@ -102,42 +104,62 @@ _ENTRY(saved_ksp_limit)
* Instead we use a couple of words of memory at low physical addresses.
* This is OK since we don't support SMP on these processors.
*/
-#define CRITICAL_EXCEPTION_PROLOG \
- stw r10,crit_r10@l(0); /* save two registers to work with */\
- stw r11,crit_r11@l(0); \
- mfcr r10; /* save CR in r10 for now */\
- mfspr r11,SPRN_SRR3; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
- lis r11,critirq_ctx@ha; \
- tophys(r11,r11); \
- lwz r11,critirq_ctx@l(r11); \
- beq 1f; \
- /* COMING FROM USER MODE */ \
- mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
-1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\
- tophys(r11,r11); \
- stw r10,_CCR(r11); /* save various registers */\
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\
- stw r12,_DEAR(r11); /* since they may have had stuff */\
- mfspr r9,SPRN_ESR; /* in them at the point where the */\
- stw r9,_ESR(r11); /* exception was taken */\
- mfspr r12,SPRN_SRR2; \
- stw r1,GPR1(r11); \
- mfspr r9,SPRN_SRR3; \
- stw r1,0(r11); \
- tovirt(r1,r11); \
- rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
- addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
- stw r10, 8(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
+.macro CRITICAL_EXCEPTION_PROLOG trapno name
+ stw r10,crit_r10@l(0) /* save two registers to work with */
+ stw r11,crit_r11@l(0)
+ mfspr r10,SPRN_SRR0
+ mfspr r11,SPRN_SRR1
+ stw r10,crit_srr0@l(0)
+ stw r11,crit_srr1@l(0)
+ mfspr r10,SPRN_DEAR
+ mfspr r11,SPRN_ESR
+ stw r10,crit_dear@l(0)
+ stw r11,crit_esr@l(0)
+ mfcr r10 /* save CR in r10 for now */
+ mfspr r11,SPRN_SRR3 /* check whether user or kernel */
+ andi. r11,r11,MSR_PR
+ lis r11,(critirq_ctx-PAGE_OFFSET)@ha
+ lwz r11,(critirq_ctx-PAGE_OFFSET)@l(r11)
+ beq 1f
+ /* COMING FROM USER MODE */
+ mfspr r11,SPRN_SPRG_THREAD /* if from user, start at top of */
+ lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */
+1: stw r1,crit_r1@l(0)
+ addi r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)) /* re-enable MMU */
+ mtspr SPRN_SRR1, r11
+ lis r11, 1f@h
+ ori r11, r11, 1f@l
+ mtspr SPRN_SRR0, r11
+ rfi
+
+ .text
+1:
+\name\()_virt:
+ lwz r11,crit_r1@l(0)
+ stw r11,GPR1(r1)
+ stw r11,0(r1)
+ mr r11,r1
+ stw r10,_CCR(r11) /* save various registers */
+ stw r12,GPR12(r11)
+ stw r9,GPR9(r11)
+ mflr r10
+ stw r10,_LINK(r11)
+ lis r9,PAGE_OFFSET@ha
+ lwz r10,crit_r10@l(r9)
+ lwz r12,crit_r11@l(r9)
+ stw r10,GPR10(r11)
+ stw r12,GPR11(r11)
+ lwz r12,crit_dear@l(r9)
+ lwz r9,crit_esr@l(r9)
+ stw r12,_DEAR(r11) /* since they may have had stuff */
+ stw r9,_ESR(r11) /* exception was taken */
+ mfspr r12,SPRN_SRR2
+ mfspr r9,SPRN_SRR3
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+ COMMON_EXCEPTION_PROLOG_END \trapno + 2
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
+.endm
/*
* State at this point:
@@ -157,10 +179,10 @@ _ENTRY(saved_ksp_limit)
*/
#define CRITICAL_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(n, label); \
- CRITICAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- crit_transfer_to_handler, ret_from_crit_exc)
+ CRITICAL_EXCEPTION_PROLOG n label; \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b ret_from_crit_exc
/*
* 0x0100 - Critical Interrupt Exception
@@ -176,191 +198,71 @@ _ENTRY(saved_ksp_limit)
* 0x0300 - Data Storage Exception
* This happens for just a few reasons. U0 set (but we don't do that),
* or zone protection fault (user violation, write to protected page).
- * If this is just an update of modified status, we do that quickly
- * and exit. Otherwise, we call heavywight functions to do the work.
+ * The other Data TLB exceptions bail out to this point
+ * if they can't resolve the lightweight TLB fault.
*/
START_EXCEPTION(0x0300, DataStorage)
- mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
- mtspr SPRN_SPRG_SCRATCH1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
- mtspr SPRN_SPRG_SCRATCH3, r12
- mtspr SPRN_SPRG_SCRATCH4, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG_SCRATCH6, r11
- mtspr SPRN_SPRG_SCRATCH5, r12
-#endif
-
- /* First, check if it was a zone fault (which means a user
- * tried to access a kernel or read-protected page - always
- * a SEGV). All other faults here must be stores, so no
- * need to check ESR_DST as well. */
- mfspr r10, SPRN_ESR
- andis. r10, r10, ESR_DIZ@h
- bne 2f
-
- mfspr r10, SPRN_DEAR /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, PAGE_OFFSET@h
- cmplw r10, r11
- blt+ 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
- li r9, 0
- mtspr SPRN_PID, r9 /* TLB will have 0 TID */
- b 4f
-
- /* Get the PGD for the current thread.
- */
-3:
- mfspr r11,SPRN_SPRG_THREAD
- lwz r11,PGDIR(r11)
-4:
- tophys(r11, r11)
- rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r11, 0(r11) /* Get L1 entry */
- rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */
- beq 2f /* Bail if no table */
-
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
-
- andi. r9, r11, _PAGE_RW /* Is it writeable? */
- beq 2f /* Bail if not */
-
- /* Update 'changed'.
- */
- ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
- stw r11, 0(r12) /* Update Linux page table */
-
- /* Most of the Linux PTE is ready to load into the TLB LO.
- * We set ZSEL, where only the LS-bit determines user access.
- * We set execute, because we don't have the granularity to
- * properly set this at the page level (Linux problem).
- * If shared is set, we cause a zero PID->TID load.
- * Many of these bits are software only. Bits we don't set
- * here we (properly should) assume have the appropriate value.
- */
- li r12, 0x0ce2
- andc r11, r11, r12 /* Make sure 20, 21 are zero */
-
- /* find the TLB index that caused the fault. It has to be here.
- */
- tlbsx r9, 0, r10
-
- tlbwe r11, r9, TLB_DATA /* Load TLB LO */
-
- /* Done...restore registers and get out of here.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG_SCRATCH4
- mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- PPC405_ERR77_SYNC
- rfi /* Should sync shadow TLBs */
- b . /* prevent prefetch past rfi */
-
-2:
- /* The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG_SCRATCH4
- mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- b DataAccess
+ EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
/*
* 0x0400 - Instruction Storage Exception
* This is caused by a fetch from non-execute or guarded pages.
*/
START_EXCEPTION(0x0400, InstructionAccess)
- EXCEPTION_PROLOG
- mr r4,r12 /* Pass SRR0 as arg2 */
- stw r4, _DEAR(r11)
- li r5,0 /* Pass zero as arg3 */
- EXC_XFER_LITE(0x400, handle_page_fault)
+ EXCEPTION_PROLOG 0x400 InstructionAccess
+ li r5,0
+ stw r5, _ESR(r11) /* Zero ESR */
+ stw r12, _DEAR(r11) /* SRR0 as DEAR */
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
/* 0x0500 - External Interrupt Exception */
- EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ)
/* 0x0600 - Alignment Exception */
START_EXCEPTION(0x0600, Alignment)
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */
- stw r4,_DEAR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x600, alignment_exception)
+ EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl alignment_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* 0x0700 - Program Exception */
START_EXCEPTION(0x0700, ProgramCheck)
- EXCEPTION_PROLOG
- mfspr r4,SPRN_ESR /* Grab the ESR and save it */
- stw r4,_ESR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x700, program_check_exception)
+ EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl program_check_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
- EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0800, Trap_08, unknown_exception)
+ EXCEPTION(0x0900, Trap_09, unknown_exception)
+ EXCEPTION(0x0A00, Trap_0A, unknown_exception)
+ EXCEPTION(0x0B00, Trap_0B, unknown_exception)
/* 0x0C00 - System Call Exception */
START_EXCEPTION(0x0C00, SystemCall)
SYSCALL_ENTRY 0xc00
+/* Trap_0D is commented out to get more space for system call exception */
- EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD)
+/* EXCEPTION(0x0D00, Trap_0D, unknown_exception) */
+ EXCEPTION(0x0E00, Trap_0E, unknown_exception)
+ EXCEPTION(0x0F00, Trap_0F, unknown_exception)
/* 0x1000 - Programmable Interval Timer (PIT) Exception */
- . = 0x1000
+ START_EXCEPTION(0x1000, DecrementerTrap)
b Decrementer
-/* 0x1010 - Fixed Interval Timer (FIT) Exception
-*/
- . = 0x1010
+/* 0x1010 - Fixed Interval Timer (FIT) Exception */
+ START_EXCEPTION(0x1010, FITExceptionTrap)
b FITException
-/* 0x1020 - Watchdog Timer (WDT) Exception
-*/
- . = 0x1020
+/* 0x1020 - Watchdog Timer (WDT) Exception */
+ START_EXCEPTION(0x1020, WDTExceptionTrap)
b WDTException
/* 0x1100 - Data TLB Miss Exception
@@ -369,23 +271,13 @@ _ENTRY(saved_ksp_limit)
* load TLB entries from the page table if they exist.
*/
START_EXCEPTION(0x1100, DTLBMiss)
- mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
- mtspr SPRN_SPRG_SCRATCH1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
+ mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_SCRATCH6, r11
mtspr SPRN_SPRG_SCRATCH3, r12
mtspr SPRN_SPRG_SCRATCH4, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG_SCRATCH6, r11
- mtspr SPRN_SPRG_SCRATCH5, r12
-#endif
+ mfcr r12
+ mfspr r9, SPRN_PID
+ rlwimi r12, r9, 0, 0xff
mfspr r10, SPRN_DEAR /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
@@ -408,28 +300,30 @@ _ENTRY(saved_ksp_limit)
4:
tophys(r11, r11)
rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r12, 0(r11) /* Get L1 entry */
- andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
+ lwz r11, 0(r11) /* Get L1 entry */
+ andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */
beq 2f /* Bail if no table */
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
- andi. r9, r11, _PAGE_PRESENT
- beq 5f
+ rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */
+ lwz r11, 0(r11) /* Get Linux PTE */
+ li r9, _PAGE_PRESENT | _PAGE_ACCESSED
+ andc. r9, r9, r11 /* Check permission */
+ bne 5f
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 0(r12)
+ rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */
+ and r9, r9, r11 /* hwwrite = dirty & rw */
+ rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */
/* Create TLB tag. This is the faulting address plus a static
* set of bits. These are size, valid, E, U0.
*/
- li r12, 0x00c0
- rlwimi r10, r12, 0, 20, 31
+ li r9, 0x00c0
+ rlwimi r10, r9, 0, 20, 31
b finish_tlb_load
2: /* Check for possible large-page pmd entry */
- rlwinm. r9, r12, 2, 22, 24
+ rlwinm. r9, r11, 2, 22, 24
beq 5f
/* Create TLB tag. This is the faulting address, plus a static
@@ -437,7 +331,6 @@ _ENTRY(saved_ksp_limit)
*/
ori r9, r9, 0x40
rlwimi r10, r9, 0, 20, 31
- mr r11, r12
b finish_tlb_load
@@ -445,47 +338,26 @@ _ENTRY(saved_ksp_limit)
/* The bailout. Restore registers to pre-exception conditions
* and call the heavyweights to help us out.
*/
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
mtspr SPRN_PID, r12
- mtcr r11
+ mtcrf 0x80, r12
mfspr r9, SPRN_SPRG_SCRATCH4
mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- b DataAccess
+ mfspr r11, SPRN_SPRG_SCRATCH6
+ mfspr r10, SPRN_SPRG_SCRATCH5
+ b DataStorage
/* 0x1200 - Instruction TLB Miss Exception
* Nearly the same as above, except we get our information from different
* registers and bailout to a different point.
*/
START_EXCEPTION(0x1200, ITLBMiss)
- mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
- mtspr SPRN_SPRG_SCRATCH1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
+ mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_SCRATCH6, r11
mtspr SPRN_SPRG_SCRATCH3, r12
mtspr SPRN_SPRG_SCRATCH4, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG_SCRATCH6, r11
- mtspr SPRN_SPRG_SCRATCH5, r12
-#endif
+ mfcr r12
+ mfspr r9, SPRN_PID
+ rlwimi r12, r9, 0, 0xff
mfspr r10, SPRN_SRR0 /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
@@ -508,28 +380,30 @@ _ENTRY(saved_ksp_limit)
4:
tophys(r11, r11)
rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r12, 0(r11) /* Get L1 entry */
- andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
+ lwz r11, 0(r11) /* Get L1 entry */
+ andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */
beq 2f /* Bail if no table */
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
- andi. r9, r11, _PAGE_PRESENT
- beq 5f
+ rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */
+ lwz r11, 0(r11) /* Get Linux PTE */
+ li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+ andc. r9, r9, r11 /* Check permission */
+ bne 5f
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 0(r12)
+ rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */
+ and r9, r9, r11 /* hwwrite = dirty & rw */
+ rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */
/* Create TLB tag. This is the faulting address plus a static
* set of bits. These are size, valid, E, U0.
*/
- li r12, 0x00c0
- rlwimi r10, r12, 0, 20, 31
+ li r9, 0x00c0
+ rlwimi r10, r9, 0, 20, 31
b finish_tlb_load
2: /* Check for possible large-page pmd entry */
- rlwinm. r9, r12, 2, 22, 24
+ rlwinm. r9, r11, 2, 22, 24
beq 5f
/* Create TLB tag. This is the faulting address, plus a static
@@ -537,7 +411,6 @@ _ENTRY(saved_ksp_limit)
*/
ori r9, r9, 0x40
rlwimi r10, r9, 0, 20, 31
- mr r11, r12
b finish_tlb_load
@@ -545,44 +418,27 @@ _ENTRY(saved_ksp_limit)
/* The bailout. Restore registers to pre-exception conditions
* and call the heavyweights to help us out.
*/
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
mtspr SPRN_PID, r12
- mtcr r11
+ mtcrf 0x80, r12
mfspr r9, SPRN_SPRG_SCRATCH4
mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH6
+ mfspr r10, SPRN_SPRG_SCRATCH5
b InstructionAccess
- EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
-#ifdef CONFIG_IBM405_ERR51
- /* 405GP errata 51 */
- START_EXCEPTION(0x1700, Trap_17)
- b DTLBMiss
-#else
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
-#endif
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1300, Trap_13, unknown_exception)
+ EXCEPTION(0x1400, Trap_14, unknown_exception)
+ EXCEPTION(0x1500, Trap_15, unknown_exception)
+ EXCEPTION(0x1600, Trap_16, unknown_exception)
+ EXCEPTION(0x1700, Trap_17, unknown_exception)
+ EXCEPTION(0x1800, Trap_18, unknown_exception)
+ EXCEPTION(0x1900, Trap_19, unknown_exception)
+ EXCEPTION(0x1A00, Trap_1A, unknown_exception)
+ EXCEPTION(0x1B00, Trap_1B, unknown_exception)
+ EXCEPTION(0x1C00, Trap_1C, unknown_exception)
+ EXCEPTION(0x1D00, Trap_1D, unknown_exception)
+ EXCEPTION(0x1E00, Trap_1E, unknown_exception)
+ EXCEPTION(0x1F00, Trap_1F, unknown_exception)
/* Check for a single step debug exception while in an exception
* handler before state has been saved. This is to catch the case
@@ -599,7 +455,7 @@ _ENTRY(saved_ksp_limit)
*/
/* 0x2000 - Debug Exception */
START_EXCEPTION(0x2000, DebugTrap)
- CRITICAL_EXCEPTION_PROLOG
+ CRITICAL_EXCEPTION_PROLOG 0x2000 DebugTrap
/*
* If this is a single step or branch-taken exception in an
@@ -635,50 +491,41 @@ _ENTRY(saved_ksp_limit)
lwz r12,GPR12(r11)
lwz r10,crit_r10@l(0)
lwz r11,crit_r11@l(0)
- PPC405_ERR77_SYNC
rfci
b .
/* continue normal handling for a critical exception... */
2: mfspr r4,SPRN_DBSR
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_TEMPLATE(DebugException, 0x2002, \
- (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- crit_transfer_to_handler, ret_from_crit_exc)
+ stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */
+ prepare_transfer_to_handler
+ bl DebugException
+ b ret_from_crit_exc
/* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
+ __HEAD
Decrementer:
- EXCEPTION_PROLOG
+ EXCEPTION_PROLOG 0x1000 Decrementer
lis r0,TSR_PIS@h
mtspr SPRN_TSR,r0 /* Clear the PIT exception */
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0x1000, timer_interrupt)
+ prepare_transfer_to_handler
+ bl timer_interrupt
+ b interrupt_return
/* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
+ __HEAD
FITException:
- EXCEPTION_PROLOG
- addi r3,r1,STACK_FRAME_OVERHEAD;
- EXC_XFER_STD(0x1010, unknown_exception)
+ EXCEPTION_PROLOG 0x1010 FITException
+ prepare_transfer_to_handler
+ bl unknown_exception
+ b interrupt_return
/* Watchdog Timer (WDT) Exception. (from 0x1020) */
+ __HEAD
WDTException:
- CRITICAL_EXCEPTION_PROLOG;
- addi r3,r1,STACK_FRAME_OVERHEAD;
- EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
- (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
- crit_transfer_to_handler, ret_from_crit_exc)
-
-/*
- * The other Data TLB exceptions bail out to this point
- * if they can't resolve the lightweight TLB fault.
- */
-DataAccess:
- EXCEPTION_PROLOG
- mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
- stw r5,_ESR(r11)
- mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
- stw r4, _DEAR(r11)
- EXC_XFER_LITE(0x300, handle_page_fault)
+ CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException
+ prepare_transfer_to_handler
+ bl WatchdogException
+ b ret_from_crit_exc
/* Other PowerPC processors, namely those derived from the 6xx-series
* have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
@@ -686,12 +533,13 @@ DataAccess:
* reserved.
*/
+ __HEAD
/* Damn, I came up one instruction too many to fit into the
* exception space :-). Both the instruction and data TLB
* miss get to this point to load the TLB.
* r10 - TLB_TAG value
* r11 - Linux PTE
- * r12, r9 - available to use
+ * r9 - available to use
* PID - loaded with proper value when we get here
* Upon exit, we reload everything and RFI.
* Actually, it will fit now, but oh well.....a common place
@@ -700,45 +548,31 @@ DataAccess:
tlb_4xx_index:
.long 0
finish_tlb_load:
- /* load the next available TLB index.
- */
- lwz r9, tlb_4xx_index@l(0)
- addi r9, r9, 1
- andi. r9, r9, (PPC40X_TLB_SIZE-1)
- stw r9, tlb_4xx_index@l(0)
-
-6:
/*
* Clear out the software-only bits in the PTE to generate the
* TLB_DATA value. These are the bottom 2 bits of the RPM, the
* top 3 bits of the zone field, and M.
*/
- li r12, 0x0ce2
- andc r11, r11, r12
+ li r9, 0x0ce2
+ andc r11, r11, r9
+
+ /* load the next available TLB index. */
+ lwz r9, tlb_4xx_index@l(0)
+ addi r9, r9, 1
+ andi. r9, r9, PPC40X_TLB_SIZE - 1
+ stw r9, tlb_4xx_index@l(0)
tlbwe r11, r9, TLB_DATA /* Load TLB LO */
tlbwe r10, r9, TLB_TAG /* Load TLB HI */
/* Done...restore registers and get out of here.
*/
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
mtspr SPRN_PID, r12
- mtcr r11
+ mtcrf 0x80, r12
mfspr r9, SPRN_SPRG_SCRATCH4
mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- PPC405_ERR77_SYNC
+ mfspr r11, SPRN_SPRG_SCRATCH6
+ mfspr r10, SPRN_SPRG_SCRATCH5
rfi /* Should sync shadow TLBs */
b . /* prevent prefetch past rfi */
@@ -801,7 +635,7 @@ start_here:
ori r6, r6, swapper_pg_dir@l
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
- stw r5, 0xf0(r0) /* Must match your Abatron config file */
+ stw r5, 0xf0(0) /* Must match your Abatron config file */
tophys(r5,r5)
stw r6, 0(r5)
@@ -867,39 +701,3 @@ _GLOBAL(abort)
mfspr r13,SPRN_DBCR0
oris r13,r13,DBCR0_RST_SYSTEM@h
mtspr SPRN_DBCR0,r13
-
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@ha
- stw r4, abatron_pteptrs@l + 0x4(r5)
-#endif
- sync
- mtspr SPRN_PID,r3
- isync /* Need an isync to flush shadow */
- /* TLBs after changing PID */
- blr
-
-/* We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align 12
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-EXPORT_SYMBOL(empty_zero_page)
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 51dd01a27314..02d2928d1e01 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -25,10 +25,10 @@
*/
#include <linux/init.h>
+#include <linux/pgtable.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
@@ -70,7 +70,7 @@ _ENTRY(_start);
* address.
* r21 will be loaded with the physical runtime address of _stext
*/
- bl 0f /* Get our runtime address */
+ bcl 20,31,$+4 /* Get our runtime address */
0: mflr r21 /* Make it accessible */
addis r21,r21,(_stext - 0b)@ha
addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */
@@ -263,8 +263,7 @@ interrupt_base:
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \
- do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, do_IRQ)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -277,7 +276,7 @@ interrupt_base:
FP_UNAVAILABLE_EXCEPTION
#else
EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
- FloatingPointUnavailable, unknown_exception, EXC_XFER_STD)
+ FloatingPointUnavailable, unknown_exception)
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
@@ -285,15 +284,14 @@ interrupt_base:
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
- AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD)
+ AuxillaryProcessorUnavailable, unknown_exception)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, unknown_exception)
/* Watchdog Timer Interrupt */
/* TODO: Add watchdog support */
@@ -376,7 +374,7 @@ interrupt_base:
/* Load the next available TLB index */
lwz r13,tlb_44x_index@l(r10)
- bne 2f /* Bail if permission mismach */
+ bne 2f /* Bail if permission mismatch */
/* Increment, rollover, and store TLB index */
addi r13,r13,1
@@ -471,7 +469,7 @@ interrupt_base:
/* Load the next available TLB index */
lwz r13,tlb_44x_index@l(r10)
- bne 2f /* Bail if permission mismach */
+ bne 2f /* Bail if permission mismatch */
/* Increment, rollover, and store TLB index */
addi r13,r13,1
@@ -534,6 +532,10 @@ finish_tlb_load_44x:
andi. r10,r12,_PAGE_USER /* User page ? */
beq 1f /* nope, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+#ifdef CONFIG_PPC_KUEP
+0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */
+ patch_site 0b, patch__tlb_44x_kuep
+#endif
1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
/* Done...restore registers and get out of here.
@@ -745,6 +747,10 @@ finish_tlb_load_47x:
andi. r10,r12,_PAGE_USER /* User page ? */
beq 1f /* nope, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+#ifdef CONFIG_PPC_KUEP
+0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */
+ patch_site 0b, patch__tlb_47x_kuep
+#endif
1: tlbwe r11,r13,2
/* Done...restore registers and get out of here.
@@ -782,20 +788,6 @@ _GLOBAL(__fixup_440A_mcheck)
sync
blr
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r4, 0x4(r5)
-#endif
- mtspr SPRN_PID,r3
- isync /* Force context change */
- blr
-
/*
* Init CPU state. This is called at boot time or for secondary CPUs
* to setup initial TLB entries, setup IVORs, etc...
@@ -861,7 +853,7 @@ _GLOBAL(init_cpu_state)
wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */
sync
- bl invstr /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
invstr: mflr r5 /* Make it accessible */
tlbsx r23,0,r5 /* Find entry we are in */
li r4,0 /* Start at TLB entry 0 */
@@ -1053,7 +1045,7 @@ head_start_47x:
sync
/* Find the entry we are running from */
- bl 1f
+ bcl 20,31,$+4
1: mflr r23
tlbsx r23,0,r23
tlbre r24,r23,0
@@ -1241,34 +1233,8 @@ head_start_common:
isync
blr
-/*
- * We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align PAGE_SHIFT
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
-
-/*
- * To support >32-bit physical addresses, we use an 8KB pgdir.
- */
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/*
- * Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
-
#ifdef CONFIG_SMP
+ .data
.align 12
temp_boot_stack:
.space 1024
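
Reviewer note: the two CONFIG_PPC_KUEP hunks above add one patchable instruction (registered with patch_site) that strips the SX supervisor-execute bit from TLB entries mapping user pages, so the kernel cannot execute user memory when KUEP is enabled. A hedged C rendering of the intent, using the bit names from the 44x hunk; the helper name is illustrative only.

static inline unsigned long kuep_filter_attrib(unsigned long attrib, unsigned long pte)
{
	/* User page and KUEP active: remove supervisor execute permission. */
	if (IS_ENABLED(CONFIG_PPC_KUEP) && (pte & _PAGE_USER))
		attrib &= ~PPC44x_TLB_SX;
	return attrib;
}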
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index ad79fddb974d..f17ae2083733 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -41,6 +41,11 @@
#include <asm/ppc-opcode.h>
#include <asm/export.h>
#include <asm/feature-fixups.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
/* The physical memory is laid out such that the secondary processor
* spin code sits at 0x0000...0x00ff. On server, the vectors follow
@@ -189,8 +194,9 @@ CLOSE_FIXED_SECTION(first_256B)
/* This value is used to mark exception frames on the stack. */
.section ".toc","aw"
+/* This value is used to mark exception frames on the stack. */
exception_marker:
- .tc ID_72656773_68657265[TC],0x7265677368657265
+ .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
.previous
/*
@@ -206,6 +212,8 @@ OPEN_TEXT_SECTION(0x100)
USE_TEXT_SECTION()
+#include "interrupt_64.S"
+
#ifdef CONFIG_PPC_BOOK3E
/*
* The booting_thread_hwid holds the thread id we want to boot in cpu
@@ -300,9 +308,6 @@ _GLOBAL(fsl_secondary_thread_init)
rlwimi r3, r3, 30, 2, 30
mtspr SPRN_PIR, r3
1:
-#endif
-
-_GLOBAL(generic_secondary_thread_init)
mr r24,r3
/* turn on 64-bit mode */
@@ -312,13 +317,13 @@ _GLOBAL(generic_secondary_thread_init)
bl relative_toc
tovirt(r2,r2)
-#ifdef CONFIG_PPC_BOOK3E
/* Book3E initialization */
mr r3,r24
bl book3e_secondary_thread_init
-#endif
b generic_secondary_common_init
+#endif /* CONFIG_PPC_BOOK3E */
+
/*
* On pSeries and most other platforms, secondary processors spin
* in the following code.
@@ -420,6 +425,10 @@ generic_secondary_common_init:
/* From now on, r24 is expected to be logical cpuid */
mr r24,r5
+ /* Create a temp kernel stack for use before relocation is on. */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_OVERHEAD
+
/* See if we need to call a cpu state restore handler */
LOAD_REG_ADDR(r23, cur_cpu_spec)
ld r23,0(r23)
@@ -448,10 +457,6 @@ generic_secondary_common_init:
sync /* order paca.run and cur_cpu_spec */
isync /* In case code patching happened */
- /* Create a temp kernel stack for use before relocation is on. */
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
-
b __secondary_start
#endif /* SMP */
@@ -537,6 +542,7 @@ __start_initialization_multiplatform:
b __after_prom_start
#endif /* CONFIG_PPC_BOOK3E */
+__REF
__boot_from_prom:
#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
/* Save parameters */
@@ -574,6 +580,7 @@ __boot_from_prom:
/* We never return. We also hit that trap if trying to boot
* from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
trap
+ .previous
__after_prom_start:
#ifdef CONFIG_RELOCATABLE
@@ -705,6 +712,8 @@ _GLOBAL(copy_and_flush)
isync
blr
+_ASM_NOKPROBE_SYMBOL(copy_and_flush); /* Called in real mode */
+
.align 8
copy_to_here:
@@ -830,7 +839,7 @@ __secondary_start:
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- RFI
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
/*
@@ -866,8 +875,7 @@ enable_64b_mode:
oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */
mtmsr r11
#else /* CONFIG_PPC_BOOK3E */
- li r12,(MSR_64BIT | MSR_ISF)@highest
- sldi r12,r12,48
+ LOAD_REG_IMMEDIATE(r12, MSR_64BIT)
or r11,r11,r12
mtmsrd r11
isync
@@ -945,15 +953,8 @@ start_here_multiplatform:
std r0,0(r4)
#endif
- /* The following gets the stack set up with the regs */
- /* pointing to the real addr of the kernel stack. This is */
- /* all done to support the C function call below which sets */
- /* up the htab. This is done because we have relocated the */
- /* kernel but are still running in real mode. */
-
- LOAD_REG_ADDR(r3,init_thread_union)
-
/* set up a stack pointer */
+ LOAD_REG_ADDR(r3,init_thread_union)
LOAD_REG_IMMEDIATE(r1,THREAD_SIZE)
add r1,r3,r1
li r0,0
@@ -974,10 +975,9 @@ start_here_multiplatform:
ld r4,PACAKMSR(r13)
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- RFI
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
- .previous
/* This is where all platforms converge execution */
start_here_common:
@@ -999,25 +999,6 @@ start_here_common:
bl start_kernel
/* Not reached */
- trap
+0: trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the bss, which is page-aligned.
- */
- .section ".bss"
-/*
- * pgd dir should be aligned to PGD_TABLE_SIZE which is 64K.
- * We will need to find a better way to fix this
- */
- .align 16
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
- .globl empty_zero_page
-empty_zero_page:
- .space PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
+ .previous
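
Reviewer note: one easy-to-miss change in the head_64.S hunks above is that the exception_marker TOC entry now uses STACK_FRAME_REGS_MARKER instead of the hard-coded 0x7265677368657265; both are the same value (ASCII "regshere"). For context, a simplified sketch of how such a marker is consulted when deciding whether a stack frame carries a pt_regs area; this is not code from this patch.

#define STACK_FRAME_REGS_MARKER	0x7265677368657265UL	/* ASCII "regshere" */

static bool frame_has_regs(const unsigned long *frame)
{
	/* Exception entry stores the marker at a fixed slot in the frame. */
	return frame[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER;
}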
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 073a651787df..0d073b9fd52c 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -16,11 +16,12 @@
#include <linux/init.h>
#include <linux/magic.h>
+#include <linux/pgtable.h>
+#include <linux/sizes.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/cache.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
@@ -28,23 +29,7 @@
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/code-patching-asm.h>
-
-#include "head_32.h"
-
-#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
-/* By simply checking Address >= 0x80000000, we know if its a kernel address */
-#define SIMPLE_KERNEL_ADDRESS 1
-#endif
-
-/*
- * We need an ITLB miss handler for kernel addresses if:
- * - Either we have modules
- * - Or we have not pinned the first 8M
- */
-#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \
- defined(CONFIG_DEBUG_PAGEALLOC)
-#define ITLB_MISS_KERNEL 1
-#endif
+#include <asm/interrupt.h>
/*
* Value for the bits that have fixed value in RPN entries.
@@ -52,6 +37,18 @@
*/
#define RPN_PATTERN 0x00f0
+#include "head_32.h"
+
+.macro compare_to_kernel_boundary scratch, addr
+#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
+/* By simply checking Address >= 0x80000000, we know if its a kernel address */
+ not. \scratch, \addr
+#else
+ rlwinm \scratch, \addr, 16, 0xfff8
+ cmpli cr0, \scratch, PAGE_OFFSET@h
+#endif
+.endm
+
#define PAGE_SHIFT_512K 19
#define PAGE_SHIFT_8M 23
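
Reviewer note: the new compare_to_kernel_boundary macro centralises the user/kernel address test that each 8xx TLB miss handler used to open-code. Roughly, in C terms, it behaves like the simplification below: on the layout named in the #if the top address bit alone decides, otherwise the asm compares the upper address bits against PAGE_OFFSET@h at 512 KiB granularity.

static inline bool addr_is_kernel_8xx(unsigned long addr)
{
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
	/* "not." sets CR0 from ~addr, so the caller's branch sees the MSB */
	return addr >= 0x80000000UL;
#else
	return ((addr >> 16) & 0xfff8) >= (PAGE_OFFSET >> 16);
#endif
}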
@@ -122,56 +119,54 @@ instruction_counter:
#endif
/* System reset */
- EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD)
+ EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, system_reset_exception)
/* Machine check */
- . = 0x200
-MachineCheck:
- EXCEPTION_PROLOG handle_dar_dsisr=1
- save_dar_dsisr_on_stack r4, r5, r11
- li r6, RPN_PATTERN
- mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x200, machine_check_exception)
+ START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
+ EXCEPTION_PROLOG INTERRUPT_MACHINE_CHECK MachineCheck handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl machine_check_exception
+ b interrupt_return
/* External interrupt */
- EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
/* Alignment exception */
- . = 0x600
-Alignment:
- EXCEPTION_PROLOG handle_dar_dsisr=1
- save_dar_dsisr_on_stack r4, r5, r11
- li r6, RPN_PATTERN
- mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */
- addi r3,r1,STACK_FRAME_OVERHEAD
- b .Lalignment_exception_ool
+ START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+ EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl alignment_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Program check exception */
- EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
+ START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+ EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+ prepare_transfer_to_handler
+ bl program_check_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Decrementer */
- EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
-
- /* With VMAP_STACK there's not enough room for this at 0x600 */
- . = 0xa00
-.Lalignment_exception_ool:
- EXC_XFER_STD(0x600, alignment_exception)
+ EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
/* System call */
- . = 0xc00
-SystemCall:
- SYSCALL_ENTRY 0xc00
+ START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+ SYSCALL_ENTRY INTERRUPT_SYSCALL
/* Single step - not used on 601 */
- EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
+ EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
*/
- EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
+ START_EXCEPTION(INTERRUPT_SOFT_EMU_8xx, SoftEmu)
+ EXCEPTION_PROLOG INTERRUPT_SOFT_EMU_8xx SoftEmu
+ prepare_transfer_to_handler
+ bl emulation_assist_interrupt
+ REST_NVGPRS(r1)
+ b interrupt_return
- . = 0x1100
/*
* For the MPC8xx, this is a software tablewalk to load the instruction
* TLB. The task switch loads the M_TWB register with the pointer to the first
@@ -184,72 +179,44 @@ SystemCall:
*/
#ifdef CONFIG_8xx_CPU15
-#define INVALIDATE_ADJACENT_PAGES_CPU15(addr) \
- addi addr, addr, PAGE_SIZE; \
- tlbie addr; \
- addi addr, addr, -(PAGE_SIZE << 1); \
- tlbie addr; \
- addi addr, addr, PAGE_SIZE
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) \
+ addi tmp, addr, PAGE_SIZE; \
+ tlbie tmp; \
+ addi tmp, addr, -PAGE_SIZE; \
+ tlbie tmp
#else
-#define INVALIDATE_ADJACENT_PAGES_CPU15(addr)
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp)
#endif
-InstructionTLBMiss:
- mtspr SPRN_SPRG_SCRATCH0, r10
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
- mtspr SPRN_SPRG_SCRATCH1, r11
-#endif
+ START_EXCEPTION(INTERRUPT_INST_TLB_MISS_8xx, InstructionTLBMiss)
+ mtspr SPRN_SPRG_SCRATCH2, r10
+ mtspr SPRN_M_TW, r11
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
mfspr r10, SPRN_SRR0 /* Get effective address of fault */
- INVALIDATE_ADJACENT_PAGES_CPU15(r10)
+ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
mtspr SPRN_MD_EPN, r10
- /* Only modules will cause ITLB Misses as we always
- * pin the first 8MB of kernel memory */
-#ifdef ITLB_MISS_KERNEL
+#ifdef CONFIG_MODULES
mfcr r11
-#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
- cmpi cr0, r10, 0 /* Address >= 0x80000000 */
-#else
- rlwinm r10, r10, 16, 0xfff8
- cmpli cr0, r10, PAGE_OFFSET@h
-#ifndef CONFIG_PIN_TLB_TEXT
- /* It is assumed that kernel code fits into the first 32M */
-0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
- patch_site 0b, patch__itlbmiss_linmem_top
-#endif
-#endif
+ compare_to_kernel_boundary r10, r10
#endif
mfspr r10, SPRN_M_TWB /* Get level 1 table */
-#ifdef ITLB_MISS_KERNEL
-#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
- bge+ 3f
-#else
+#ifdef CONFIG_MODULES
blt+ 3f
-#endif
-#ifndef CONFIG_PIN_TLB_TEXT
- blt cr7, ITLBMissLinear
-#endif
rlwinm r10, r10, 0, 20, 31
oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
3:
+ mtcr r11
#endif
- lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
- mtspr SPRN_MI_TWC, r10 /* Set segment attributes */
-
- mtspr SPRN_MD_TWC, r10
+ lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
+ mtspr SPRN_MD_TWC, r11
mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
-#ifdef ITLB_MISS_KERNEL
- mtcr r11
-#endif
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
-#endif
+ rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
+ rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
+ mtspr SPRN_MI_TWC, r11
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 20 and 23 must be clear.
* Software indicator bits 22, 24, 25, 26, and 27 must be
@@ -262,10 +229,8 @@ InstructionTLBMiss:
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
/* Restore registers */
-0: mfspr r10, SPRN_SPRG_SCRATCH0
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
- mfspr r11, SPRN_SPRG_SCRATCH1
-#endif
+0: mfspr r10, SPRN_SPRG_SCRATCH2
+ mfspr r11, SPRN_M_TW
rfi
patch_site 0b, patch__itlbmiss_exit_1
@@ -274,43 +239,13 @@ InstructionTLBMiss:
0: lwz r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
addi r10, r10, 1
stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
- mfspr r10, SPRN_SPRG_SCRATCH0
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
- mfspr r11, SPRN_SPRG_SCRATCH1
-#endif
- rfi
-#endif
-
-#ifndef CONFIG_PIN_TLB_TEXT
-ITLBMissLinear:
- mtcr r11
-#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23
- patch_site 0f, patch__itlbmiss_linmem_top8
-
- mfspr r10, SPRN_SRR0
-0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
- rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
- ori r11, r11, MI_PS512K | MI_SVALID
- rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
-#else
- /* Set 8M byte page and mark it valid */
- li r11, MI_PS8MEG | MI_SVALID
- rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
-#endif
- mtspr SPRN_MI_TWC, r11
- ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
- _PAGE_PRESENT
- mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
-
-0: mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH2
+ mfspr r11, SPRN_M_TW
rfi
- patch_site 0b, patch__itlbmiss_exit_2
#endif
- . = 0x1200
-DataStoreTLBMiss:
- mtspr SPRN_DAR, r10
+ START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss)
+ mtspr SPRN_SPRG_SCRATCH2, r10
mtspr SPRN_M_TW, r11
mfcr r11
@@ -318,21 +253,9 @@ DataStoreTLBMiss:
* kernel page tables.
*/
mfspr r10, SPRN_MD_EPN
- rlwinm r10, r10, 16, 0xfff8
- cmpli cr0, r10, PAGE_OFFSET@h
-#ifndef CONFIG_PIN_TLB_IMMR
- cmpli cr6, r10, VIRT_IMMR_BASE@h
-#endif
-0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
- patch_site 0b, patch__dtlbmiss_linmem_top
-
+ compare_to_kernel_boundary r10, r10
mfspr r10, SPRN_M_TWB /* Get level 1 table */
blt+ 3f
-#ifndef CONFIG_PIN_TLB_IMMR
-0: beq- cr6, DTLBMissIMMR
- patch_site 0b, patch__dtlbmiss_immr_jmp
-#endif
- blt cr7, DTLBMissLinear
rlwinm r10, r10, 0, 20, 31
oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
3:
@@ -343,29 +266,16 @@ DataStoreTLBMiss:
mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
- /* Insert the Guarded flag into the TWC from the Linux PTE.
+ /* Insert Guarded and Accessed flags into the TWC from the Linux PTE.
* It is bit 27 of both the Linux PTE and the TWC (at least
* I got that right :-). It will be better when we can put
* this into the Linux pgd/pmd and load it in the operation
* above.
*/
- rlwimi r11, r10, 0, _PAGE_GUARDED
+ rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
+ rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
mtspr SPRN_MD_TWC, r11
- /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
- * We also need to know if the insn is a load/store, so:
- * Clear _PAGE_PRESENT and load that which will
- * trap into DTLB Error with store bit set accordinly.
- */
- /* PRESENT=0x1, ACCESSED=0x20
- * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
- * r10 = (r10 & ~PRESENT) | r11;
- */
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
-#endif
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
@@ -374,165 +284,97 @@ DataStoreTLBMiss:
li r11, RPN_PATTERN
rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
+ mtspr SPRN_DAR, r11 /* Tag DAR */
/* Restore registers */
-0: mfspr r10, SPRN_DAR
- mtspr SPRN_DAR, r11 /* Tag DAR */
+0: mfspr r10, SPRN_SPRG_SCRATCH2
mfspr r11, SPRN_M_TW
rfi
patch_site 0b, patch__dtlbmiss_exit_1
-DTLBMissIMMR:
- mtcr r11
- /* Set 512k byte guarded page and mark it valid */
- li r10, MD_PS512K | MD_GUARDED | MD_SVALID
- mtspr SPRN_MD_TWC, r10
- mfspr r10, SPRN_IMMR /* Get current IMMR */
- rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
- _PAGE_PRESENT | _PAGE_NO_CACHE
- mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
-
- li r11, RPN_PATTERN
-
-0: mfspr r10, SPRN_DAR
- mtspr SPRN_DAR, r11 /* Tag DAR */
+#ifdef CONFIG_PERF_EVENTS
+ patch_site 0f, patch__dtlbmiss_perf
+0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, 1
+ stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ mfspr r10, SPRN_SPRG_SCRATCH2
mfspr r11, SPRN_M_TW
rfi
- patch_site 0b, patch__dtlbmiss_exit_2
-
-DTLBMissLinear:
- mtcr r11
- rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
-#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_DATA_SHIFT < 23
- patch_site 0f, patch__dtlbmiss_romem_top8
-
-0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
- rlwinm r11, r11, 0, 0xff800000
- neg r10, r11
- or r11, r11, r10
- rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
- ori r11, r11, MI_PS512K | MI_SVALID
- mfspr r10, SPRN_MD_EPN
- rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
-#else
- /* Set 8M byte page and mark it valid */
- li r11, MD_PS8MEG | MD_SVALID
#endif
- mtspr SPRN_MD_TWC, r11
-#ifdef CONFIG_STRICT_KERNEL_RWX
- patch_site 0f, patch__dtlbmiss_romem_top
-
-0: subis r11, r10, 0
- rlwimi r10, r11, 11, _PAGE_RO
-#endif
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
- _PAGE_PRESENT
- mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
-
- li r11, RPN_PATTERN
-
-0: mfspr r10, SPRN_DAR
- mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r11, SPRN_M_TW
- rfi
- patch_site 0b, patch__dtlbmiss_exit_3
/* This is an instruction TLB error on the MPC8xx. This could be due
* to many reasons, such as executing guarded memory or illegal instruction
* addresses. There is nothing to do but handle a big time error fault.
*/
- . = 0x1300
-InstructionTLBError:
- EXCEPTION_PROLOG
- mr r4,r12
+ START_EXCEPTION(INTERRUPT_INST_TLB_ERROR_8xx, InstructionTLBError)
+ /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
+ EXCEPTION_PROLOG INTERRUPT_INST_STORAGE InstructionTLBError
andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
andis. r10,r9,SRR1_ISI_NOPT@h
beq+ .Litlbie
- tlbie r4
- /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
+ tlbie r12
.Litlbie:
- stw r4, _DAR(r11)
- EXC_XFER_LITE(0x400, handle_page_fault)
+ stw r12, _DAR(r11)
+ stw r5, _DSISR(r11)
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
/* This is the data TLB error on the MPC8xx. This could be due to
* many reasons, including a dirty update to a pte. We bail out to
* a higher level function that can handle it.
*/
- . = 0x1400
-DataTLBError:
+ START_EXCEPTION(INTERRUPT_DATA_TLB_ERROR_8xx, DataTLBError)
EXCEPTION_PROLOG_0 handle_dar_dsisr=1
mfspr r11, SPRN_DAR
cmpwi cr1, r11, RPN_PATTERN
beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */
DARFixed:/* Return from dcbx instruction bug workaround */
-#ifdef CONFIG_VMAP_STACK
- li r11, RPN_PATTERN
- mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */
-#endif
EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2 handle_dar_dsisr=1
- get_and_save_dar_dsisr_on_stack r4, r5, r11
+ /* 0x300 is DataAccess exception, needed by bad_page_fault() */
+ EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1
+ lwz r4, _DAR(r11)
+ lwz r5, _DSISR(r11)
andis. r10,r5,DSISR_NOHPTE@h
beq+ .Ldtlbie
tlbie r4
.Ldtlbie:
-#ifndef CONFIG_VMAP_STACK
- li r10,RPN_PATTERN
- mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */
-#endif
- /* 0x300 is DataAccess exception, needed by bad_page_fault() */
- EXC_XFER_LITE(0x300, handle_page_fault)
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
-/* Called from DataStoreTLBMiss when perf TLB misses events are activated */
-#ifdef CONFIG_PERF_EVENTS
- patch_site 0f, patch__dtlbmiss_perf
-0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- addi r10, r10, 1
- stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- mfspr r10, SPRN_DAR
- mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r11, SPRN_M_TW
- rfi
-#endif
-
-stack_overflow:
+#ifdef CONFIG_VMAP_STACK
vmap_stack_overflow_exception
+#endif
/* On the MPC8xx, these next four traps are used for development
* support of breakpoints and such. Someday I will get around to
* using them.
*/
-do_databreakpoint:
- EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2 handle_dar_dsisr=1
- addi r3,r1,STACK_FRAME_OVERHEAD
- mfspr r4,SPRN_BAR
- stw r4,_DAR(r11)
-#ifdef CONFIG_VMAP_STACK
- lwz r5,_DSISR(r11)
-#else
- mfspr r5,SPRN_DSISR
-#endif
- EXC_XFER_STD(0x1c00, do_break)
-
- . = 0x1c00
-DataBreakpoint:
+ START_EXCEPTION(INTERRUPT_DATA_BREAKPOINT_8xx, DataBreakpoint)
EXCEPTION_PROLOG_0 handle_dar_dsisr=1
mfspr r11, SPRN_SRR0
cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l
cror 4*cr1+eq, 4*cr1+eq, 4*cr7+eq
- bne cr1, do_databreakpoint
+ bne cr1, 1f
mtcr r10
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
rfi
+1: EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 INTERRUPT_DATA_BREAKPOINT_8xx DataBreakpoint handle_dar_dsisr=1
+ mfspr r4,SPRN_BAR
+ stw r4,_DAR(r11)
+ prepare_transfer_to_handler
+ bl do_break
+ REST_NVGPRS(r1)
+ b interrupt_return
+
#ifdef CONFIG_PERF_EVENTS
- . = 0x1d00
-InstructionBreakpoint:
+ START_EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, InstructionBreakpoint)
mtspr SPRN_SPRG_SCRATCH0, r10
lwz r10, (instruction_counter - PAGE_OFFSET)@l(0)
addi r10, r10, -1
@@ -543,11 +385,12 @@ InstructionBreakpoint:
mfspr r10, SPRN_SPRG_SCRATCH0
rfi
#else
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, Trap_1d, unknown_exception)
#endif
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception)
+ __HEAD
. = 0x2000
/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
@@ -563,14 +406,9 @@ FixupDAR:/* Entry point for dcbx workaround. */
cmpli cr1, r11, PAGE_OFFSET@h
mfspr r11, SPRN_M_TWB /* Get level 1 table */
blt+ cr1, 3f
- rlwinm r11, r10, 16, 0xfff8
-
-0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
- patch_site 0b, patch__fixupdar_linmem_top
/* create physical page address from effective address */
tophys(r11, r10)
- blt- cr7, 201f
mfspr r11, SPRN_M_TWB /* Get level 1 table */
rlwinm r11, r11, 0, 20, 31
oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha
@@ -581,7 +419,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
mfspr r11, SPRN_MD_TWC
lwz r11, 0(r11) /* Get the pte */
bt 28,200f /* bit 28 = Large page (8M) */
- bt 29,202f /* bit 29 = Large page (8M or 512K) */
/* concat physical page address(r11) and page offset(r10) */
rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31
201: lwz r11,0(r11)
@@ -608,11 +445,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31
b 201b
-202:
- /* concat physical page address(r11) and page offset(r10) */
- rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31
- b 201b
-
144: mfspr r10, SPRN_DSISR
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
@@ -668,14 +500,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
152:
mfdar r11
mtctr r11 /* restore ctr reg from DAR */
-#ifdef CONFIG_VMAP_STACK
mfspr r11, SPRN_SPRG_THREAD
stw r10, DAR(r11)
mfspr r10, SPRN_DSISR
stw r10, DSISR(r11)
-#else
- mtdar r10 /* save fault EA to DAR */
-#endif
mfspr r10,SPRN_M_TW
b DARFixed /* Go back to normal TLB handling */
@@ -747,6 +575,27 @@ start_here:
rfi
/* Load up the kernel context */
2:
+#ifdef CONFIG_PIN_TLB_IMMR
+ lis r0, MD_TWAM@h
+ oris r0, r0, 0x1f00
+ mtspr SPRN_MD_CTR, r0
+ LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
+ tlbie r0
+ mtspr SPRN_MD_EPN, r0
+ LOAD_REG_IMMEDIATE(r0, MD_SVALID | MD_PS512K | MD_GUARDED)
+ mtspr SPRN_MD_TWC, r0
+ mfspr r0, SPRN_IMMR
+ rlwinm r0, r0, 0, 0xfff80000
+ ori r0, r0, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
+ _PAGE_NO_CACHE | _PAGE_PRESENT
+ mtspr SPRN_MD_RPN, r0
+ lis r0, (MD_TWAM | MD_RSV4I)@h
+ mtspr SPRN_MD_CTR, r0
+#endif
+#if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR)
+ lis r0, MD_TWAM@h
+ mtspr SPRN_MD_CTR, r0
+#endif
tlbia /* Clear all TLB entries */
sync /* wait for tlbia/tlbie to finish */
@@ -779,17 +628,10 @@ start_here:
initial_mmu:
li r8, 0
mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */
- lis r10, MD_RESETVAL@h
-#ifndef CONFIG_8xx_COPYBACK
- oris r10, r10, MD_WTDEF@h
-#endif
+ lis r10, MD_TWAM@h
mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */
tlbia /* Invalidate all TLB entries */
-#ifdef CONFIG_PIN_TLB_DATA
- oris r10, r10, MD_RSV4I@h
- mtspr SPRN_MD_CTR, r10 /* Set data TLB control */
-#endif
lis r8, MI_APG_INIT@h /* Set protection modes */
ori r8, r8, MI_APG_INIT@l
@@ -798,55 +640,32 @@ initial_mmu:
ori r8, r8, MD_APG_INIT@l
mtspr SPRN_MD_AP, r8
- /* Map a 512k page for the IMMR to get the processor
- * internal registers (among other things).
- */
-#ifdef CONFIG_PIN_TLB_IMMR
- oris r10, r10, MD_RSV4I@h
- ori r10, r10, 0x1c00
- mtspr SPRN_MD_CTR, r10
-
- mfspr r9, 638 /* Get current IMMR */
- andis. r9, r9, 0xfff8 /* Get 512 kbytes boundary */
-
- lis r8, VIRT_IMMR_BASE@h /* Create vaddr for TLB */
- ori r8, r8, MD_EVALID /* Mark it valid */
- mtspr SPRN_MD_EPN, r8
- li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */
- ori r8, r8, MD_SVALID /* Make it valid */
- mtspr SPRN_MD_TWC, r8
- mr r8, r9 /* Create paddr for TLB */
- ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
- mtspr SPRN_MD_RPN, r8
-#endif
-
- /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */
-#ifdef CONFIG_PIN_TLB_TEXT
+ /* Map the lower RAM (up to 32 Mbytes) into the ITLB and DTLB */
lis r8, MI_RSV4I@h
ori r8, r8, 0x1c00
-#endif
+ oris r12, r10, MD_RSV4I@h
+ ori r12, r12, 0x1c00
li r9, 4 /* up to 4 pages of 8M */
mtctr r9
lis r9, KERNELBASE@h /* Create vaddr for TLB */
- li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */
+ li r10, MI_PS8MEG | _PMD_ACCESSED | MI_SVALID
li r11, MI_BOOTINIT /* Create RPN for address 0 */
- lis r12, _einittext@h
- ori r12, r12, _einittext@l
1:
-#ifdef CONFIG_PIN_TLB_TEXT
mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
addi r8, r8, 0x100
-#endif
-
ori r0, r9, MI_EVALID /* Mark it valid */
mtspr SPRN_MI_EPN, r0
mtspr SPRN_MI_TWC, r10
mtspr SPRN_MI_RPN, r11 /* Store TLB entry */
+ mtspr SPRN_MD_CTR, r12
+ addi r12, r12, 0x100
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r10
+ mtspr SPRN_MD_RPN, r11
addis r9, r9, 0x80
addis r11, r11, 0x80
- cmpl cr0, r9, r12
- bdnzf gt, 1b
+ bdnz 1b
/* Since the cache is enabled according to the information we
* just loaded into the TLB, invalidate and enable the caches here.
@@ -857,17 +676,7 @@ initial_mmu:
mtspr SPRN_DC_CST, r8
lis r8, IDC_ENABLE@h
mtspr SPRN_IC_CST, r8
-#ifdef CONFIG_8xx_COPYBACK
- mtspr SPRN_DC_CST, r8
-#else
- /* For a debug option, I left this here to easily enable
- * the write through cache mode
- */
- lis r8, DC_SFWT@h
- mtspr SPRN_DC_CST, r8
- lis r8, IDC_ENABLE@h
mtspr SPRN_DC_CST, r8
-#endif
/* Disable debug mode entry on breakpoints */
mfspr r8, SPRN_DER
#ifdef CONFIG_PERF_EVENTS
@@ -878,28 +687,103 @@ initial_mmu:
mtspr SPRN_DER, r8
blr
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
- .data
- .globl sdata
-sdata:
- .globl empty_zero_page
- .align PAGE_SHIFT
-empty_zero_page:
- .space PAGE_SIZE
-EXPORT_SYMBOL(empty_zero_page)
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE table poiners, usually the kernel and current user
- * pointer to their respective root page table (pgdir).
- */
- .globl abatron_pteptrs
-abatron_pteptrs:
- .space 8
+_GLOBAL(mmu_pin_tlb)
+ lis r9, (1f - PAGE_OFFSET)@h
+ ori r9, r9, (1f - PAGE_OFFSET)@l
+ mfmsr r10
+ mflr r11
+ li r12, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI)
+ rlwinm r0, r10, 0, ~MSR_RI
+ rlwinm r0, r0, 0, ~MSR_EE
+ mtmsr r0
+ isync
+ .align 4
+ mtspr SPRN_SRR0, r9
+ mtspr SPRN_SRR1, r12
+ rfi
+1:
+ li r5, 0
+ lis r6, MD_TWAM@h
+ mtspr SPRN_MI_CTR, r5
+ mtspr SPRN_MD_CTR, r6
+ tlbia
+
+ LOAD_REG_IMMEDIATE(r5, 28 << 8)
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
+ LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+ LOAD_REG_ADDR(r9, _sinittext)
+ li r0, 4
+ mtctr r0
+
+2: ori r0, r6, MI_EVALID
+ mtspr SPRN_MI_CTR, r5
+ mtspr SPRN_MI_EPN, r0
+ mtspr SPRN_MI_TWC, r7
+ mtspr SPRN_MI_RPN, r8
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r9
+ bdnzt lt, 2b
+ lis r0, MI_RSV4I@h
+ mtspr SPRN_MI_CTR, r0
+
+ LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM)
+#ifdef CONFIG_PIN_TLB_DATA
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
+ li r8, 0
+#ifdef CONFIG_PIN_TLB_IMMR
+ li r0, 3
+#else
+ li r0, 4
+#endif
+ mtctr r0
+ cmpwi r4, 0
+ beq 4f
+ LOAD_REG_ADDR(r9, _sinittext)
+
+2: ori r0, r6, MD_EVALID
+ ori r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r12
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r9
+ bdnzt lt, 2b
+4:
+2: ori r0, r6, MD_EVALID
+ ori r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r12
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r3
+ bdnzt lt, 2b
+#endif
+#ifdef CONFIG_PIN_TLB_IMMR
+ LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
+ LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED | _PMD_ACCESSED)
+ mfspr r8, SPRN_IMMR
+ rlwinm r8, r8, 0, 0xfff80000
+ ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
+ _PAGE_NO_CACHE | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r8
+#endif
+#if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA)
+ lis r0, (MD_RSV4I | MD_TWAM)@h
+ mtspr SPRN_MD_CTR, r0
+#endif
+ mtspr SPRN_SRR1, r10
+ mtspr SPRN_SRR0, r11
+ rfi
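
Reviewer note: mmu_pin_tlb, added above, runs with MSR[IR]/MSR[DR] off and re-pins large TLB entries covering kernel text (and, under CONFIG_PIN_TLB_DATA / CONFIG_PIN_TLB_IMMR, data and the IMMR window). The first loop pins up to four read-only 8 Mbyte ITLB entries from PAGE_OFFSET up to _sinittext; a rough C rendering follows, where pin_itlb_8M() is a hypothetical helper standing in for the MI_CTR/MI_EPN/MI_TWC/MI_RPN store sequence.

unsigned long va = PAGE_OFFSET, pa = 0;
int slot;

for (slot = 28; slot < 32; slot++) {	/* the four MI_RSV4I-reserved slots */
	pin_itlb_8M(slot, va, pa);	/* valid, 8 Mbyte, read-only */
	va += SZ_8M;
	pa += SZ_8M;
	if (va >= (unsigned long)_sinittext)
		break;			/* asm: bdnzt lt, 2b */
}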
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 97c887950c3c..68e5c0a7e99d 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -17,10 +17,10 @@
*/
#include <linux/init.h>
+#include <linux/pgtable.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
@@ -31,19 +31,10 @@
#include <asm/kvm_book3s_asm.h>
#include <asm/export.h>
#include <asm/feature-fixups.h>
+#include <asm/interrupt.h>
#include "head_32.h"
-/* 601 only have IBAT */
-#ifdef CONFIG_PPC_BOOK3S_601
-#define LOAD_BAT(n, reg, RA, RB) \
- li RA,0; \
- mtspr SPRN_IBAT##n##U,RA; \
- lwz RA,(n*16)+0(reg); \
- lwz RB,(n*16)+4(reg); \
- mtspr SPRN_IBAT##n##U,RA; \
- mtspr SPRN_IBAT##n##L,RB
-#else
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
li RA,0; \
@@ -57,11 +48,10 @@
lwz RB,(n*16)+12(reg); \
mtspr SPRN_DBAT##n##U,RA; \
mtspr SPRN_DBAT##n##L,RB
-#endif
__HEAD
.stabs "arch/powerpc/kernel/",N_SO,0,0,0f
- .stabs "head_32.S",N_SO,0,0,0f
+ .stabs "head_book3s_32.S",N_SO,0,0,0f
0:
_ENTRY(_stext);
@@ -166,9 +156,8 @@ __after_mmu_off:
bl initial_bats
bl load_segment_registers
-#ifdef CONFIG_KASAN
+ bl reloc_offset
bl early_hash_table
-#endif
#if defined(CONFIG_BOOTX_TEXT)
bl setup_disp_bat
#endif
@@ -185,10 +174,8 @@ __after_mmu_off:
bl reloc_offset
li r24,0 /* cpu# */
bl call_setup_cpu /* Call setup_cpu for this CPU */
-#ifdef CONFIG_PPC_BOOK3S_32
bl reloc_offset
bl init_idle_6xx
-#endif /* CONFIG_PPC_BOOK3S_32 */
/*
@@ -219,8 +206,7 @@ turn_on_mmu:
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
- SYNC
- RFI /* enables MMU */
+ rfi /* enables MMU */
/*
* We need __secondary_hold as a place to hold the other cpus on
@@ -253,8 +239,8 @@ __secondary_hold_acknowledge:
/* System reset */
/* core99 pmac starts the secondary here by changing the vector, and
- putting it back to what it was (unknown_exception) when done. */
- EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
+ putting it back to what it was (unknown_async_exception) when done. */
+ EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, unknown_async_exception)
/* Machine check */
/*
@@ -270,97 +256,62 @@ __secondary_hold_acknowledge:
* pointer when we take an exception from supervisor mode.)
* -- paulus.
*/
- . = 0x200
- DO_KVM 0x200
-MachineCheck:
+ START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
EXCEPTION_PROLOG_0
-#ifdef CONFIG_VMAP_STACK
- li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
- mtmsr r11
- isync
-#endif
#ifdef CONFIG_PPC_CHRP
- mfspr r11, SPRN_SPRG_THREAD
- tovirt_vmstack r11, r11
- lwz r11, RTAS_SP(r11)
- cmpwi cr1, r11, 0
+ mtspr SPRN_SPRG_SCRATCH2,r1
+ mfspr r1, SPRN_SPRG_THREAD
+ lwz r1, RTAS_SP(r1)
+ cmpwi cr1, r1, 0
bne cr1, 7f
+ mfspr r1, SPRN_SPRG_SCRATCH2
#endif /* CONFIG_PPC_CHRP */
- EXCEPTION_PROLOG_1 for_rtas=1
-7: EXCEPTION_PROLOG_2
- addi r3,r1,STACK_FRAME_OVERHEAD
+ EXCEPTION_PROLOG_1
+7: EXCEPTION_PROLOG_2 0x200 MachineCheck
#ifdef CONFIG_PPC_CHRP
-#ifdef CONFIG_VMAP_STACK
- mfspr r4, SPRN_SPRG_THREAD
- tovirt(r4, r4)
- lwz r4, RTAS_SP(r4)
- cmpwi cr1, r4, 0
-#endif
- beq cr1, machine_check_tramp
- b machine_check_in_rtas
-#else
- b machine_check_tramp
+ beq cr1, 1f
+ twi 31, 0, 0
#endif
+1: prepare_transfer_to_handler
+ bl machine_check_exception
+ b interrupt_return
/* Data access exception. */
- . = 0x300
- DO_KVM 0x300
-DataAccess:
-#ifdef CONFIG_VMAP_STACK
- mtspr SPRN_SPRG_SCRATCH0,r10
- mfspr r10, SPRN_SPRG_THREAD
+ START_EXCEPTION(INTERRUPT_DATA_STORAGE, DataAccess)
+#ifdef CONFIG_PPC_BOOK3S_604
BEGIN_MMU_FTR_SECTION
+ mtspr SPRN_SPRG_SCRATCH2,r10
+ mfspr r10, SPRN_SPRG_THREAD
stw r11, THR11(r10)
mfspr r10, SPRN_DSISR
mfcr r11
-#ifdef CONFIG_PPC_KUAP
- andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
-#else
andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
-#endif
mfspr r10, SPRN_SPRG_THREAD
beq hash_page_dsi
.Lhash_page_dsi_cont:
mtcr r11
lwz r11, THR11(r10)
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
- mtspr SPRN_SPRG_SCRATCH1,r11
- mfspr r11, SPRN_DAR
- stw r11, DAR(r10)
- mfspr r11, SPRN_DSISR
- stw r11, DSISR(r10)
- mfspr r11, SPRN_SRR0
- stw r11, SRR0(r10)
- mfspr r11, SPRN_SRR1 /* check whether user or kernel */
- stw r11, SRR1(r10)
- mfcr r10
- andi. r11, r11, MSR_PR
-
- EXCEPTION_PROLOG_1
- b handle_page_fault_tramp_1
-#else /* CONFIG_VMAP_STACK */
- EXCEPTION_PROLOG handle_dar_dsisr=1
- get_and_save_dar_dsisr_on_stack r4, r5, r11
-BEGIN_MMU_FTR_SECTION
-#ifdef CONFIG_PPC_KUAP
- andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
-#else
- andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
-#endif
- bne handle_page_fault_tramp_2 /* if not, try to put a PTE */
- rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */
- bl hash_page
- b handle_page_fault_tramp_1
-FTR_SECTION_ELSE
- b handle_page_fault_tramp_2
+ mfspr r10, SPRN_SPRG_SCRATCH2
+MMU_FTR_SECTION_ELSE
+ b 1f
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
-#endif /* CONFIG_VMAP_STACK */
+#endif
+1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ lwz r5, _DSISR(r1)
+ andis. r0, r5, DSISR_DABRMATCH@h
+ bne- 1f
+ bl do_page_fault
+ b interrupt_return
+1: bl do_break
+ REST_NVGPRS(r1)
+ b interrupt_return
+
/* Instruction access exception. */
- . = 0x400
- DO_KVM 0x400
-InstructionAccess:
-#ifdef CONFIG_VMAP_STACK
+ START_EXCEPTION(INTERRUPT_INST_STORAGE, InstructionAccess)
mtspr SPRN_SPRG_SCRATCH0,r10
mtspr SPRN_SPRG_SCRATCH1,r11
mfspr r10, SPRN_SPRG_THREAD
@@ -369,50 +320,47 @@ InstructionAccess:
mfspr r11, SPRN_SRR1 /* check whether user or kernel */
stw r11, SRR1(r10)
mfcr r10
+#ifdef CONFIG_PPC_BOOK3S_604
BEGIN_MMU_FTR_SECTION
andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */
bne hash_page_isi
.Lhash_page_isi_cont:
mfspr r11, SPRN_SRR1 /* check whether user or kernel */
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
andi. r11, r11, MSR_PR
EXCEPTION_PROLOG_1
- EXCEPTION_PROLOG_2
-#else /* CONFIG_VMAP_STACK */
- EXCEPTION_PROLOG
- andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */
- beq 1f /* if so, try to put a PTE */
- li r3,0 /* into the hash table */
- mr r4,r12 /* SRR0 is fault address */
-BEGIN_MMU_FTR_SECTION
- bl hash_page
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-#endif /* CONFIG_VMAP_STACK */
-1: mr r4,r12
+ EXCEPTION_PROLOG_2 INTERRUPT_INST_STORAGE InstructionAccess
andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
- stw r4, _DAR(r11)
- EXC_XFER_LITE(0x400, handle_page_fault)
+ stw r5, _DSISR(r11)
+ stw r12, _DAR(r11)
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
/* External interrupt */
- EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
/* Alignment exception */
- . = 0x600
- DO_KVM 0x600
-Alignment:
- EXCEPTION_PROLOG handle_dar_dsisr=1
- save_dar_dsisr_on_stack r4, r5, r11
- addi r3,r1,STACK_FRAME_OVERHEAD
- b alignment_exception_tramp
+ START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+ EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl alignment_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Program check exception */
- EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
+ START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+ EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+ prepare_transfer_to_handler
+ bl program_check_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Floating-point unavailable */
- . = 0x800
- DO_KVM 0x800
-FPUnavailable:
+ START_EXCEPTION(0x800, FPUnavailable)
+#ifdef CONFIG_PPC_FPU
BEGIN_FTR_SECTION
/*
* Certain Freescale cores don't have a FPU and treat fp instructions
@@ -420,28 +368,29 @@ BEGIN_FTR_SECTION
*/
b ProgramCheck
END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
- EXCEPTION_PROLOG
+ EXCEPTION_PROLOG INTERRUPT_FP_UNAVAIL FPUnavailable
beq 1f
bl load_up_fpu /* if from user, just load it up */
b fast_exception_return
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception)
+1: prepare_transfer_to_handler
+ bl kernel_fp_unavailable_exception
+ b interrupt_return
+#else
+ b ProgramCheck
+#endif
/* Decrementer */
- EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
+ EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0xa00, Trap_0a, unknown_exception)
+ EXCEPTION(0xb00, Trap_0b, unknown_exception)
/* System call */
- . = 0xc00
- DO_KVM 0xc00
-SystemCall:
- SYSCALL_ENTRY 0xc00
+ START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+ SYSCALL_ENTRY INTERRUPT_SYSCALL
-/* Single step - not used on 601 */
- EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
+ EXCEPTION(0xe00, Trap_0e, unknown_exception)
/*
* The Altivec unavailable trap is at 0x0f20. Foo.
@@ -451,19 +400,18 @@ SystemCall:
* non-altivec kernel running on a machine with altivec just
* by executing an altivec instruction.
*/
- . = 0xf00
- DO_KVM 0xf00
+ START_EXCEPTION(INTERRUPT_PERFMON, PerformanceMonitorTrap)
b PerformanceMonitor
- . = 0xf20
- DO_KVM 0xf20
+ START_EXCEPTION(INTERRUPT_ALTIVEC_UNAVAIL, AltiVecUnavailableTrap)
b AltiVecUnavailable
+ __HEAD
/*
* Handle TLB miss for instruction on 603/603e.
* Note: we get an alternate set of r0 - r3 to use automatically.
*/
- . = 0x1000
+ . = INTERRUPT_INST_TLB_MISS_603
InstructionTLBMiss:
/*
* r0: scratch
@@ -473,19 +421,17 @@ InstructionTLBMiss:
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_IMISS
-#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
- lis r1,PAGE_OFFSET@h /* check if kernel address */
+#ifdef CONFIG_MODULES
+ lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
#endif
- mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
- li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
-#else
- li r1,_PAGE_PRESENT | _PAGE_EXEC
-#endif
-#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
- bge- 112f
+ mfspr r2, SPRN_SDR1
+ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER
+ rlwinm r2, r2, 28, 0xfffff000
+#ifdef CONFIG_MODULES
+ bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
#endif
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
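
Reviewer note: in the reworked InstructionTLBMiss above, r1 is loaded with the set of PTE bits that must all be present for the fetch to be allowed: _PAGE_USER is included for user addresses and dropped when the kernel page table is selected. A hedged C sketch of that permission mask; the actual check against the PTE happens later in the handler, outside this hunk.

static inline bool itlb_access_ok(u32 pte, bool kernel_addr)
{
	u32 need = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC;

	if (!kernel_addr)
		need |= _PAGE_USER;
	return (pte & need) == need;	/* any missing bit -> InstructionAddressInvalid */
}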
@@ -531,7 +477,7 @@ InstructionAddressInvalid:
/*
* Handle TLB miss for DATA Load operation on 603/603e
*/
- . = 0x1100
+ . = INTERRUPT_DATA_LOAD_TLB_MISS_603
DataLoadTLBMiss:
/*
* r0: scratch
@@ -541,16 +487,14 @@ DataLoadTLBMiss:
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_DMISS
- lis r1,PAGE_OFFSET@h /* check if kernel address */
+ lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
- li r1, _PAGE_PRESENT | _PAGE_ACCESSED
-#else
- li r1, _PAGE_PRESENT
-#endif
- bge- 112f
+ mfspr r2, SPRN_SDR1
+ li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER
+ rlwinm r2, r2, 28, 0xfffff000
+ bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r1, _PAGE_PRESENT | _PAGE_ACCESSED
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
@@ -574,8 +518,6 @@ BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
- mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
- mtcrf 0x80,r2
BEGIN_MMU_FTR_SECTION
li r0,1
mfspr r1,SPRN_SPRG_603_LRU
@@ -587,9 +529,15 @@ BEGIN_MMU_FTR_SECTION
mfspr r2,SPRN_SRR1
rlwimi r2,r0,31-14,14,14
mtspr SPRN_SRR1,r2
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+ mtcrf 0x80,r2
+ tlbld r3
+ rfi
+MMU_FTR_SECTION_ELSE
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
tlbld r3
rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
DataAddressInvalid:
mfspr r3,SPRN_SRR1
rlwinm r1,r3,9,6,6 /* Get load/store bit */
@@ -611,7 +559,7 @@ DataAddressInvalid:
/*
* Handle TLB miss for DATA Store on 603/603e
*/
- . = 0x1200
+ . = INTERRUPT_DATA_STORE_TLB_MISS_603
DataStoreTLBMiss:
/*
* r0: scratch
@@ -621,16 +569,14 @@ DataStoreTLBMiss:
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_DMISS
- lis r1,PAGE_OFFSET@h /* check if kernel address */
+ lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
- li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
-#else
- li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
-#endif
- bge- 112f
+ mfspr r2, SPRN_SDR1
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER
+ rlwinm r2, r2, 28, 0xfffff000
+ bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
@@ -665,63 +611,58 @@ BEGIN_MMU_FTR_SECTION
mfspr r2,SPRN_SRR1
rlwimi r2,r0,31-14,14,14
mtspr SPRN_SRR1,r2
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+ mtcrf 0x80,r2
tlbld r3
rfi
+MMU_FTR_SECTION_ELSE
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
+ tlbld r3
+ rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
#ifndef CONFIG_ALTIVEC
#define altivec_assist_exception unknown_exception
#endif
- EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
- EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD)
- EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD)
- EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD)
-
- . = 0x3000
-
-machine_check_tramp:
- EXC_XFER_STD(0x200, machine_check_exception)
+#ifndef CONFIG_TAU_INT
+#define TAUException unknown_async_exception
+#endif
-alignment_exception_tramp:
- EXC_XFER_STD(0x600, alignment_exception)
+ EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception)
+ EXCEPTION(0x1400, SMI, SMIException)
+ EXCEPTION(0x1500, Trap_15, unknown_exception)
+ EXCEPTION(0x1600, Trap_16, altivec_assist_exception)
+ EXCEPTION(0x1700, Trap_17, TAUException)
+ EXCEPTION(0x1800, Trap_18, unknown_exception)
+ EXCEPTION(0x1900, Trap_19, unknown_exception)
+ EXCEPTION(0x1a00, Trap_1a, unknown_exception)
+ EXCEPTION(0x1b00, Trap_1b, unknown_exception)
+ EXCEPTION(0x1c00, Trap_1c, unknown_exception)
+ EXCEPTION(0x1d00, Trap_1d, unknown_exception)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception)
+ EXCEPTION(0x2000, RunMode, RunModeException)
+ EXCEPTION(0x2100, Trap_21, unknown_exception)
+ EXCEPTION(0x2200, Trap_22, unknown_exception)
+ EXCEPTION(0x2300, Trap_23, unknown_exception)
+ EXCEPTION(0x2400, Trap_24, unknown_exception)
+ EXCEPTION(0x2500, Trap_25, unknown_exception)
+ EXCEPTION(0x2600, Trap_26, unknown_exception)
+ EXCEPTION(0x2700, Trap_27, unknown_exception)
+ EXCEPTION(0x2800, Trap_28, unknown_exception)
+ EXCEPTION(0x2900, Trap_29, unknown_exception)
+ EXCEPTION(0x2a00, Trap_2a, unknown_exception)
+ EXCEPTION(0x2b00, Trap_2b, unknown_exception)
+ EXCEPTION(0x2c00, Trap_2c, unknown_exception)
+ EXCEPTION(0x2d00, Trap_2d, unknown_exception)
+ EXCEPTION(0x2e00, Trap_2e, unknown_exception)
+ EXCEPTION(0x2f00, Trap_2f, unknown_exception)
-handle_page_fault_tramp_1:
-#ifdef CONFIG_VMAP_STACK
- EXCEPTION_PROLOG_2 handle_dar_dsisr=1
-#endif
- lwz r4, _DAR(r11)
- lwz r5, _DSISR(r11)
- /* fall through */
-handle_page_fault_tramp_2:
- EXC_XFER_LITE(0x300, handle_page_fault)
+ __HEAD
+ . = 0x3000
-#ifdef CONFIG_VMAP_STACK
+#ifdef CONFIG_PPC_BOOK3S_604
.macro save_regs_thread thread
stw r0, THR0(\thread)
stw r3, THR3(\thread)
@@ -785,45 +726,46 @@ fast_hash_page_return:
/* DSI */
mtcr r11
lwz r11, THR11(r10)
- mfspr r10, SPRN_SPRG_SCRATCH0
- SYNC
- RFI
+ mfspr r10, SPRN_SPRG_SCRATCH2
+ rfi
1: /* ISI */
mtcr r11
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r10, SPRN_SPRG_SCRATCH0
- SYNC
- RFI
+ rfi
+#endif /* CONFIG_PPC_BOOK3S_604 */
-stack_overflow:
+#ifdef CONFIG_VMAP_STACK
vmap_stack_overflow_exception
#endif
+ __HEAD
AltiVecUnavailable:
- EXCEPTION_PROLOG
+ EXCEPTION_PROLOG 0xf20 AltiVecUnavailable
#ifdef CONFIG_ALTIVEC
beq 1f
bl load_up_altivec /* if from user, just load it up */
b fast_exception_return
#endif /* CONFIG_ALTIVEC */
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0xf20, altivec_unavailable_exception)
+1: prepare_transfer_to_handler
+ bl altivec_unavailable_exception
+ b interrupt_return
+ __HEAD
PerformanceMonitor:
- EXCEPTION_PROLOG
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0xf00, performance_monitor_exception)
+ EXCEPTION_PROLOG 0xf00 PerformanceMonitor
+ prepare_transfer_to_handler
+ bl performance_monitor_exception
+ b interrupt_return
+ __HEAD
/*
* This code is jumped to from the startup code to copy
* the kernel image to physical address PHYSICAL_START.
*/
relocate_kernel:
- addis r9,r26,klimit@ha /* fetch klimit */
- lwz r25,klimit@l(r9)
- addis r25,r25,-KERNELBASE@h
lis r3,PHYSICAL_START@h /* Destination base address */
li r6,0 /* Destination offset */
li r5,0x4000 /* # bytes of memory to copy */
@@ -831,7 +773,8 @@ relocate_kernel:
addi r0,r3,4f@l /* jump to the address of 4f */
mtctr r0 /* in copy and do the rest. */
bctr /* jump to the copy */
-4: mr r5,r25
+4: lis r5,_end-KERNELBASE@h
+ ori r5,r5,_end-KERNELBASE@l
bl copy_and_flush /* copy the rest */
b turn_on_mmu
@@ -884,7 +827,6 @@ __secondary_start_pmac_0:
set to map the 0xf0000000 - 0xffffffff region */
mfmsr r0
rlwinm r0,r0,0,28,26 /* clear DR (0x10) */
- SYNC
mtmsr r0
isync
@@ -896,10 +838,8 @@ __secondary_start:
lis r3,-KERNELBASE@h
mr r4,r24
bl call_setup_cpu /* Call setup_cpu for this CPU */
-#ifdef CONFIG_PPC_BOOK3S_32
lis r3,-KERNELBASE@h
bl init_idle_6xx
-#endif /* CONFIG_PPC_BOOK3S_32 */
/* get current's stack and current */
lis r2,secondary_current@ha
@@ -922,9 +862,12 @@ __secondary_start:
tophys(r4,r2)
addi r4,r4,THREAD /* phys address of our thread_struct */
mtspr SPRN_SPRG_THREAD,r4
+BEGIN_MMU_FTR_SECTION
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
- mtspr SPRN_SPRG_PGDIR, r4
+ rlwinm r4, r4, 4, 0xffff01ff
+ mtspr SPRN_SDR1, r4
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
/* enable MMU and jump to start_secondary */
li r4,MSR_KERNEL
@@ -932,8 +875,7 @@ __secondary_start:
ori r3,r3,start_secondary@l
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- SYNC
- RFI
+ rfi
#endif /* CONFIG_SMP */
#ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -941,21 +883,9 @@ __secondary_start:
#endif
/*
- * Those generic dummy functions are kept for CPUs not
- * included in CONFIG_PPC_BOOK3S_32
- */
-#if !defined(CONFIG_PPC_BOOK3S_32)
-_ENTRY(__save_cpu_setup)
- blr
-_ENTRY(__restore_cpu_setup)
- blr
-#endif /* !defined(CONFIG_PPC_BOOK3S_32) */
-
-/*
* Load stuff into the MMU. Intended to be called with
* IR=0 and DR=0.
*/
-#ifdef CONFIG_KASAN
early_hash_table:
sync /* Force all PTE updates to finish */
isync
@@ -967,7 +897,6 @@ early_hash_table:
ori r6, r6, 3 /* 256kB table */
mtspr SPRN_SDR1, r6
blr
-#endif
load_up_mmu:
sync /* Force all PTE updates to finish */
@@ -975,14 +904,15 @@ load_up_mmu:
tlbia /* Clear all TLB entries */
sync /* wait for tlbia/tlbie to finish */
TLBSYNC /* ... on all CPUs */
+BEGIN_MMU_FTR_SECTION
/* Load the SDR1 register (hash table base & size) */
lis r6,_SDR1@ha
tophys(r6,r6)
lwz r6,_SDR1@l(r6)
mtspr SPRN_SDR1,r6
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-/* Load the BAT registers with the values set up by MMU_init.
- MMU_init takes care of whether we're on a 601 or not. */
+/* Load the BAT registers with the values set up by MMU_init. */
lis r3,BATS@ha
addi r3,r3,BATS@l
tophys(r3,r3)
@@ -998,16 +928,10 @@ BEGIN_MMU_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
-load_segment_registers:
+_GLOBAL(load_segment_registers)
li r0, NUM_USER_SEGMENTS /* load up user segment register values */
mtctr r0 /* for context 0 */
li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
-#ifdef CONFIG_PPC_KUEP
- oris r3, r3, SR_NX@h /* Set Nx */
-#endif
-#ifdef CONFIG_PPC_KUAP
- oris r3, r3, SR_KS@h /* Set Ks */
-#endif
li r4, 0
3: mtsrin r3, r4
addi r3, r3, 0x111 /* increment VSID */
@@ -1036,9 +960,12 @@ start_here:
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
mtspr SPRN_SPRG_THREAD,r4
+BEGIN_MMU_FTR_SECTION
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
- mtspr SPRN_SPRG_PGDIR, r4
+ rlwinm r4, r4, 4, 0xffff01ff
+ mtspr SPRN_SDR1, r4
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
/* stack */
lis r1,init_thread_union@ha
@@ -1057,11 +984,7 @@ start_here:
bl machine_init
bl __save_cpu_setup
bl MMU_init
-#ifdef CONFIG_KASAN
-BEGIN_MMU_FTR_SECTION
bl MMU_init_hw_patch
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-#endif
/*
* Go back to running unmapped so we can load up new values
@@ -1076,8 +999,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
.align 4
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
- SYNC
- RFI
+ rfi
/* Load up the kernel context */
2: bl load_up_mmu
@@ -1088,7 +1010,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
*/
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
- stw r5, 0xf0(r0) /* This much match your Abatron config */
+ stw r5, 0xf0(0) /* This must match your Abatron config */
lis r6, swapper_pg_dir@h
ori r6, r6, swapper_pg_dir@l
tophys(r5, r5)
@@ -1101,54 +1023,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
ori r3,r3,start_kernel@l
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- SYNC
- RFI
-
-/*
- * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
- *
- * Set up the segment registers for a new context.
- */
-_ENTRY(switch_mmu_context)
- lwz r3,MMCONTEXTID(r4)
- cmpwi cr0,r3,0
- blt- 4f
- mulli r3,r3,897 /* multiply context by skew factor */
- rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */
-#ifdef CONFIG_PPC_KUEP
- oris r3, r3, SR_NX@h /* Set Nx */
-#endif
-#ifdef CONFIG_PPC_KUAP
- oris r3, r3, SR_KS@h /* Set Ks */
-#endif
- li r0,NUM_USER_SEGMENTS
- mtctr r0
-
- lwz r4, MM_PGD(r4)
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is passed as second argument.
- */
- lis r5, abatron_pteptrs@ha
- stw r4, abatron_pteptrs@l + 0x4(r5)
-#endif
- tophys(r4, r4)
- mtspr SPRN_SPRG_PGDIR, r4
- li r4,0
- isync
-3:
- mtsrin r3,r4
- addi r3,r3,0x111 /* next VSID */
- rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */
- addis r4,r4,0x1000 /* address of next segment */
- bdnz 3b
- sync
- isync
- blr
-4: trap
- EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0
- blr
-EXPORT_SYMBOL(switch_mmu_context)
+ rfi
/*
* An undocumented "feature" of 604e requires that the v bit
@@ -1161,7 +1036,6 @@ EXPORT_SYMBOL(switch_mmu_context)
clear_bats:
li r10,0
-#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT0U,r10
mtspr SPRN_DBAT0L,r10
mtspr SPRN_DBAT1U,r10
@@ -1170,7 +1044,6 @@ clear_bats:
mtspr SPRN_DBAT2L,r10
mtspr SPRN_DBAT3U,r10
mtspr SPRN_DBAT3L,r10
-#endif
mtspr SPRN_IBAT0U,r10
mtspr SPRN_IBAT0L,r10
mtspr SPRN_IBAT1U,r10
@@ -1219,8 +1092,7 @@ _ENTRY(update_bats)
.align 4
mtspr SPRN_SRR0, r4
mtspr SPRN_SRR1, r3
- SYNC
- RFI
+ rfi
1: bl clear_bats
lis r3, BATS@ha
addi r3, r3, BATS@l
@@ -1239,8 +1111,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
mtmsr r3
mtspr SPRN_SRR0, r7
mtspr SPRN_SRR1, r6
- SYNC
- RFI
+ rfi
flush_tlbs:
lis r10, 0x40
@@ -1261,28 +1132,11 @@ mmu_off:
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
sync
- RFI
+ rfi
-/*
- * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET
- * (we keep one for debugging) and on others, we use one 256M BAT.
- */
+/* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */
initial_bats:
lis r11,PAGE_OFFSET@h
-#ifdef CONFIG_PPC_BOOK3S_601
- ori r11,r11,4 /* set up BAT registers for 601 */
- li r8,0x7f /* valid, block length = 8MB */
- mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
- mtspr SPRN_IBAT0L,r8 /* lower BAT register */
- addis r11,r11,0x800000@h
- addis r8,r8,0x800000@h
- mtspr SPRN_IBAT1U,r11
- mtspr SPRN_IBAT1L,r8
- addis r11,r11,0x800000@h
- addis r8,r8,0x800000@h
- mtspr SPRN_IBAT2U,r11
- mtspr SPRN_IBAT2L,r8
-#else
tophys(r8,r11)
#ifdef CONFIG_SMP
ori r8,r8,0x12 /* R/W access, M=1 */
@@ -1291,11 +1145,10 @@ initial_bats:
#endif /* CONFIG_SMP */
ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */
- mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */
+ mtspr SPRN_DBAT0L,r8 /* N.B. 6xx have valid */
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
mtspr SPRN_IBAT0L,r8
mtspr SPRN_IBAT0U,r11
-#endif
isync
blr
@@ -1313,13 +1166,8 @@ setup_disp_bat:
beqlr
lwz r11,0(r8)
lwz r8,4(r8)
-#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT3L,r8
mtspr SPRN_DBAT3U,r11
-#else
- mtspr SPRN_IBAT3L,r8
- mtspr SPRN_IBAT3U,r11
-#endif
blr
#endif /* CONFIG_BOOTX_TEXT */
@@ -1358,61 +1206,4 @@ setup_usbgecko_bat:
blr
#endif
-#ifdef CONFIG_8260
-/* Jump into the system reset for the rom.
- * We first disable the MMU, and then jump to the ROM reset address.
- *
- * r3 is the board info structure, r4 is the location for starting.
- * I use this for building a small kernel that can load other kernels,
- * rather than trying to write or rely on a rom monitor that can tftp load.
- */
- .globl m8260_gorom
-m8260_gorom:
- mfmsr r0
- rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */
- sync
- mtmsr r0
- sync
- mfspr r11, SPRN_HID0
- lis r10, 0
- ori r10,r10,HID0_ICE|HID0_DCE
- andc r11, r11, r10
- mtspr SPRN_HID0, r11
- isync
- li r5, MSR_ME|MSR_RI
- lis r6,2f@h
- addis r6,r6,-KERNELBASE@h
- ori r6,r6,2f@l
- mtspr SPRN_SRR0,r6
- mtspr SPRN_SRR1,r5
- isync
- sync
- rfi
-2:
- mtlr r4
- blr
-#endif
-
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
.data
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-EXPORT_SYMBOL(empty_zero_page)
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 37fc84ed90e3..ef8d1b1c234e 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -44,7 +44,7 @@ END_BTB_FLUSH_SECTION
#endif
-#define NORMAL_EXCEPTION_PROLOG(intno) \
+#define NORMAL_EXCEPTION_PROLOG(trapno, intno) \
mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
mfspr r10, SPRN_SPRG_THREAD; \
stw r11, THREAD_NORMSAVE(0)(r10); \
@@ -53,6 +53,8 @@ END_BTB_FLUSH_SECTION
mfspr r11, SPRN_SRR1; \
DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \
andi. r11, r11, MSR_PR; /* check whether user or kernel */\
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL); \
+ mtmsr r11; \
mr r11, r1; \
beq 1f; \
BOOKE_CLEAR_BTB(r11) \
@@ -76,12 +78,39 @@ END_BTB_FLUSH_SECTION
stw r1, 0(r11); \
mr r1, r11; \
rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \
- addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
- stw r10, 8(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
+ COMMON_EXCEPTION_PROLOG_END trapno
+
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+ stw r0,GPR0(r1)
+ lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ addi r10, r10, STACK_FRAME_REGS_MARKER@l
+ stw r10, 8(r1)
+ li r10, \trapno
+ stw r10,_TRAP(r1)
+ SAVE_4GPRS(3, r1)
+ SAVE_2GPRS(7, r1)
+ SAVE_NVGPRS(r1)
+ stw r2,GPR2(r1)
+ stw r12,_NIP(r1)
+ stw r9,_MSR(r1)
+ mfctr r10
+ mfspr r2,SPRN_SPRG_THREAD
+ stw r10,_CTR(r1)
+ tovirt(r2, r2)
+ mfspr r10,SPRN_XER
+ addi r2, r2, -THREAD
+ stw r10,_XER(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+.endm
+
+.macro prepare_transfer_to_handler
+#ifdef CONFIG_E500
+ andi. r12,r9,MSR_PR
+ bne 777f
+ bl prepare_transfer_to_handler
+777:
+#endif
+.endm
.macro SYSCALL_ENTRY trapno intno srr1
mfspr r10, SPRN_SPRG_THREAD
@@ -99,83 +128,21 @@ BEGIN_FTR_SECTION
mr r12, r13
lwz r13, THREAD_NORMSAVE(2)(r10)
FTR_SECTION_ELSE
-#endif
mfcr r12
-#ifdef CONFIG_KVM_BOOKE_HV
ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
+#else
+ mfcr r12
#endif
+ mfspr r9, SPRN_SRR1
BOOKE_CLEAR_BTB(r11)
- lwz r11, TASK_STACK - THREAD(r10)
+ mr r11, r1
+ lwz r1, TASK_STACK - THREAD(r10)
rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
- ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
- stw r12, _CCR(r11) /* save various registers */
- mflr r12
- stw r12,_LINK(r11)
+ ALLOC_STACK_FRAME(r1, THREAD_SIZE - INT_FRAME_SIZE)
+ stw r12, _CCR(r1)
mfspr r12,SPRN_SRR0
- stw r1, GPR1(r11)
- mfspr r9,SPRN_SRR1
- stw r1, 0(r11)
- mr r1, r11
- stw r12,_NIP(r11)
- rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
- lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
- stw r2,GPR2(r11)
- addi r12, r12, STACK_FRAME_REGS_MARKER@l
- stw r9,_MSR(r11)
- li r2, \trapno + 1
- stw r12, 8(r11)
- stw r2,_TRAP(r11)
- SAVE_GPR(0, r11)
- SAVE_4GPRS(3, r11)
- SAVE_2GPRS(7, r11)
-
- addi r11,r1,STACK_FRAME_OVERHEAD
- addi r2,r10,-THREAD
- stw r11,PT_REGS(r10)
- /* Check to see if the dbcr0 register is set up to debug. Use the
- internal debug mode bit to do this. */
- lwz r12,THREAD_DBCR0(r10)
- andis. r12,r12,DBCR0_IDM@h
- ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
- beq+ 3f
- /* From user and task is ptraced - load up global dbcr0 */
- li r12,-1 /* clear all pending debug events */
- mtspr SPRN_DBSR,r12
- lis r11,global_dbcr0@ha
- tophys(r11,r11)
- addi r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
- lwz r10, TASK_CPU(r2)
- slwi r10, r10, 3
- add r11, r11, r10
-#endif
- lwz r12,0(r11)
- mtspr SPRN_DBCR0,r12
- lwz r12,4(r11)
- addi r12,r12,-1
- stw r12,4(r11)
-
-3:
- tovirt(r2, r2) /* set r2 to current */
- lis r11, transfer_to_syscall@h
- ori r11, r11, transfer_to_syscall@l
-#ifdef CONFIG_TRACE_IRQFLAGS
- /*
- * If MSR is changing we need to keep interrupts disabled at this point
- * otherwise we might risk taking an interrupt before we tell lockdep
- * they are enabled.
- */
- lis r10, MSR_KERNEL@h
- ori r10, r10, MSR_KERNEL@l
- rlwimi r10, r9, 0, MSR_EE
-#else
- lis r10, (MSR_KERNEL | MSR_EE)@h
- ori r10, r10, (MSR_KERNEL | MSR_EE)@l
-#endif
- mtspr SPRN_SRR1,r10
- mtspr SPRN_SRR0,r11
- SYNC
- RFI /* jump to handler, enable MMU */
+ stw r12,_NIP(r1)
+ b transfer_to_syscall /* jump to handler */
.endm
/* To handle the additional exception priority levels on 40x and Book-E
@@ -183,7 +150,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
*
* On 40x critical is the only additional level
* On 44x/e500 we have critical and machine check
- * On e200 we have critical and debug (machine check occurs via critical)
*
* Additionally we reserve a SPRG for each priority level so we can free up a
* GPR to use as the base for indirect access to the exception stacks. This
@@ -199,23 +165,21 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
#define MC_STACK_BASE mcheckirq_ctx
#define CRIT_STACK_BASE critirq_ctx
-/* only on e500mc/e200 */
+/* only on e500mc */
#define DBG_STACK_BASE dbgirq_ctx
-#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE)
-
#ifdef CONFIG_SMP
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
mfspr r8,SPRN_PIR; \
slwi r8,r8,2; \
addis r8,r8,level##_STACK_BASE@ha; \
lwz r8,level##_STACK_BASE@l(r8); \
- addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
+ addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE;
#else
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
lis r8,level##_STACK_BASE@ha; \
lwz r8,level##_STACK_BASE@l(r8); \
- addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
+ addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE;
#endif
/*
@@ -226,7 +190,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
* registers as the normal prolog above. Instead we use a portion of the
* critical/machine check exception stack at low physical addresses.
*/
-#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, trapno, intno, exc_level_srr0, exc_level_srr1) \
mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \
BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
stw r9,GPR9(r8); /* save various registers */\
@@ -238,9 +202,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \
BOOKE_CLEAR_BTB(r10) \
andi. r11,r11,MSR_PR; \
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)); \
+ mtmsr r11; \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\
- addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
+ addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE; /* allocate stack frame */\
beq 1f; \
/* COMING FROM USER MODE */ \
stw r9,_CCR(r11); /* save CR */\
@@ -267,16 +233,44 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
stw r1,0(r11); \
mr r1,r11; \
rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-#define CRITICAL_EXCEPTION_PROLOG(intno) \
- EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1)
-#define DEBUG_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
-#define MCHECK_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \
+ COMMON_EXCEPTION_PROLOG_END trapno
+
+#define SAVE_xSRR(xSRR) \
+ mfspr r0,SPRN_##xSRR##0; \
+ stw r0,_##xSRR##0(r1); \
+ mfspr r0,SPRN_##xSRR##1; \
+ stw r0,_##xSRR##1(r1)
+
+
+.macro SAVE_MMU_REGS
+#ifdef CONFIG_PPC_BOOK3E_MMU
+ mfspr r0,SPRN_MAS0
+ stw r0,MAS0(r1)
+ mfspr r0,SPRN_MAS1
+ stw r0,MAS1(r1)
+ mfspr r0,SPRN_MAS2
+ stw r0,MAS2(r1)
+ mfspr r0,SPRN_MAS3
+ stw r0,MAS3(r1)
+ mfspr r0,SPRN_MAS6
+ stw r0,MAS6(r1)
+#ifdef CONFIG_PHYS_64BIT
+ mfspr r0,SPRN_MAS7
+ stw r0,MAS7(r1)
+#endif /* CONFIG_PHYS_64BIT */
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+#ifdef CONFIG_44x
+ mfspr r0,SPRN_MMUCR
+ stw r0,MMUCR(r1)
+#endif
+.endm
+
+#define CRITICAL_EXCEPTION_PROLOG(trapno, intno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, trapno+2, intno, SPRN_CSRR0, SPRN_CSRR1)
+#define DEBUG_EXCEPTION_PROLOG(trapno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(DBG, trapno+8, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
+#define MCHECK_EXCEPTION_PROLOG(trapno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(MC, trapno+4, MACHINE_CHECK, \
SPRN_MCSRR0, SPRN_MCSRR1)
/*
@@ -303,44 +297,34 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
.align 5; \
label:
-#define EXCEPTION(n, intno, label, hdlr, xfer) \
+#define EXCEPTION(n, intno, label, hdlr) \
START_EXCEPTION(label); \
- NORMAL_EXCEPTION_PROLOG(intno); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
+ NORMAL_EXCEPTION_PROLOG(n, intno); \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b interrupt_return
#define CRITICAL_EXCEPTION(n, intno, label, hdlr) \
START_EXCEPTION(label); \
- CRITICAL_EXCEPTION_PROLOG(intno); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- crit_transfer_to_handler, ret_from_crit_exc)
+ CRITICAL_EXCEPTION_PROLOG(n, intno); \
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b ret_from_crit_exc
#define MCHECK_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(label); \
- MCHECK_EXCEPTION_PROLOG; \
+ MCHECK_EXCEPTION_PROLOG(n); \
mfspr r5,SPRN_ESR; \
stw r5,_ESR(r11); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- mcheck_transfer_to_handler, ret_from_mcheck_exc)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- lis r10,msr@h; \
- ori r10,r10,msr@l; \
- bl tfer; \
- .long hdlr; \
- .long ret
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
- ret_from_except)
+ SAVE_xSRR(DSRR); \
+ SAVE_xSRR(CSRR); \
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b ret_from_mcheck_exc
/* Check for a single step debug exception while in an exception
* handler before state has been saved. This is to catch the case
@@ -357,7 +341,7 @@ label:
*/
#define DEBUG_DEBUG_EXCEPTION \
START_EXCEPTION(DebugDebug); \
- DEBUG_EXCEPTION_PROLOG; \
+ DEBUG_EXCEPTION_PROLOG(2000); \
\
/* \
* If there is a single step or branch-taken exception in an \
@@ -405,12 +389,17 @@ label:
\
/* continue normal handling for a debug exception... */ \
2: mfspr r4,SPRN_DBSR; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc)
+ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\
+ SAVE_xSRR(CSRR); \
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl DebugException; \
+ b ret_from_debug_exc
#define DEBUG_CRIT_EXCEPTION \
START_EXCEPTION(DebugCrit); \
- CRITICAL_EXCEPTION_PROLOG(DEBUG); \
+ CRITICAL_EXCEPTION_PROLOG(2000,DEBUG); \
\
/* \
* If there is a single step or branch-taken exception in an \
@@ -458,80 +447,81 @@ label:
\
/* continue normal handling for a critical exception... */ \
2: mfspr r4,SPRN_DBSR; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc)
+ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl DebugException; \
+ b ret_from_crit_exc
#define DATA_STORAGE_EXCEPTION \
START_EXCEPTION(DataStorage) \
- NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \
+ NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE); \
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
stw r4, _DEAR(r11); \
- EXC_XFER_LITE(0x0300, handle_page_fault)
+ prepare_transfer_to_handler; \
+ bl do_page_fault; \
+ b interrupt_return
+/*
+ * Instruction TLB Error interrupt handlers may call InstructionStorage
+ * directly without clearing ESR, so the ESR at this point may be left over
+ * from a prior interrupt.
+ *
+ * In any case, do_page_fault for BOOK3E does not use ESR and always expects
+ * dsisr to be 0. ESR_DST from a prior store in particular would confuse fault
+ * handling.
+ */
#define INSTRUCTION_STORAGE_EXCEPTION \
START_EXCEPTION(InstructionStorage) \
- NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \
- mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
+ NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE); \
+ li r5,0; /* Store 0 in regs->esr (dsisr) */ \
stw r5,_ESR(r11); \
- mr r4,r12; /* Pass SRR0 as arg2 */ \
- stw r4, _DEAR(r11); \
- li r5,0; /* Pass zero as arg3 */ \
- EXC_XFER_LITE(0x0400, handle_page_fault)
+ stw r12, _DEAR(r11); /* Set regs->dear (dar) to SRR0 */ \
+ prepare_transfer_to_handler; \
+ bl do_page_fault; \
+ b interrupt_return
#define ALIGNMENT_EXCEPTION \
START_EXCEPTION(Alignment) \
- NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \
+ NORMAL_EXCEPTION_PROLOG(0x600, ALIGNMENT); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
stw r4,_DEAR(r11); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_STD(0x0600, alignment_exception)
+ prepare_transfer_to_handler; \
+ bl alignment_exception; \
+ REST_NVGPRS(r1); \
+ b interrupt_return
#define PROGRAM_EXCEPTION \
START_EXCEPTION(Program) \
- NORMAL_EXCEPTION_PROLOG(PROGRAM); \
+ NORMAL_EXCEPTION_PROLOG(0x700, PROGRAM); \
mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \
stw r4,_ESR(r11); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_STD(0x0700, program_check_exception)
+ prepare_transfer_to_handler; \
+ bl program_check_exception; \
+ REST_NVGPRS(r1); \
+ b interrupt_return
#define DECREMENTER_EXCEPTION \
START_EXCEPTION(Decrementer) \
- NORMAL_EXCEPTION_PROLOG(DECREMENTER); \
+ NORMAL_EXCEPTION_PROLOG(0x900, DECREMENTER); \
lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \
mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_LITE(0x0900, timer_interrupt)
+ prepare_transfer_to_handler; \
+ bl timer_interrupt; \
+ b interrupt_return
#define FP_UNAVAILABLE_EXCEPTION \
START_EXCEPTION(FloatingPointUnavailable) \
- NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \
+ NORMAL_EXCEPTION_PROLOG(0x800, FP_UNAVAIL); \
beq 1f; \
bl load_up_fpu; /* if from user, just load it up */ \
b fast_exception_return; \
-1: addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_STD(0x800, kernel_fp_unavailable_exception)
-
-#else /* __ASSEMBLY__ */
-struct exception_regs {
- unsigned long mas0;
- unsigned long mas1;
- unsigned long mas2;
- unsigned long mas3;
- unsigned long mas6;
- unsigned long mas7;
- unsigned long srr0;
- unsigned long srr1;
- unsigned long csrr0;
- unsigned long csrr1;
- unsigned long dsrr0;
- unsigned long dsrr1;
- unsigned long saved_ksp_limit;
-};
-
-/* ensure this structure is always sized to a multiple of the stack alignment */
-#define STACK_EXC_LVL_FRAME_SIZE _ALIGN_UP(sizeof (struct exception_regs), 16)
+1: prepare_transfer_to_handler; \
+ bl kernel_fp_unavailable_exception; \
+ b interrupt_return
#endif /* __ASSEMBLY__ */
#endif /* __HEAD_BOOKE_H__ */
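The reworked prologs above fold the exception level into the trap value saved on the stack: COMMON_EXCEPTION_PROLOG_END stores \trapno into _TRAP(r1), and the critical, machine-check and debug wrappers pass trapno+2, trapno+4 and trapno+8 respectively. Because the base trap numbers used in this header are multiples of 16, the low bits are free to carry that level. A standalone C sketch of the decode follows; the enum and helper names are illustrative, not kernel symbols.

/* Illustrative decode of the exception-level bits that the Book-E prolog
 * macros add to the base trap number: +2 critical, +4 machine check,
 * +8 debug.  Not a kernel helper; assumes the base trap number is a
 * multiple of 16, as in head_booke.h.
 */
#include <stdio.h>

enum exc_level { EXC_NORMAL, EXC_CRIT, EXC_MCHECK, EXC_DEBUG };

static enum exc_level trap_exc_level(unsigned long trap)
{
	switch (trap & 0xf) {
	case 2:
		return EXC_CRIT;
	case 4:
		return EXC_MCHECK;
	case 8:
		return EXC_DEBUG;
	default:
		return EXC_NORMAL;
	}
}

int main(void)
{
	/* 0x100 critical input -> 0x102, 0x200 machine check -> 0x204 */
	printf("0x300 -> %d, 0x102 -> %d, 0x204 -> %d\n",
	       trap_exc_level(0x300), trap_exc_level(0x102), trap_exc_level(0x204));
	return 0;
}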
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 840af004041e..0a9a0f301474 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -28,10 +28,10 @@
#include <linux/init.h>
#include <linux/threads.h>
+#include <linux/pgtable.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
@@ -79,7 +79,7 @@ _ENTRY(_start);
mr r23,r3
mr r25,r4
- bl 0f
+ bcl 20,31,$+4
0: mflr r8
addis r3,r8,(is_second_reloc - 0b)@ha
lwz r19,(is_second_reloc - 0b)@l(r3)
@@ -113,7 +113,7 @@ _ENTRY(_start);
1:
/*
- * We have the runtime (virutal) address of our base.
+ * We have the runtime (virtual) address of our base.
* We calculate our shift of offset from a 64M page.
* We could map the 64M page we belong to at PAGE_OFFSET and
* get going from there.
@@ -187,9 +187,6 @@ set_ivor:
/* Setup the defaults for TLB entries */
li r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l
-#ifdef CONFIG_E200
- oris r2,r2,MAS4_TLBSELD(1)@h
-#endif
mtspr SPRN_MAS4, r2
#if !defined(CONFIG_BDI_SWITCH)
@@ -362,33 +359,30 @@ interrupt_base:
CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
/* Machine Check Interrupt */
-#ifdef CONFIG_E200
- /* no RFMCI, MCSRRs on E200 */
- CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
- machine_check_exception)
-#else
MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
-#endif
/* Data Storage Interrupt */
START_EXCEPTION(DataStorage)
- NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
- mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
+ NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE)
+ mfspr r5,SPRN_ESR /* Grab the ESR, save it */
stw r5,_ESR(r11)
- mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
+ mfspr r4,SPRN_DEAR /* Grab the DEAR, save it */
+ stw r4, _DEAR(r11)
andis. r10,r5,(ESR_ILK|ESR_DLK)@h
bne 1f
- stw r4, _DEAR(r11)
- EXC_XFER_LITE(0x0300, handle_page_fault)
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
1:
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0x0300, CacheLockingException)
+ prepare_transfer_to_handler
+ bl CacheLockingException
+ b interrupt_return
/* Instruction Storage Interrupt */
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -400,14 +394,7 @@ interrupt_base:
#ifdef CONFIG_PPC_FPU
FP_UNAVAILABLE_EXCEPTION
#else
-#ifdef CONFIG_E200
- /* E200 treats 'normal' floating point instructions as FP Unavail exception */
- EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- program_check_exception, EXC_XFER_STD)
-#else
- EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- unknown_exception, EXC_XFER_STD)
-#endif
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception)
#endif
/* System Call Interrupt */
@@ -415,16 +402,14 @@ interrupt_base:
SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1
/* Auxiliary Processor Unavailable Interrupt */
- EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
- unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, unknown_exception)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x3100, FIT, FixedIntervalTimer, unknown_exception)
/* Watchdog Timer Interrupt */
#ifdef CONFIG_BOOKE_WDT
@@ -512,7 +497,7 @@ END_BTB_FLUSH_SECTION
#endif
#endif
- bne 2f /* Bail if permission/valid mismach */
+ bne 2f /* Bail if permission/valid mismatch */
/* Jump to common tlb load */
b finish_tlb_load
@@ -607,7 +592,7 @@ END_BTB_FLUSH_SECTION
#endif
#endif
- bne 2f /* Bail if permission mismach */
+ bne 2f /* Bail if permission mismatch */
/* Jump to common TLB load point */
b finish_tlb_load
@@ -625,42 +610,48 @@ END_BTB_FLUSH_SECTION
mfspr r10, SPRN_SPRG_RSCRATCH0
b InstructionStorage
-/* Define SPE handlers for e200 and e500v2 */
+/* Define SPE handlers for e500v2 */
#ifdef CONFIG_SPE
/* SPE Unavailable */
START_EXCEPTION(SPEUnavailable)
- NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
+ NORMAL_EXCEPTION_PROLOG(0x2010, SPE_UNAVAIL)
beq 1f
bl load_up_spe
b fast_exception_return
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0x2010, KernelSPE)
+1: prepare_transfer_to_handler
+ bl KernelSPE
+ b interrupt_return
#elif defined(CONFIG_SPE_POSSIBLE)
- EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
- unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, unknown_exception)
#endif /* CONFIG_SPE_POSSIBLE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
- EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData,
- SPEFloatingPointException, EXC_XFER_STD)
+ START_EXCEPTION(SPEFloatingPointData)
+ NORMAL_EXCEPTION_PROLOG(0x2030, SPE_FP_DATA)
+ prepare_transfer_to_handler
+ bl SPEFloatingPointException
+ REST_NVGPRS(r1)
+ b interrupt_return
/* SPE Floating Point Round */
- EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- SPEFloatingPointRoundException, EXC_XFER_STD)
+ START_EXCEPTION(SPEFloatingPointRound)
+ NORMAL_EXCEPTION_PROLOG(0x2050, SPE_FP_ROUND)
+ prepare_transfer_to_handler
+ bl SPEFloatingPointRoundException
+ REST_NVGPRS(r1)
+ b interrupt_return
#elif defined(CONFIG_SPE_POSSIBLE)
- EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData,
- unknown_exception, EXC_XFER_STD)
- EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception)
+ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, unknown_exception)
#endif /* CONFIG_SPE_POSSIBLE */
/* Performance Monitor */
EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
- performance_monitor_exception, EXC_XFER_STD)
+ performance_monitor_exception)
- EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD)
+ EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception)
CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
CriticalDoorbell, unknown_exception)
@@ -675,10 +666,10 @@ END_BTB_FLUSH_SECTION
unknown_exception)
/* Hypercall */
- EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception)
/* Embedded Hypervisor Privilege */
- EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception)
interrupt_end:
@@ -807,31 +798,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
#endif
3: mtspr SPRN_MAS2, r12
-#ifdef CONFIG_E200
- /* Round robin TLB1 entries assignment */
- mfspr r12, SPRN_MAS0
-
- /* Extract TLB1CFG(NENTRY) */
- mfspr r11, SPRN_TLB1CFG
- andi. r11, r11, 0xfff
-
- /* Extract MAS0(NV) */
- andi. r13, r12, 0xfff
- addi r13, r13, 1
- cmpw 0, r13, r11
- addi r12, r12, 1
-
- /* check if we need to wrap */
- blt 7f
-
- /* wrap back to first free tlbcam entry */
- lis r13, tlbcam_index@ha
- lwz r13, tlbcam_index@l(r13)
- rlwimi r12, r13, 0, 20, 31
-7:
- mtspr SPRN_MAS0,r12
-#endif /* CONFIG_E200 */
-
tlb_write_entry:
tlbwe
@@ -892,9 +858,9 @@ KernelSPE:
ori r3,r3,87f@l
mr r4,r2 /* current */
lwz r5,_NIP(r1)
- bl printk
+ bl _printk
#endif
- b ret_from_except
+ b interrupt_return
#ifdef CONFIG_PRINTK
87: .string "SPE used in kernel (task=%p, pc=%x) \n"
#endif
@@ -933,21 +899,6 @@ get_phys_addr:
* Global functions
*/
-#ifdef CONFIG_E200
-/* Adjust or setup IVORs for e200 */
-_GLOBAL(__setup_e200_ivors)
- li r3,DebugDebug@l
- mtspr SPRN_IVOR15,r3
- li r3,SPEUnavailable@l
- mtspr SPRN_IVOR32,r3
- li r3,SPEFloatingPointData@l
- mtspr SPRN_IVOR33,r3
- li r3,SPEFloatingPointRound@l
- mtspr SPRN_IVOR34,r3
- sync
- blr
-#endif
-
#ifdef CONFIG_E500
#ifndef CONFIG_PPC_E500MC
/* Adjust or setup IVORs for e500v1/v2 */
@@ -1034,20 +985,6 @@ _GLOBAL(abort)
mtspr SPRN_DBCR0,r13
isync
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r4, 0x4(r5)
-#endif
- mtspr SPRN_PID,r3
- isync /* Force context change */
- blr
-
#ifdef CONFIG_SMP
/* When we get here, r24 needs to hold the CPU # */
.globl __secondary_start
@@ -1195,7 +1132,7 @@ _GLOBAL(switch_to_as1)
bne 1b
/* Get the tlb entry used by the current running code */
- bl 0f
+ bcl 20,31,$+4
0: mflr r4
tlbsx 0,r4
@@ -1229,7 +1166,7 @@ _GLOBAL(switch_to_as1)
_GLOBAL(restore_to_as0)
mflr r0
- bl 0f
+ bcl 20,31,$+4
0: mflr r9
addi r9,r9,1f - 0b
@@ -1275,26 +1212,3 @@ _GLOBAL(restore_to_as0)
*/
3: mr r3,r5
bl _start
-
-/*
- * We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align 12
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-EXPORT_SYMBOL(empty_zero_page)
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/*
- * Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index d0854320bb50..91a3be14808b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -22,15 +22,15 @@
#include <asm/processor.h>
#include <asm/sstep.h>
#include <asm/debug.h>
-#include <asm/debugfs.h>
#include <asm/hvcall.h>
+#include <asm/inst.h>
#include <linux/uaccess.h>
/*
* Stores the breakpoints currently in use on each breakpoint address
* register for every cpu
*/
-static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]);
/*
* Returns total number of data or instruction breakpoints available.
@@ -38,10 +38,21 @@ static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
int hw_breakpoint_slots(int type)
{
if (type == TYPE_DATA)
- return HBP_NUM;
+ return nr_wp_slots();
return 0; /* no instruction breakpoints available */
}
+static bool single_step_pending(void)
+{
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (current->thread.last_hit_ubp[i])
+ return true;
+ }
+ return false;
+}
+
/*
* Install a perf counter breakpoint.
*
@@ -54,16 +65,26 @@ int hw_breakpoint_slots(int type)
int arch_install_hw_breakpoint(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
- struct perf_event **slot = this_cpu_ptr(&bp_per_reg);
+ struct perf_event **slot;
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ slot = this_cpu_ptr(&bp_per_reg[i]);
+ if (!*slot) {
+ *slot = bp;
+ break;
+ }
+ }
- *slot = bp;
+ if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
+ return -EBUSY;
/*
* Do not install DABR values if the instruction must be single-stepped.
* If so, DABR will be populated in single_step_dabr_instruction().
*/
- if (current->thread.last_hit_ubp != bp)
- __set_breakpoint(info);
+ if (!single_step_pending())
+ __set_breakpoint(i, info);
return 0;
}
@@ -79,15 +100,248 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
*/
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
- struct perf_event **slot = this_cpu_ptr(&bp_per_reg);
+ struct arch_hw_breakpoint null_brk = {0};
+ struct perf_event **slot;
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ slot = this_cpu_ptr(&bp_per_reg[i]);
+ if (*slot == bp) {
+ *slot = NULL;
+ break;
+ }
+ }
- if (*slot != bp) {
- WARN_ONCE(1, "Can't find the breakpoint");
+ if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
return;
+
+ __set_breakpoint(i, &null_brk);
+}
+
+static bool is_ptrace_bp(struct perf_event *bp)
+{
+ return bp->overflow_handler == ptrace_triggered;
+}
+
+struct breakpoint {
+ struct list_head list;
+ struct perf_event *bp;
+ bool ptrace_bp;
+};
+
+static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
+static LIST_HEAD(task_bps);
+
+static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
+{
+ struct breakpoint *tmp;
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return ERR_PTR(-ENOMEM);
+ tmp->bp = bp;
+ tmp->ptrace_bp = is_ptrace_bp(bp);
+ return tmp;
+}
+
+static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2)
+{
+ __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr;
+
+ bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE);
+ bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE);
+ bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE);
+ bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE);
+
+ return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr);
+}
+
+static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp)
+{
+ return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp;
+}
+
+static bool can_co_exist(struct breakpoint *b, struct perf_event *bp)
+{
+ return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp));
+}
+
+static int task_bps_add(struct perf_event *bp)
+{
+ struct breakpoint *tmp;
+
+ tmp = alloc_breakpoint(bp);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+
+ list_add(&tmp->list, &task_bps);
+ return 0;
+}
+
+static void task_bps_remove(struct perf_event *bp)
+{
+ struct list_head *pos, *q;
+
+ list_for_each_safe(pos, q, &task_bps) {
+ struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);
+
+ if (tmp->bp == bp) {
+ list_del(&tmp->list);
+ kfree(tmp);
+ break;
+ }
}
+}
- *slot = NULL;
- hw_breakpoint_disable();
+/*
+ * If any task has a breakpoint from the alternate infrastructure,
+ * return true. Otherwise return false.
+ */
+static bool all_task_bps_check(struct perf_event *bp)
+{
+ struct breakpoint *tmp;
+
+ list_for_each_entry(tmp, &task_bps, list) {
+ if (!can_co_exist(tmp, bp))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * If the same task has a breakpoint from the alternate infrastructure,
+ * return true. Otherwise return false.
+ */
+static bool same_task_bps_check(struct perf_event *bp)
+{
+ struct breakpoint *tmp;
+
+ list_for_each_entry(tmp, &task_bps, list) {
+ if (tmp->bp->hw.target == bp->hw.target &&
+ !can_co_exist(tmp, bp))
+ return true;
+ }
+ return false;
+}
+
+static int cpu_bps_add(struct perf_event *bp)
+{
+ struct breakpoint **cpu_bp;
+ struct breakpoint *tmp;
+ int i = 0;
+
+ tmp = alloc_breakpoint(bp);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+
+ cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!cpu_bp[i]) {
+ cpu_bp[i] = tmp;
+ break;
+ }
+ }
+ return 0;
+}
+
+static void cpu_bps_remove(struct perf_event *bp)
+{
+ struct breakpoint **cpu_bp;
+ int i = 0;
+
+ cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!cpu_bp[i])
+ continue;
+
+ if (cpu_bp[i]->bp == bp) {
+ kfree(cpu_bp[i]);
+ cpu_bp[i] = NULL;
+ break;
+ }
+ }
+}
+
+static bool cpu_bps_check(int cpu, struct perf_event *bp)
+{
+ struct breakpoint **cpu_bp;
+ int i;
+
+ cpu_bp = per_cpu_ptr(cpu_bps, cpu);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp))
+ return true;
+ }
+ return false;
+}
+
+static bool all_cpu_bps_check(struct perf_event *bp)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ if (cpu_bps_check(cpu, bp))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * We don't use any locks to serialize accesses to cpu_bps or task_bps
+ * because we are already inside nr_bp_mutex.
+ */
+int arch_reserve_bp_slot(struct perf_event *bp)
+{
+ int ret;
+
+ /* ptrace breakpoint */
+ if (is_ptrace_bp(bp)) {
+ if (all_cpu_bps_check(bp))
+ return -ENOSPC;
+
+ if (same_task_bps_check(bp))
+ return -ENOSPC;
+
+ return task_bps_add(bp);
+ }
+
+ /* perf breakpoint */
+ if (is_kernel_addr(bp->attr.bp_addr))
+ return 0;
+
+ if (bp->hw.target && bp->cpu == -1) {
+ if (same_task_bps_check(bp))
+ return -ENOSPC;
+
+ return task_bps_add(bp);
+ } else if (!bp->hw.target && bp->cpu != -1) {
+ if (all_task_bps_check(bp))
+ return -ENOSPC;
+
+ return cpu_bps_add(bp);
+ }
+
+ if (same_task_bps_check(bp))
+ return -ENOSPC;
+
+ ret = cpu_bps_add(bp);
+ if (ret)
+ return ret;
+ ret = task_bps_add(bp);
+ if (ret)
+ cpu_bps_remove(bp);
+
+ return ret;
+}
+
+void arch_release_bp_slot(struct perf_event *bp)
+{
+ if (!is_kernel_addr(bp->attr.bp_addr)) {
+ if (bp->hw.target)
+ task_bps_remove(bp);
+ if (bp->cpu != -1)
+ cpu_bps_remove(bp);
+ }
}
/*
@@ -102,8 +356,14 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp)
* restoration variables to prevent dangling pointers.
* FIXME, this should not be using bp->ctx at all! Sayeth peterz.
*/
- if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L))
- bp->ctx->task->thread.last_hit_ubp = NULL;
+ if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) {
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (bp->ctx->task->thread.last_hit_ubp[i] == bp)
+ bp->ctx->task->thread.last_hit_ubp[i] = NULL;
+ }
+ }
}
/*
@@ -140,10 +400,10 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
* <---8 bytes--->
*
* In this case, we should configure hw as:
- * start_addr = address & ~HW_BREAKPOINT_ALIGN
+ * start_addr = address & ~(HW_BREAKPOINT_SIZE - 1)
* len = 16 bytes
*
- * @start_addr and @end_addr are inclusive.
+ * @start_addr is inclusive but @end_addr is exclusive.
*/
static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
{
@@ -151,14 +411,15 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
u16 hw_len;
unsigned long start_addr, end_addr;
- start_addr = hw->address & ~HW_BREAKPOINT_ALIGN;
- end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN;
- hw_len = end_addr - start_addr + 1;
+ start_addr = ALIGN_DOWN(hw->address, HW_BREAKPOINT_SIZE);
+ end_addr = ALIGN(hw->address + hw->len, HW_BREAKPOINT_SIZE);
+ hw_len = end_addr - start_addr;
if (dawr_enabled()) {
max_len = DAWR_MAX_LEN;
- /* DAWR region can't cross 512 bytes boundary */
- if ((start_addr >> 9) != (end_addr >> 9))
+ /* DAWR region can't cross 512 bytes boundary on p10 predecessors */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
+ (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512)))
return -EINVAL;
} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
/* 8xx can setup a range without limitation */
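The hunk above rounds the user-requested watch range out to HW_BREAKPOINT_SIZE with ALIGN_DOWN()/ALIGN() and, on DAWR hardware older than Power10 (no CPU_FTR_ARCH_31), rejects any rounded range that crosses a 512-byte boundary. A standalone sketch of that arithmetic, assuming HW_BREAKPOINT_SIZE is 8 bytes and using userspace stand-ins for the kernel's alignment macros:

/* Sketch of hw_breakpoint_validate_len() rounding; not kernel code. */
#include <stdio.h>

#define HW_BREAKPOINT_SIZE	8UL
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long address = 0x13fe, len = 6;	/* user-requested range */
	unsigned long start = ALIGN_DOWN(address, HW_BREAKPOINT_SIZE);
	unsigned long end = ALIGN(address + len, HW_BREAKPOINT_SIZE);

	printf("hw range: 0x%lx-0x%lx, hw_len=%lu\n", start, end, end - start);

	/* Pre-Power10 DAWR restriction: the rounded range must not cross 512B */
	if (ALIGN_DOWN(start, 512UL) != ALIGN_DOWN(end - 1, 512UL))
		printf("rejected: crosses a 512-byte boundary\n");
	return 0;
}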
@@ -215,90 +476,150 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
{
struct arch_hw_breakpoint *info;
+ int i;
- if (likely(!tsk->thread.last_hit_ubp))
- return;
-
- info = counter_arch_bp(tsk->thread.last_hit_ubp);
- regs->msr &= ~MSR_SE;
- __set_breakpoint(info);
- tsk->thread.last_hit_ubp = NULL;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (unlikely(tsk->thread.last_hit_ubp[i]))
+ goto reset;
+ }
+ return;
+
+reset:
+ regs_set_return_msr(regs, regs->msr & ~MSR_SE);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ info = counter_arch_bp(__this_cpu_read(bp_per_reg[i]));
+ __set_breakpoint(i, info);
+ tsk->thread.last_hit_ubp[i] = NULL;
+ }
}
-static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info)
+static bool is_larx_stcx_instr(int type)
{
- return ((info->address <= dar) && (dar - info->address < info->len));
+ return type == LARX || type == STCX;
}
-static bool
-dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info)
+static bool is_octword_vsx_instr(int type, int size)
{
- return ((dar <= info->address + info->len - 1) &&
- (dar + size - 1 >= info->address));
+ return ((type == LOAD_VSX || type == STORE_VSX) && size == 32);
}
/*
- * Handle debug exception notifications.
+ * We've failed in reliably handling the hw-breakpoint. Unregister
+ * it and throw a warning message to let the user know about it.
*/
-static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
- struct arch_hw_breakpoint *info)
+static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info)
{
- unsigned int instr = 0;
- int ret, type, size;
- struct instruction_op op;
- unsigned long addr = info->address;
-
- if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
- goto fail;
-
- ret = analyse_instr(&op, regs, instr);
- type = GETTYPE(op.type);
- size = GETSIZE(op.type);
+ WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.",
+ info->address);
+ perf_event_disable_inatomic(bp);
+}
- if (!ret && (type == LARX || type == STCX)) {
- printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
- " Breakpoint at 0x%lx will be disabled.\n", addr);
- goto disable;
- }
+static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info)
+{
+ printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n",
+ info->address);
+ perf_event_disable_inatomic(bp);
+}
- /*
- * If it's extraneous event, we still need to emulate/single-
- * step the instruction, but we don't generate an event.
- */
- if (size && !dar_range_overlaps(regs->dar, size, info))
- info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
+ struct arch_hw_breakpoint **info, int *hit,
+ struct ppc_inst instr)
+{
+ int i;
+ int stepped;
/* Do not emulate user-space instructions, instead single-step them */
if (user_mode(regs)) {
- current->thread.last_hit_ubp = bp;
- regs->msr |= MSR_SE;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ current->thread.last_hit_ubp[i] = bp[i];
+ info[i] = NULL;
+ }
+ regs_set_return_msr(regs, regs->msr | MSR_SE);
return false;
}
- if (!emulate_step(regs, instr))
- goto fail;
-
+ stepped = emulate_step(regs, instr);
+ if (!stepped) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ handler_error(bp[i], info[i]);
+ info[i] = NULL;
+ }
+ return false;
+ }
return true;
+}
+
+static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info,
+ int *hit, unsigned long ea)
+{
+ int i;
+ unsigned long hw_end_addr;
-fail:
/*
- * We've failed in reliably handling the hw-breakpoint. Unregister
- * it and throw a warning message to let the user know about it.
+ * Handle spurious exception only when any bp_per_reg is set.
+ * Otherwise this might be created by xmon and not actually a
+ * spurious exception.
*/
- WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
- "0x%lx will be disabled.", addr);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!info[i])
+ continue;
+
+ hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE);
+
+ /*
+ * Ending address of DAWR range is less than starting
+ * address of op.
+ */
+ if ((hw_end_addr - 1) >= ea)
+ continue;
+
+ /*
+ * Those addresses need to be in the same or in two
+ * consecutive 512B blocks;
+ */
+ if (((hw_end_addr - 1) >> 10) != (ea >> 10))
+ continue;
+
+ /*
+ * 'op address + 64B' generates an address that has a
+ * carry into bit 52 (crosses 2K boundary).
+ */
+ if ((ea & 0x800) == ((ea + 64) & 0x800))
+ continue;
-disable:
- perf_event_disable_inatomic(bp);
- return false;
+ break;
+ }
+
+ if (i == nr_wp_slots())
+ return;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (info[i]) {
+ hit[i] = 1;
+ info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ }
+ }
}
int hw_breakpoint_handler(struct die_args *args)
{
+ bool err = false;
int rc = NOTIFY_STOP;
- struct perf_event *bp;
+ struct perf_event *bp[HBP_NUM_MAX] = { NULL };
struct pt_regs *regs = args->regs;
- struct arch_hw_breakpoint *info;
+ struct arch_hw_breakpoint *info[HBP_NUM_MAX] = { NULL };
+ int i;
+ int hit[HBP_NUM_MAX] = {0};
+ int nr_hit = 0;
+ bool ptrace_bp = false;
+ struct ppc_inst instr = ppc_inst(0);
+ int type = 0;
+ int size = 0;
+ unsigned long ea;
/* Disable breakpoints during exception handling */
hw_breakpoint_disable();
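handle_p10dd1_spurious_exception() above only treats a DAWR exception as a spurious Power10 DD1 hit when three conditions line up for some installed watchpoint: the rounded DAWR range ends before the access, both addresses fall in the same or consecutive 512-byte blocks, and adding 64 bytes to the effective address carries into the 2K bit. The standalone sketch below replays those checks with assumed example values (a DAWR covering 0x17c0-0x17c8 and an octword VSX access at 0x17f8); it is illustrative only.

/* Replays the three handle_p10dd1_spurious_exception() checks; not kernel code. */
#include <stdio.h>

#define HW_BREAKPOINT_SIZE	8UL
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long addr = 0x17c0, len = 8, ea = 0x17f8;
	unsigned long hw_end = ALIGN(addr + len, HW_BREAKPOINT_SIZE);

	int ends_before_ea = (hw_end - 1) < ea;			 /* DAWR range ends before the op */
	int same_1k_block = ((hw_end - 1) >> 10) == (ea >> 10); /* same/consecutive 512B blocks */
	int crosses_2k = ((ea & 0x800) != ((ea + 64) & 0x800)); /* ea+64 carries into the 2K bit */

	printf("treated as spurious DD1 hit: %d\n",
	       ends_before_ea && same_1k_block && crosses_2k);
	return 0;
}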
@@ -311,12 +632,46 @@ int hw_breakpoint_handler(struct die_args *args)
*/
rcu_read_lock();
- bp = __this_cpu_read(bp_per_reg);
- if (!bp) {
- rc = NOTIFY_DONE;
- goto out;
+ if (!IS_ENABLED(CONFIG_PPC_8xx))
+ wp_get_instr_detail(regs, &instr, &type, &size, &ea);
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ bp[i] = __this_cpu_read(bp_per_reg[i]);
+ if (!bp[i])
+ continue;
+
+ info[i] = counter_arch_bp(bp[i]);
+ info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
+
+ if (wp_check_constraints(regs, instr, ea, type, size, info[i])) {
+ if (!IS_ENABLED(CONFIG_PPC_8xx) &&
+ ppc_inst_equal(instr, ppc_inst(0))) {
+ handler_error(bp[i], info[i]);
+ info[i] = NULL;
+ err = 1;
+ continue;
+ }
+
+ if (is_ptrace_bp(bp[i]))
+ ptrace_bp = true;
+ hit[i] = 1;
+ nr_hit++;
+ }
+ }
+
+ if (err)
+ goto reset;
+
+ if (!nr_hit) {
+ /* Workaround for Power10 DD1 */
+ if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 &&
+ is_octword_vsx_instr(type, size)) {
+ handle_p10dd1_spurious_exception(info, hit, ea);
+ } else {
+ rc = NOTIFY_DONE;
+ goto out;
+ }
}
- info = counter_arch_bp(bp);
/*
* Return early after invoking user-callback function without restoring
@@ -324,29 +679,50 @@ int hw_breakpoint_handler(struct die_args *args)
* one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
* generated in do_dabr().
*/
- if (bp->overflow_handler == ptrace_triggered) {
- perf_bp_event(bp, regs);
+ if (ptrace_bp) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ perf_bp_event(bp[i], regs);
+ info[i] = NULL;
+ }
rc = NOTIFY_DONE;
- goto out;
+ goto reset;
}
- info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
- if (IS_ENABLED(CONFIG_PPC_8xx)) {
- if (!dar_within_range(regs->dar, info))
- info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
- } else {
- if (!stepping_handler(regs, bp, info))
- goto out;
+ if (!IS_ENABLED(CONFIG_PPC_8xx)) {
+ if (is_larx_stcx_instr(type)) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ larx_stcx_err(bp[i], info[i]);
+ info[i] = NULL;
+ }
+ goto reset;
+ }
+
+ if (!stepping_handler(regs, bp, info, hit, instr))
+ goto reset;
}
/*
* As a policy, the callback is invoked in a 'trigger-after-execute'
* fashion
*/
- if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
- perf_bp_event(bp, regs);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+ perf_bp_event(bp[i], regs);
+ }
+
+reset:
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!info[i])
+ continue;
+ __set_breakpoint(i, info[i]);
+ }
- __set_breakpoint(info);
out:
rcu_read_unlock();
return rc;
@@ -361,26 +737,43 @@ static int single_step_dabr_instruction(struct die_args *args)
struct pt_regs *regs = args->regs;
struct perf_event *bp = NULL;
struct arch_hw_breakpoint *info;
+ int i;
+ bool found = false;
- bp = current->thread.last_hit_ubp;
/*
* Check if we are single-stepping as a result of a
* previous HW Breakpoint exception
*/
- if (!bp)
+ for (i = 0; i < nr_wp_slots(); i++) {
+ bp = current->thread.last_hit_ubp[i];
+
+ if (!bp)
+ continue;
+
+ found = true;
+ info = counter_arch_bp(bp);
+
+ /*
+ * We shall invoke the user-defined callback function in the
+ * single stepping handler to conform to 'trigger-after-execute'
+ * semantics
+ */
+ if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+ perf_bp_event(bp, regs);
+ current->thread.last_hit_ubp[i] = NULL;
+ }
+
+ if (!found)
return NOTIFY_DONE;
- info = counter_arch_bp(bp);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ bp = __this_cpu_read(bp_per_reg[i]);
+ if (!bp)
+ continue;
- /*
- * We shall invoke the user-defined callback function in the single
- * stepping handler to confirm to 'trigger-after-execute' semantics
- */
- if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
- perf_bp_event(bp, regs);
-
- __set_breakpoint(info);
- current->thread.last_hit_ubp = NULL;
+ info = counter_arch_bp(bp);
+ __set_breakpoint(i, info);
+ }
/*
* If the process was being single-stepped by ptrace, let the
@@ -419,13 +812,32 @@ NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify);
*/
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
+ int i;
struct thread_struct *t = &tsk->thread;
- unregister_hw_breakpoint(t->ptrace_bps[0]);
- t->ptrace_bps[0] = NULL;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ unregister_hw_breakpoint(t->ptrace_bps[i]);
+ t->ptrace_bps[i] = NULL;
+ }
}
void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
+
+void ptrace_triggered(struct perf_event *bp,
+ struct perf_sample_data *data, struct pt_regs *regs)
+{
+ struct perf_event_attr attr;
+
+ /*
+ * Disable the breakpoint request here since ptrace has defined a
+ * one-shot behaviour for breakpoint exceptions in PPC64.
+ * The SIGTRAP signal is generated automatically for us in do_dabr().
+ * We don't have to do anything about that here
+ */
+ attr = bp->attr;
+ attr.disabled = true;
+ modify_user_hw_breakpoint(bp, &attr);
+}
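The new task_bps/cpu_bps bookkeeping in hw_breakpoint.c enforces one coexistence rule: a ptrace watchpoint and a perf watchpoint may not cover overlapping ranges once both are rounded out to HW_BREAKPOINT_SIZE, which is what bp_addr_range_overlap() and can_co_exist() implement. A minimal userspace model of that rule, assuming HW_BREAKPOINT_SIZE is 8 bytes:

/* Userspace model of can_co_exist()/bp_addr_range_overlap(); not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define HW_BREAKPOINT_SIZE	8ULL
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

struct bp { unsigned long long addr, len; bool ptrace_bp; };

static bool ranges_overlap(const struct bp *a, const struct bp *b)
{
	unsigned long long as = ALIGN_DOWN(a->addr, HW_BREAKPOINT_SIZE);
	unsigned long long ae = ALIGN(a->addr + a->len, HW_BREAKPOINT_SIZE);
	unsigned long long bs = ALIGN_DOWN(b->addr, HW_BREAKPOINT_SIZE);
	unsigned long long be = ALIGN(b->addr + b->len, HW_BREAKPOINT_SIZE);

	return as < be && ae > bs;	/* half-open interval test */
}

static bool can_co_exist(const struct bp *a, const struct bp *b)
{
	bool alternate_infra = a->ptrace_bp != b->ptrace_bp;

	return !(alternate_infra && ranges_overlap(a, b));
}

int main(void)
{
	struct bp ptrace_wp = { 0x1000, 8, true };
	struct bp perf_wp = { 0x1004, 8, false };	/* rounded ranges overlap */

	printf("can co-exist: %d\n", can_co_exist(&ptrace_wp, &perf_wp));
	return 0;
}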
diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c
new file mode 100644
index 000000000000..42b967e3d85c
--- /dev/null
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/sstep.h>
+#include <asm/cache.h>
+
+static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+ return ((info->address <= dar) && (dar - info->address < info->len));
+}
+
+static bool ea_user_range_overlaps(unsigned long ea, int size,
+ struct arch_hw_breakpoint *info)
+{
+ return ((ea < info->address + info->len) &&
+ (ea + size > info->address));
+}
+
+static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+ unsigned long hw_start_addr, hw_end_addr;
+
+ hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
+ hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
+
+ return ((hw_start_addr <= dar) && (hw_end_addr > dar));
+}
+
+static bool ea_hw_range_overlaps(unsigned long ea, int size,
+ struct arch_hw_breakpoint *info)
+{
+ unsigned long hw_start_addr, hw_end_addr;
+ unsigned long align_size = HW_BREAKPOINT_SIZE;
+
+ /*
+ * On p10 predecessors, quadword is handled differently than
+ * other instructions.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16)
+ align_size = HW_BREAKPOINT_SIZE_QUADWORD;
+
+ hw_start_addr = ALIGN_DOWN(info->address, align_size);
+ hw_end_addr = ALIGN(info->address + info->len, align_size);
+
+ return ((ea < hw_end_addr) && (ea + size > hw_start_addr));
+}
+
+/*
+ * If hw has multiple DAWR registers, we also need to check all
+ * dawrx constraint bits to confirm this is _really_ a valid event.
+ * If type is UNKNOWN, but privilege level matches, consider it as
+ * a positive match.
+ */
+static bool check_dawrx_constraints(struct pt_regs *regs, int type,
+ struct arch_hw_breakpoint *info)
+{
+ if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ))
+ return false;
+
+ /*
+ * The Cache Management instructions other than dcbz never
+ * cause a match. i.e. if type is CACHEOP, the instruction
+ * is dcbz, and dcbz is treated as Store.
+ */
+ if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE))
+ return false;
+
+ if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL))
+ return false;
+
+ if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER))
+ return false;
+
+ return true;
+}
+
+/*
+ * Return true if the event is valid wrt dawr configuration,
+ * including extraneous exception. Otherwise return false.
+ */
+bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
+ unsigned long ea, int type, int size,
+ struct arch_hw_breakpoint *info)
+{
+ bool in_user_range = dar_in_user_range(regs->dar, info);
+ bool dawrx_constraints;
+
+ /*
+ * 8xx supports only one breakpoint and thus we can
+ * unconditionally return true.
+ */
+ if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ if (!in_user_range)
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ return true;
+ }
+
+ if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ !dar_in_hw_range(regs->dar, info))
+ return false;
+
+ return true;
+ }
+
+ dawrx_constraints = check_dawrx_constraints(regs, type, info);
+
+ if (type == UNKNOWN) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ !dar_in_hw_range(regs->dar, info))
+ return false;
+
+ return dawrx_constraints;
+ }
+
+ if (ea_user_range_overlaps(ea, size, info))
+ return dawrx_constraints;
+
+ if (ea_hw_range_overlaps(ea, size, info)) {
+ if (dawrx_constraints) {
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ return true;
+ }
+ }
+ return false;
+}
+
+void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
+ int *type, int *size, unsigned long *ea)
+{
+ struct instruction_op op;
+
+ if (__get_user_instr(*instr, (void __user *)regs->nip))
+ return;
+
+ analyse_instr(&op, regs, *instr);
+ *type = GETTYPE(op.type);
+ *ea = op.ea;
+
+ if (!(regs->msr & MSR_64BIT))
+ *ea &= 0xffffffffUL;
+
+
+ *size = GETSIZE(op.type);
+ if (*type == CACHEOP) {
+ *size = l1_dcache_bytes();
+ *ea &= ~(*size - 1);
+ } else if (*type == LOAD_VMX || *type == STORE_VMX) {
+ *ea &= ~(*size - 1);
+ }
+}
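wp_check_constraints() above distinguishes a hit on the user-requested range from a hit that only lands in the hardware-rounded window: the latter is still serviced but flagged HW_BRK_TYPE_EXTRANEOUS_IRQ so no event is delivered. A small standalone illustration of that distinction, again assuming HW_BREAKPOINT_SIZE is 8 bytes:

/* Models dar_in_user_range() vs. dar_in_hw_range(); not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define HW_BREAKPOINT_SIZE	8UL
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

static bool in_user_range(unsigned long dar, unsigned long addr, unsigned long len)
{
	return addr <= dar && dar - addr < len;
}

static bool in_hw_range(unsigned long dar, unsigned long addr, unsigned long len)
{
	unsigned long start = ALIGN_DOWN(addr, HW_BREAKPOINT_SIZE);
	unsigned long end = ALIGN(addr + len, HW_BREAKPOINT_SIZE);

	return start <= dar && dar < end;
}

int main(void)
{
	unsigned long addr = 0x1004, len = 2;	/* user watches [0x1004, 0x1006) */
	unsigned long dar = 0x1000;		/* access inside the rounded window only */

	printf("user range hit: %d, hw range hit: %d\n",
	       in_user_range(dar, addr, len), in_hw_range(dar, addr, len));
	return 0;
}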
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 422e31d2f5a2..1f835539fda4 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -41,14 +41,6 @@ static int __init powersave_off(char *arg)
}
__setup("powersave=off", powersave_off);
-#ifdef CONFIG_HOTPLUG_CPU
-void arch_cpu_idle_dead(void)
-{
- sched_preempt_enable_no_resched();
- cpu_die();
-}
-#endif
-
void arch_cpu_idle(void)
{
ppc64_runlatch_off();
@@ -60,9 +52,9 @@ void arch_cpu_idle(void)
* interrupts enabled, some don't.
*/
if (irqs_disabled())
- local_irq_enable();
+ raw_local_irq_enable();
} else {
- local_irq_enable();
+ raw_local_irq_enable();
/*
* Go into low thread priority and possibly
* low power mode.
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 433d97bea1f3..13cad9297d82 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -145,9 +145,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
/*
* Return from NAP/DOZE mode, restore some CPU specific registers,
- * we are called with DR/IR still off and r2 containing physical
- * address of current. R11 points to the exception frame (physical
- * address). We have to preserve r10.
+ * R11 points to the exception frame. We have to preserve r10.
*/
_GLOBAL(power_save_ppc32_restore)
lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */
@@ -166,11 +164,7 @@ BEGIN_FTR_SECTION
mfspr r9,SPRN_HID0
andis. r9,r9,HID0_NAP@h
beq 1f
-#ifdef CONFIG_VMAP_STACK
addis r9, r11, nap_save_msscr0@ha
-#else
- addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha
-#endif
lwz r9,nap_save_msscr0@l(r9)
mtspr SPRN_MSSCR0, r9
sync
@@ -178,15 +172,12 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
BEGIN_FTR_SECTION
-#ifdef CONFIG_VMAP_STACK
addis r9, r11, nap_save_hid1@ha
-#else
- addis r9,r11,(nap_save_hid1-KERNELBASE)@ha
-#endif
lwz r9,nap_save_hid1@l(r9)
mtspr SPRN_HID1, r9
END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
- b transfer_to_handler_cont
+ blr
+_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
.data
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 22f249b6f58d..3d97fb833834 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -52,28 +52,32 @@ _GLOBAL(isa300_idle_stop_mayloss)
std r1,PACAR1(r13)
mflr r4
mfcr r5
- /* use stack red zone rather than a new frame for saving regs */
- std r2,-8*0(r1)
- std r14,-8*1(r1)
- std r15,-8*2(r1)
- std r16,-8*3(r1)
- std r17,-8*4(r1)
- std r18,-8*5(r1)
- std r19,-8*6(r1)
- std r20,-8*7(r1)
- std r21,-8*8(r1)
- std r22,-8*9(r1)
- std r23,-8*10(r1)
- std r24,-8*11(r1)
- std r25,-8*12(r1)
- std r26,-8*13(r1)
- std r27,-8*14(r1)
- std r28,-8*15(r1)
- std r29,-8*16(r1)
- std r30,-8*17(r1)
- std r31,-8*18(r1)
- std r4,-8*19(r1)
- std r5,-8*20(r1)
+ /*
+ * Use the stack red zone rather than a new frame for saving regs since
+ * in the case of no GPR loss the wakeup code branches directly back to
+ * the caller without deallocating the stack frame first.
+ */
+ std r2,-8*1(r1)
+ std r14,-8*2(r1)
+ std r15,-8*3(r1)
+ std r16,-8*4(r1)
+ std r17,-8*5(r1)
+ std r18,-8*6(r1)
+ std r19,-8*7(r1)
+ std r20,-8*8(r1)
+ std r21,-8*9(r1)
+ std r22,-8*10(r1)
+ std r23,-8*11(r1)
+ std r24,-8*12(r1)
+ std r25,-8*13(r1)
+ std r26,-8*14(r1)
+ std r27,-8*15(r1)
+ std r28,-8*16(r1)
+ std r29,-8*17(r1)
+ std r30,-8*18(r1)
+ std r31,-8*19(r1)
+ std r4,-8*20(r1)
+ std r5,-8*21(r1)
/* 168 bytes */
PPC_STOP
b . /* catch bugs */
@@ -89,8 +93,8 @@ _GLOBAL(isa300_idle_stop_mayloss)
*/
_GLOBAL(idle_return_gpr_loss)
ld r1,PACAR1(r13)
- ld r4,-8*19(r1)
- ld r5,-8*20(r1)
+ ld r4,-8*20(r1)
+ ld r5,-8*21(r1)
mtlr r4
mtcr r5
/*
@@ -98,38 +102,40 @@ _GLOBAL(idle_return_gpr_loss)
* from PACATOC. This could be avoided for that less common case
* if KVM saved its r2.
*/
- ld r2,-8*0(r1)
- ld r14,-8*1(r1)
- ld r15,-8*2(r1)
- ld r16,-8*3(r1)
- ld r17,-8*4(r1)
- ld r18,-8*5(r1)
- ld r19,-8*6(r1)
- ld r20,-8*7(r1)
- ld r21,-8*8(r1)
- ld r22,-8*9(r1)
- ld r23,-8*10(r1)
- ld r24,-8*11(r1)
- ld r25,-8*12(r1)
- ld r26,-8*13(r1)
- ld r27,-8*14(r1)
- ld r28,-8*15(r1)
- ld r29,-8*16(r1)
- ld r30,-8*17(r1)
- ld r31,-8*18(r1)
+ ld r2,-8*1(r1)
+ ld r14,-8*2(r1)
+ ld r15,-8*3(r1)
+ ld r16,-8*4(r1)
+ ld r17,-8*5(r1)
+ ld r18,-8*6(r1)
+ ld r19,-8*7(r1)
+ ld r20,-8*8(r1)
+ ld r21,-8*9(r1)
+ ld r22,-8*10(r1)
+ ld r23,-8*11(r1)
+ ld r24,-8*12(r1)
+ ld r25,-8*13(r1)
+ ld r26,-8*14(r1)
+ ld r27,-8*15(r1)
+ ld r28,-8*16(r1)
+ ld r29,-8*17(r1)
+ ld r30,-8*18(r1)
+ ld r31,-8*19(r1)
blr
/*
* This is the sequence required to execute idle instructions, as
* specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
- *
- * The 0(r1) slot is used to save r2 in isa206, so use that here.
+ * We have to store a GPR somewhere, ptesync, then reload it, and create
+ * a false dependency on the result of the load. It doesn't matter which
+ * GPR we store, or where we store it. We have already stored r2 to the
+ * stack at -8(r1) in isa206_idle_insn_mayloss, so use that.
*/
#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
/* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r2,0(r1); \
+ std r2,-8(r1); \
ptesync; \
- ld r2,0(r1); \
+ ld r2,-8(r1); \
236: cmpd cr0,r2,r2; \
bne 236b; \
IDLE_INST; \
@@ -154,28 +160,32 @@ _GLOBAL(isa206_idle_insn_mayloss)
std r1,PACAR1(r13)
mflr r4
mfcr r5
- /* use stack red zone rather than a new frame for saving regs */
- std r2,-8*0(r1)
- std r14,-8*1(r1)
- std r15,-8*2(r1)
- std r16,-8*3(r1)
- std r17,-8*4(r1)
- std r18,-8*5(r1)
- std r19,-8*6(r1)
- std r20,-8*7(r1)
- std r21,-8*8(r1)
- std r22,-8*9(r1)
- std r23,-8*10(r1)
- std r24,-8*11(r1)
- std r25,-8*12(r1)
- std r26,-8*13(r1)
- std r27,-8*14(r1)
- std r28,-8*15(r1)
- std r29,-8*16(r1)
- std r30,-8*17(r1)
- std r31,-8*18(r1)
- std r4,-8*19(r1)
- std r5,-8*20(r1)
+ /*
+ * Use the stack red zone rather than a new frame for saving regs since
+ * in the case of no GPR loss the wakeup code branches directly back to
+ * the caller without deallocating the stack frame first.
+ */
+ std r2,-8*1(r1)
+ std r14,-8*2(r1)
+ std r15,-8*3(r1)
+ std r16,-8*4(r1)
+ std r17,-8*5(r1)
+ std r18,-8*6(r1)
+ std r19,-8*7(r1)
+ std r20,-8*8(r1)
+ std r21,-8*9(r1)
+ std r22,-8*10(r1)
+ std r23,-8*11(r1)
+ std r24,-8*12(r1)
+ std r25,-8*13(r1)
+ std r26,-8*14(r1)
+ std r27,-8*15(r1)
+ std r28,-8*16(r1)
+ std r29,-8*17(r1)
+ std r30,-8*18(r1)
+ std r31,-8*19(r1)
+ std r4,-8*20(r1)
+ std r5,-8*21(r1)
cmpwi r3,PNV_THREAD_NAP
bne 1f
IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
@@ -201,4 +211,8 @@ _GLOBAL(power4_idle_nap)
mtmsrd r7
isync
b 1b
+
+ .globl power4_idle_nap_return
+power4_idle_nap_return:
+ blr
#endif
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
index 308f499e146c..9e1bc4502c50 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -74,19 +74,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
/*
* Return from NAP/DOZE mode, restore some CPU specific registers,
- * r2 containing physical address of current.
- * r11 points to the exception frame (physical address).
+ * r2 containing address of current.
+ * r11 points to the exception frame.
* We have to preserve r10.
*/
_GLOBAL(power_save_ppc32_restore)
lwz r9,_LINK(r11) /* interrupted in e500_idle */
stw r9,_NIP(r11) /* make it do a blr */
-
-#ifdef CONFIG_SMP
- lwz r11,TASK_CPU(r2) /* get cpu number * 4 */
- slwi r11,r11,2
-#else
- li r11,0
-#endif
-
- b transfer_to_handler_cont
+ blr
+_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c
index e34116255ced..957abd592075 100644
--- a/arch/powerpc/kernel/ima_arch.c
+++ b/arch/powerpc/kernel/ima_arch.c
@@ -19,12 +19,12 @@ bool arch_ima_get_secureboot(void)
* to be stored as an xattr or as an appended signature.
*
* To avoid duplicate signature verification as much as possible, the IMA
- * policy rule for module appraisal is added only if CONFIG_MODULE_SIG_FORCE
+ * policy rule for module appraisal is added only if CONFIG_MODULE_SIG
* is not enabled.
*/
static const char *const secure_rules[] = {
"appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
-#ifndef CONFIG_MODULE_SIG_FORCE
+#ifndef CONFIG_MODULE_SIG
"appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
#endif
NULL
@@ -50,7 +50,7 @@ static const char *const secure_and_trusted_rules[] = {
"measure func=KEXEC_KERNEL_CHECK template=ima-modsig",
"measure func=MODULE_CHECK template=ima-modsig",
"appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
-#ifndef CONFIG_MODULE_SIG_FORCE
+#ifndef CONFIG_MODULE_SIG
"appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
#endif
NULL
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
new file mode 100644
index 000000000000..563ebfcd16e2
--- /dev/null
+++ b/arch/powerpc/kernel/interrupt.c
@@ -0,0 +1,670 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/context_tracking.h>
+#include <linux/err.h>
+#include <linux/compat.h>
+#include <linux/sched/debug.h> /* for show_regs */
+
+#include <asm/asm-prototypes.h>
+#include <asm/kup.h>
+#include <asm/cputime.h>
+#include <asm/hw_irq.h>
+#include <asm/interrupt.h>
+#include <asm/kprobes.h>
+#include <asm/paca.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/signal.h>
+#include <asm/switch_to.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+#include <asm/tm.h>
+#include <asm/unistd.h>
+
+#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
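+/*
+ * Per-CPU save slot for the previous DBCR0 value, stashed by
+ * booke_load_dbcr0() before a task's DBCR0 is loaded.
+ */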
+unsigned long global_dbcr0[NR_CPUS];
+#endif
+
+typedef long (*syscall_fn)(long, long, long, long, long, long);
+
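+/*
+ * On Book3S-64 the interrupt exit can normally be restarted, so EE/RI only
+ * need to be hard-disabled when the interrupt_exit_not_reentrant static key
+ * is set; all other platforms always hard-disable before the final exit.
+ */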
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+static inline bool exit_must_hard_disable(void)
+{
+ return static_branch_unlikely(&interrupt_exit_not_reentrant);
+}
+#else
+static inline bool exit_must_hard_disable(void)
+{
+ return true;
+}
+#endif
+
+/*
+ * local irqs must be disabled. Returns false if the caller must re-enable
+ * them, check for new work, and try again.
+ *
+ * This should be called with local irqs disabled, but if they were previously
+ * enabled when the interrupt handler returns (indicating a process-context /
+ * synchronous interrupt) then irqs_enabled should be true.
+ *
+ * If restartable is true then EE/RI can be left on because interrupts are handled
+ * with a restart sequence.
+ */
+static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
+{
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+ if (exit_must_hard_disable() || !restartable)
+ __hard_EE_RI_disable();
+
+#ifdef CONFIG_PPC64
+ /* This pattern matches prep_irq_for_idle */
+ if (unlikely(lazy_irq_pending_nocheck())) {
+ if (exit_must_hard_disable() || !restartable) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ __hard_RI_enable();
+ }
+ trace_hardirqs_off();
+
+ return false;
+ }
+#endif
+ return true;
+}
+
+/* Has to run notrace because it is entered not completely "reconciled" */
+notrace long system_call_exception(long r3, long r4, long r5,
+ long r6, long r7, long r8,
+ unsigned long r0, struct pt_regs *regs)
+{
+ syscall_fn f;
+
+ kuep_lock();
+
+ regs->orig_gpr3 = r3;
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+
+ trace_hardirqs_off(); /* finish reconciling */
+
+ CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
+ user_exit_irqoff();
+
+ BUG_ON(regs_is_unrecoverable(regs));
+ BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(arch_irq_disabled_regs(regs));
+
+#ifdef CONFIG_PPC_PKEY
+ if (mmu_has_feature(MMU_FTR_PKEY)) {
+ unsigned long amr, iamr;
+ bool flush_needed = false;
+ /*
+ * When entering from userspace we mostly have the AMR/IAMR
+ * different from kernel default values. Hence don't compare.
+ */
+ amr = mfspr(SPRN_AMR);
+ iamr = mfspr(SPRN_IAMR);
+ regs->amr = amr;
+ regs->iamr = iamr;
+ if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
+ mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+ flush_needed = true;
+ }
+ if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
+ mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
+ flush_needed = true;
+ }
+ if (flush_needed)
+ isync();
+ } else
+#endif
+ kuap_assert_locked();
+
+ booke_restore_dbcr0();
+
+ account_cpu_user_entry();
+
+ account_stolen_time();
+
+ /*
+ * This is not required for the syscall exit path, but makes the
+ * stack frame look nicer. If this was initialised in the first stack
+ * frame, or if the unwinder was taught the first stack frame always
+ * returns to user with IRQS_ENABLED, this store could be avoided!
+ */
+ irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
+
+ /*
+ * If the system call is made with TM active, set _TIF_RESTOREALL to
+ * prevent RFSCV being used to return to userspace, because the POWER9
+ * TM implementation has problems with this instruction returning to
+ * transactional state. Final register values are not relevant because
+ * the transaction will be aborted upon return anyway. In the case of
+ * an unsupported_scv SIGILL fault, the return state does not matter
+ * much because it's an edge case.
+ */
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
+ set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
+
+ /*
+ * If the system call was made with a transaction active, doom it and
+ * return without performing the system call. Unless it was an
+ * unsupported scv vector, in which case it's treated like an illegal
+ * instruction.
+ */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
+ !trap_is_unsupported_scv(regs)) {
+ /* Enable TM in the kernel, and disable EE (for scv) */
+ hard_irq_disable();
+ mtmsr(mfmsr() | MSR_TM);
+
+ /* tabort, this dooms the transaction, nothing else */
+ asm volatile(".long 0x7c00071d | ((%0) << 16)"
+ :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
+
+ /*
+ * Userspace will never see the return value. Execution will
+ * resume after the tbegin. of the aborted transaction with the
+ * checkpointed register state. A context switch could occur
+ * or signal delivered to the process before resuming the
+ * doomed transaction context, but that should all be handled
+ * as expected.
+ */
+ return -ENOSYS;
+ }
+#endif // CONFIG_PPC_TRANSACTIONAL_MEM
+
+ local_irq_enable();
+
+ if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
+ if (unlikely(trap_is_unsupported_scv(regs))) {
+ /* Unsupported scv vector */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return regs->gpr[3];
+ }
+ /*
+ * We use the return value of do_syscall_trace_enter() as the
+ * syscall number. If the syscall was rejected for any reason
+ * do_syscall_trace_enter() returns an invalid syscall number
+ * and the test against NR_syscalls will fail, so the return
+ * value to be used is taken from regs->gpr[3].
+ */
+ r0 = do_syscall_trace_enter(regs);
+ if (unlikely(r0 >= NR_syscalls))
+ return regs->gpr[3];
+ r3 = regs->gpr[3];
+ r4 = regs->gpr[4];
+ r5 = regs->gpr[5];
+ r6 = regs->gpr[6];
+ r7 = regs->gpr[7];
+ r8 = regs->gpr[8];
+
+ } else if (unlikely(r0 >= NR_syscalls)) {
+ if (unlikely(trap_is_unsupported_scv(regs))) {
+ /* Unsupported scv vector */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return regs->gpr[3];
+ }
+ return -ENOSYS;
+ }
+
+ /* May be faster to do array_index_nospec? */
+ barrier_nospec();
+
+ if (unlikely(is_compat_task())) {
+ f = (void *)compat_sys_call_table[r0];
+
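+ /*
+ * Compat (32-bit) tasks pass 32-bit arguments; clear the upper
+ * halves so the handler only sees the low 32 bits.
+ */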
+ r3 &= 0x00000000ffffffffULL;
+ r4 &= 0x00000000ffffffffULL;
+ r5 &= 0x00000000ffffffffULL;
+ r6 &= 0x00000000ffffffffULL;
+ r7 &= 0x00000000ffffffffULL;
+ r8 &= 0x00000000ffffffffULL;
+
+ } else {
+ f = (void *)sys_call_table[r0];
+ }
+
+ return f(r3, r4, r5, r6, r7, r8);
+}
+
+static notrace void booke_load_dbcr0(void)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ unsigned long dbcr0 = current->thread.debug.dbcr0;
+
+ if (likely(!(dbcr0 & DBCR0_IDM)))
+ return;
+
+ /*
+ * Check to see if the dbcr0 register is set up to debug.
+ * Use the internal debug mode bit to do this.
+ */
+ mtmsr(mfmsr() & ~MSR_DE);
+ if (IS_ENABLED(CONFIG_PPC32)) {
+ isync();
+ global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
+ }
+ mtspr(SPRN_DBCR0, dbcr0);
+ mtspr(SPRN_DBSR, -1);
+#endif
+}
+
+static void check_return_regs_valid(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ unsigned long trap, srr0, srr1;
+ static bool warned;
+ u8 *validp;
+ char *h;
+
+ if (trap_is_scv(regs))
+ return;
+
+ trap = TRAP(regs);
+ // EE in HV mode sets HSRRs like 0xea0
+ if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
+ trap = 0xea0;
+
+ switch (trap) {
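+ /* These vectors are delivered via HSRR0/HSRR1 rather than SRR0/SRR1. */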
+ case 0x980:
+ case INTERRUPT_H_DATA_STORAGE:
+ case 0xe20:
+ case 0xe40:
+ case INTERRUPT_HMI:
+ case 0xe80:
+ case 0xea0:
+ case INTERRUPT_H_FAC_UNAVAIL:
+ case 0x1200:
+ case 0x1500:
+ case 0x1600:
+ case 0x1800:
+ validp = &local_paca->hsrr_valid;
+ if (!*validp)
+ return;
+
+ srr0 = mfspr(SPRN_HSRR0);
+ srr1 = mfspr(SPRN_HSRR1);
+ h = "H";
+
+ break;
+ default:
+ validp = &local_paca->srr_valid;
+ if (!*validp)
+ return;
+
+ srr0 = mfspr(SPRN_SRR0);
+ srr1 = mfspr(SPRN_SRR1);
+ h = "";
+ break;
+ }
+
+ if (srr0 == regs->nip && srr1 == regs->msr)
+ return;
+
+ /*
+ * An NMI / soft-NMI interrupt may have come in after we found
+ * srr_valid and before the SRRs are loaded. The interrupt then
+ * comes in and clobbers SRRs and clears srr_valid. Then we load
+ * the SRRs here and test them above and find they don't match.
+ *
+ * Test validity again after that, to catch such false positives.
+ *
+ * This test in general will have some window for false negatives
+ * and may not catch and fix all such cases if an NMI comes in
+ * later and clobbers SRRs without clearing srr_valid, but hopefully
+ * such things will get caught most of the time, statistically
+ * enough to be able to get a warning out.
+ */
+ barrier();
+
+ if (!*validp)
+ return;
+
+ if (!warned) {
+ warned = true;
+ printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
+ printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
+ show_regs(regs);
+ }
+
+ *validp = 0; /* fixup */
+#endif
+}
+
+static notrace unsigned long
+interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
+{
+ unsigned long ti_flags;
+
+again:
+ ti_flags = read_thread_flags();
+ while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ local_irq_enable();
+ if (ti_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ /*
+ * SIGPENDING must restore signal handler function
+ * argument GPRs, and some non-volatiles (e.g., r1).
+ * Restore all for now. This could be made lighter.
+ */
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
+ }
+ local_irq_disable();
+ ti_flags = read_thread_flags();
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely((ti_flags & _TIF_RESTORE_TM))) {
+ restore_tm_state(regs);
+ } else {
+ unsigned long mathflags = MSR_FP;
+
+ if (cpu_has_feature(CPU_FTR_VSX))
+ mathflags |= MSR_VEC | MSR_VSX;
+ else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ mathflags |= MSR_VEC;
+
+ /*
+ * If userspace MSR has all available FP bits set,
+ * then they are live and no need to restore. If not,
+ * it means the regs were given up and restore_math
+ * may decide to restore them (to avoid taking an FP
+ * fault).
+ */
+ if ((regs->msr & mathflags) != mathflags)
+ restore_math(regs);
+ }
+ }
+
+ check_return_regs_valid(regs);
+
+ user_enter_irqoff();
+ if (!prep_irq_for_enabled_exit(true)) {
+ user_exit_irqoff();
+ local_irq_enable();
+ local_irq_disable();
+ goto again;
+ }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ booke_load_dbcr0();
+
+ account_cpu_user_exit();
+
+ /* Restore user access locks last */
+ kuap_user_restore(regs);
+ kuep_unlock();
+
+ return ret;
+}
+
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+ struct pt_regs *regs,
+ long scv)
+{
+ unsigned long ti_flags;
+ unsigned long ret = 0;
+ bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
+
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+ kuap_assert_locked();
+
+ regs->result = r3;
+
+ /* Check whether the syscall is issued inside a restartable sequence */
+ rseq_syscall(regs);
+
+ ti_flags = read_thread_flags();
+
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
+ if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+ r3 = -r3;
+ regs->ccr |= 0x10000000; /* Set SO bit in CR */
+ }
+ }
+
+ if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+ if (ti_flags & _TIF_RESTOREALL)
+ ret = _TIF_RESTOREALL;
+ else
+ regs->gpr[3] = r3;
+ clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
+ } else {
+ regs->gpr[3] = r3;
+ }
+
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ do_syscall_trace_leave(regs);
+ ret |= _TIF_RESTOREALL;
+ }
+
+ local_irq_disable();
+ ret = interrupt_exit_user_prepare_main(ret, regs);
+
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
+#endif
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC64
+notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
+{
+ /*
+ * This is called when detecting a soft-pending interrupt as well as
+ * an alternate-return interrupt. So we can't just have the alternate
+ * return path clear EE in SRR1 and set PACA_IRQ_HARD_DIS (unless
+ * the soft-pending case were to fix things up as well). RI might be
+ * disabled, in which case it gets re-enabled by __hard_irq_disable().
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
+
+ return regs->exit_result;
+}
+#endif
+
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
+{
+ unsigned long ret;
+
+ BUG_ON(regs_is_unrecoverable(regs));
+ BUG_ON(arch_irq_disabled_regs(regs));
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * AMR can only have been unlocked if we interrupted the kernel.
+ */
+ kuap_assert_locked();
+
+ local_irq_disable();
+
+ ret = interrupt_exit_user_prepare_main(0, regs);
+
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
+#endif
+
+ return ret;
+}
+
+void preempt_schedule_irq(void);
+
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
+{
+ unsigned long flags;
+ unsigned long ret = 0;
+ unsigned long kuap;
+ bool stack_store = read_thread_flags() & _TIF_EMULATE_STACK_STORE;
+
+ if (regs_is_unrecoverable(regs))
+ unrecoverable_exception(regs);
+ /*
+ * CT_WARN_ON comes here via program_check_exception,
+ * so avoid recursion.
+ */
+ if (TRAP(regs) != INTERRUPT_PROGRAM)
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+ kuap = kuap_get_and_assert_locked();
+
+ local_irq_save(flags);
+
+ if (!arch_irq_disabled_regs(regs)) {
+ /* Returning to a kernel context with local irqs enabled. */
+ WARN_ON_ONCE(!(regs->msr & MSR_EE));
+again:
+ if (IS_ENABLED(CONFIG_PREEMPT)) {
+ /* Return to preemptible kernel context */
+ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
+ if (preempt_count() == 0)
+ preempt_schedule_irq();
+ }
+ }
+
+ check_return_regs_valid(regs);
+
+ /*
+ * Stack store exit can't be restarted because the interrupt
+ * stack frame might have been clobbered.
+ */
+ if (!prep_irq_for_enabled_exit(unlikely(stack_store))) {
+ /*
+ * Replay pending soft-masked interrupts now. Don't
+ * just local_irq_enable(); local_irq_disable(); because
+ * if we are returning from an asynchronous interrupt
+ * here, another one might hit after irqs are enabled,
+ * and it would exit via this same path allowing
+ * another to fire, and so on unbounded.
+ */
+ hard_irq_disable();
+ replay_soft_interrupts();
+ /* Took an interrupt, may have more exit work to do. */
+ goto again;
+ }
+#ifdef CONFIG_PPC64
+ /*
+ * An interrupt may clear MSR[EE] and set this concurrently,
+ * but it will be marked pending and the exit will be retried.
+ * This leaves a racy window where MSR[EE]=0 and HARD_DIS is
+ * clear, until interrupt_exit_kernel_restart() calls
+ * hard_irq_disable(), which will set HARD_DIS again.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+ } else {
+ check_return_regs_valid(regs);
+
+ if (unlikely(stack_store))
+ __hard_EE_RI_disable();
+ /*
+ * Returning to a kernel context with local irqs disabled.
+ * Here, if EE was enabled in the interrupted context, enable
+ * it on return as well. A problem exists here where a soft
+ * masked interrupt may have cleared MSR[EE] and set HARD_DIS
+ * here, and it will still exist on return to the caller. This
+ * will be resolved by the masked interrupt firing again.
+ */
+ if (regs->msr & MSR_EE)
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+#endif /* CONFIG_PPC64 */
+ }
+
+ if (unlikely(stack_store)) {
+ clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
+ ret = 1;
+ }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ /*
+ * 64s does not want to mfspr(SPRN_AMR) here, because this comes after
+ * mtmsr, which would cause Read-After-Write stalls. Hence, take the
+ * AMR value from the check above.
+ */
+ kuap_kernel_restore(regs, kuap);
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC64
+notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result |= interrupt_exit_user_prepare(regs);
+
+ return regs->exit_result;
+}
+
+/*
+ * No real need to return a value here because the stack store case does not
+ * get restarted.
+ */
+notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ if (regs->softe == IRQS_ENABLED)
+ trace_hardirqs_off();
+
+ BUG_ON(user_mode(regs));
+
+ return interrupt_exit_kernel_prepare(regs);
+}
+#endif
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
new file mode 100644
index 000000000000..ec950b08a8dc
--- /dev/null
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -0,0 +1,733 @@
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
+#include <asm/feature-fixups.h>
+#include <asm/head-64.h>
+#include <asm/hw_irq.h>
+#include <asm/kup.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+
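+/*
+ * TOC entries for the system call table (and the compat table when
+ * CONFIG_COMPAT is enabled), for use by the system call handling code
+ * in this file.
+ */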
+ .section ".toc","aw"
+SYS_CALL_TABLE:
+ .tc sys_call_table[TC],sys_call_table
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYS_CALL_TABLE:
+ .tc compat_sys_call_table[TC],compat_sys_call_table
+#endif
+ .previous
+
+ .align 7
+
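+/*
+ * Compare the (H)SRR0/1 values about to be used for the return against the
+ * NIP/MSR saved in the interrupt frame, and warn (once) on a mismatch.
+ */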
+.macro DEBUG_SRR_VALID srr
+#ifdef CONFIG_PPC_RFI_SRR_DEBUG
+ .ifc \srr,srr
+ mfspr r11,SPRN_SRR0
+ ld r12,_NIP(r1)
+100: tdne r11,r12
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ mfspr r11,SPRN_SRR1
+ ld r12,_MSR(r1)
+100: tdne r11,r12
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ .else
+ mfspr r11,SPRN_HSRR0
+ ld r12,_NIP(r1)
+100: tdne r11,r12
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ mfspr r11,SPRN_HSRR1
+ ld r12,_MSR(r1)
+100: tdne r11,r12
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ .endif