aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/.gitignore1
-rw-r--r--arch/powerpc/kernel/Makefile14
-rw-r--r--arch/powerpc/kernel/btext.c2
-rw-r--r--arch/powerpc/kernel/cputable.c1
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c1
-rw-r--r--arch/powerpc/kernel/eeh.c145
-rw-r--r--arch/powerpc/kernel/entry_32.S38
-rw-r--r--arch/powerpc/kernel/entry_64.S897
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S287
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S2058
-rw-r--r--arch/powerpc/kernel/fadump.c134
-rw-r--r--arch/powerpc/kernel/head_32.S9
-rw-r--r--arch/powerpc/kernel/head_32.h28
-rw-r--r--arch/powerpc/kernel/head_64.S4
-rw-r--r--arch/powerpc/kernel/head_booke.h5
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c16
-rw-r--r--arch/powerpc/kernel/irq.c205
-rw-r--r--arch/powerpc/kernel/kprobes.c84
-rw-r--r--arch/powerpc/kernel/mce.c14
-rw-r--r--arch/powerpc/kernel/mce_power.c8
-rw-r--r--arch/powerpc/kernel/misc.S4
-rw-r--r--arch/powerpc/kernel/of_platform.c12
-rw-r--r--arch/powerpc/kernel/paca.c14
-rw-r--r--arch/powerpc/kernel/pci-common.c8
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c2
-rw-r--r--arch/powerpc/kernel/ppc_save_regs.S6
-rw-r--r--arch/powerpc/kernel/process.c124
-rw-r--r--arch/powerpc/kernel/prom_init.c4
-rw-r--r--arch/powerpc/kernel/ptrace.c3468
-rw-r--r--arch/powerpc/kernel/ptrace/Makefile20
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-adv.c492
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-altivec.c128
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-decl.h184
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-noadv.c265
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-novsx.c57
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-spe.c68
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-tm.c851
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-view.c904
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-vsx.c151
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace.c481
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace32.c (renamed from arch/powerpc/kernel/ptrace32.c)11
-rw-r--r--arch/powerpc/kernel/setup-common.c3
-rw-r--r--arch/powerpc/kernel/setup.h6
-rw-r--r--arch/powerpc/kernel/setup_32.c1
-rw-r--r--arch/powerpc/kernel/setup_64.c32
-rw-r--r--arch/powerpc/kernel/signal.c144
-rw-r--r--arch/powerpc/kernel/signal.h2
-rw-r--r--arch/powerpc/kernel/signal_32.c140
-rw-r--r--arch/powerpc/kernel/signal_64.c4
-rw-r--r--arch/powerpc/kernel/smp.c31
-rw-r--r--arch/powerpc/kernel/stacktrace.c6
-rw-r--r--arch/powerpc/kernel/syscall_64.c377
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl22
-rw-r--r--arch/powerpc/kernel/syscalls/syscallhdr.sh3
-rw-r--r--arch/powerpc/kernel/sysfs.c381
-rw-r--r--arch/powerpc/kernel/systbl.S9
-rw-r--r--arch/powerpc/kernel/time.c57
-rw-r--r--arch/powerpc/kernel/traps.c25
-rw-r--r--arch/powerpc/kernel/vdso.c8
-rw-r--r--arch/powerpc/kernel/vdso32/.gitignore1
-rw-r--r--arch/powerpc/kernel/vdso64/.gitignore1
-rw-r--r--arch/powerpc/kernel/vector.S2
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S7
63 files changed, 6746 insertions, 5721 deletions
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore
index 67ebd3003c05..d71179d3ffe9 100644
--- a/arch/powerpc/kernel/.gitignore
+++ b/arch/powerpc/kernel/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
prom_init_check
vmlinux.lds
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 78a1b22d4fd8..1c4385852d3d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -3,8 +3,6 @@
# Makefile for the linux kernel.
#
-CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-
ifdef CONFIG_PPC64
CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
endif
@@ -41,16 +39,18 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
endif
-obj-y := cputable.o ptrace.o syscalls.o \
- irq.o align.o signal_32.o pmc.o vdso.o \
+obj-y := cputable.o syscalls.o \
+ irq.o align.o signal_$(BITS).o pmc.o vdso.o \
process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o misc_$(BITS).o \
of_platform.o prom_parse.o
-obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
- signal_64.o ptrace32.o \
- paca.o nvram_64.o firmware.o note.o
+obj-y += ptrace/
+obj-$(CONFIG_PPC64) += setup_64.o \
+ paca.o nvram_64.o firmware.o note.o \
+ syscall_64.o
+obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 6dfceaa820e4..f57712a55815 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -26,7 +26,7 @@
static void scrollscreen(void);
#endif
-#define __force_data __attribute__((__section__(".data")))
+#define __force_data __section(.data)
static int g_loc_X __force_data;
static int g_loc_Y __force_data;
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 245be4fafe13..13eba2eb46fe 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2198,7 +2198,6 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
*/
if (old.oprofile_cpu_type != NULL) {
t->oprofile_cpu_type = old.oprofile_cpu_type;
- t->oprofile_type = old.oprofile_type;
t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG;
}
}
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 182b4047c1ef..36bc0d5c4f3a 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -139,7 +139,6 @@ static void __init cpufeatures_setup_cpu(void)
/* Initialize the base environment -- clear FSCR/HFSCR. */
hv_mode = !!(mfmsr() & MSR_HV);
if (hv_mode) {
- /* CPU_FTR_HVMODE is used early in PACA setup */
cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
mtspr(SPRN_HFSCR, 0);
}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 17cb3e9b5697..7cdcb413bb44 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1107,87 +1107,43 @@ static int eeh_init(void)
core_initcall_sync(eeh_init);
/**
- * eeh_add_device_early - Enable EEH for the indicated device node
- * @pdn: PCI device node for which to set up EEH
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices that were added after system boot (e.g. hotplug, dlpar).
- * This routine must be called before any i/o is performed to the
- * adapter (inluding any config-space i/o).
- * Whether this actually enables EEH or not for this device depends
- * on the CEC architecture, type of the device, on earlier boot
- * command-line arguments & etc.
- */
-void eeh_add_device_early(struct pci_dn *pdn)
-{
- struct pci_controller *phb = pdn ? pdn->phb : NULL;
- struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-
- if (!edev)
- return;
-
- if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
- return;
-
- /* USB Bus children of PCI devices will not have BUID's */
- if (NULL == phb ||
- (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
- return;
-
- eeh_ops->probe(pdn, NULL);
-}
-
-/**
- * eeh_add_device_tree_early - Enable EEH for the indicated device
- * @pdn: PCI device node
- *
- * This routine must be used to perform EEH initialization for the
- * indicated PCI device that was added after system boot (e.g.
- * hotplug, dlpar).
- */
-void eeh_add_device_tree_early(struct pci_dn *pdn)
-{
- struct pci_dn *n;
-
- if (!pdn)
- return;
-
- list_for_each_entry(n, &pdn->child_list, list)
- eeh_add_device_tree_early(n);
- eeh_add_device_early(pdn);
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
-
-/**
- * eeh_add_device_late - Perform EEH initialization for the indicated pci device
+ * eeh_probe_device() - Perform EEH initialization for the indicated pci device
* @dev: pci device for which to set up EEH
*
* This routine must be used to complete EEH initialization for PCI
* devices that were added after system boot (e.g. hotplug, dlpar).
*/
-void eeh_add_device_late(struct pci_dev *dev)
+void eeh_probe_device(struct pci_dev *dev)
{
- struct pci_dn *pdn;
struct eeh_dev *edev;
- if (!dev)
+ pr_debug("EEH: Adding device %s\n", pci_name(dev));
+
+ /*
+ * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was
+ * already called for this device.
+ */
+ if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) {
+ pci_dbg(dev, "Already bound to an eeh_dev!\n");
return;
+ }
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- edev = pdn_to_eeh_dev(pdn);
- eeh_edev_dbg(edev, "Adding device\n");
- if (edev->pdev == dev) {
- eeh_edev_dbg(edev, "Device already referenced!\n");
+ edev = eeh_ops->probe(dev);
+ if (!edev) {
+ pr_debug("EEH: Adding device failed\n");
return;
}
/*
- * The EEH cache might not be removed correctly because of
- * unbalanced kref to the device during unplug time, which
- * relies on pcibios_release_device(). So we have to remove
- * that here explicitly.
+ * FIXME: We rely on pcibios_release_device() to remove the
+ * existing EEH state. The release function is only called if
+ * the pci_dev's refcount drops to zero so if something is
+ * keeping a ref to a device (e.g. a filesystem) we need to
+ * remove the old EEH state.
+ *
+ * FIXME: HEY MA, LOOK AT ME, NO LOCKING!
*/
- if (edev->pdev) {
+ if (edev->pdev && edev->pdev != dev) {
eeh_rmv_from_parent_pe(edev);
eeh_addr_cache_rmv_dev(edev->pdev);
eeh_sysfs_remove_device(edev->pdev);
@@ -1198,69 +1154,16 @@ void eeh_add_device_late(struct pci_dev *dev)
* into error handler afterwards.
*/
edev->mode |= EEH_DEV_NO_HANDLER;
-
- edev->pdev = NULL;
- dev->dev.archdata.edev = NULL;
}
- if (eeh_has_flag(EEH_PROBE_MODE_DEV))
- eeh_ops->probe(pdn, NULL);
-
+ /* bind the pdev and the edev together */
edev->pdev = dev;
dev->dev.archdata.edev = edev;
-
eeh_addr_cache_insert_dev(dev);
+ eeh_sysfs_add_device(dev);
}
/**
- * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_device_tree_late(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- if (eeh_has_flag(EEH_FORCE_DISABLED))
- return;
- list_for_each_entry(dev, &bus->devices, bus_list) {
- eeh_add_device_late(dev);
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- struct pci_bus *subbus = dev->subordinate;
- if (subbus)
- eeh_add_device_tree_late(subbus);
- }
- }
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
-
-/**
- * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to add EEH sysfs files for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_sysfs_files(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- eeh_sysfs_add_device(dev);
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- struct pci_bus *subbus = dev->subordinate;
- if (subbus)
- eeh_add_sysfs_files(subbus);
- }
- }
-}
-EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
-
-/**
* eeh_remove_device - Undo EEH setup for the indicated pci device
* @dev: pci device to be removed
*
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 16af0d8d90a8..a6371fb8f761 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -246,9 +246,8 @@ reenable_mmu:
* r3 can be different from GPR3(r1) at this point, r9 and r11
* contains the old MSR and handler address respectively,
* r4 & r5 can contain page fault arguments that need to be passed
- * along as well. r12, CCR, CTR, XER etc... are left clobbered as
- * they aren't useful past this point (aren't syscall arguments),
- * the rest is restored from the exception frame.
+ * along as well. r0, r6-r8, r12, CCR, CTR, XER etc... are left
+ * clobbered as they aren't useful past this point.
*/
stwu r1,-32(r1)
@@ -262,16 +261,12 @@ reenable_mmu:
* lockdep
*/
1: bl trace_hardirqs_off
-2: lwz r5,24(r1)
+ lwz r5,24(r1)
lwz r4,20(r1)
lwz r3,16(r1)
lwz r11,12(r1)
lwz r9,8(r1)
addi r1,r1,32
- lwz r0,GPR0(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
mtctr r11
mtlr r9
bctr /* jump to handler */
@@ -575,6 +570,33 @@ syscall_exit_work:
bl do_syscall_trace_leave
b ret_from_except_full
+ /*
+ * System call was called from kernel. We get here with SRR1 in r9.
+ * Mark the exception as recoverable once we have retrieved SRR0,
+ * trap a warning and return ENOSYS with CR[SO] set.
+ */
+ .globl ret_from_kernel_syscall
+ret_from_kernel_syscall:
+ mfspr r9, SPRN_SRR0
+ mfspr r10, SPRN_SRR1
+#if !defined(CONFIG_4xx) && !defined(CONFIG_BOOKE)
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR|MSR_DR))
+ mtmsr r11
+#endif
+
+0: trap
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+
+ li r3, ENOSYS
+ crset so
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
+ mtspr SPRN_NRI, r0
+#endif
+ mtspr SPRN_SRR0, r9
+ mtspr SPRN_SRR1, r10
+ SYNC
+ RFI
+
/*
* The fork/clone functions need to copy the full register set into
* the child process. Therefore we need to save all the nonvolatile
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6ba675b0cf7d..9a1e5d636dea 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -16,6 +16,7 @@
#include <linux/errno.h>
#include <linux/err.h>
+#include <asm/cache.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/page.h>
@@ -51,8 +52,10 @@
SYS_CALL_TABLE:
.tc sys_call_table[TC],sys_call_table
+#ifdef CONFIG_COMPAT
COMPAT_SYS_CALL_TABLE:
.tc compat_sys_call_table[TC],compat_sys_call_table
+#endif
/* This value is used to mark exception frames on the stack. */
exception_marker:
@@ -69,6 +72,7 @@ BEGIN_FTR_SECTION
bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
+_ASM_NOKPROBE_SYMBOL(system_call_common)
mr r10,r1
ld r1,PACAKSAVE(r13)
std r10,0(r1)
@@ -76,342 +80,122 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
+ std r2,GPR2(r1)
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
- ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
- std r2,GPR2(r1)
+ ld r2,PACATOC(r13)
+ mfcr r12
+ li r11,0
+ /* Can we avoid saving r3-r8 in common case? */
std r3,GPR3(r1)
- mfcr r2
std r4,GPR4(r1)
std r5,GPR5(r1)
std r6,GPR6(r1)
std r7,GPR7(r1)
std r8,GPR8(r1)
- li r11,0
+ /* Zero r9-r12, this should only be required when restoring all GPRs */
std r11,GPR9(r1)
std r11,GPR10(r1)
std r11,GPR11(r1)
std r11,GPR12(r1)
+ std r9,GPR13(r1)
+ SAVE_NVGPRS(r1)
std r11,_XER(r1)
std r11,_CTR(r1)
- std r9,GPR13(r1)
mflr r10
+
/*
* This clears CR0.SO (bit 28), which is the error indication on
* return from this system call.
*/
- rldimi r2,r11,28,(63-28)
- li r11,0xc01
+ rldimi r12,r11,28,(63-28)
+ li r11,0xc00
std r10,_LINK(r1)
std r11,_TRAP(r1)
+ std r12,_CCR(r1)
std r3,ORIG_GPR3(r1)
- std r2,_CCR(r1)
- ld r2,PACATOC(r13)
- addi r9,r1,STACK_FRAME_OVERHEAD
+ addi r10,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
- std r11,-16(r9) /* "regshere" marker */
-
- kuap_check_amr r10, r11
-
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
-BEGIN_FW_FTR_SECTION
- /* see if there are any DTL entries to process */
- ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
- ld r11,PACA_DTL_RIDX(r13) /* get log read index */
- addi r10,r10,LPPACA_DTLIDX
- LDX_BE r10,0,r10 /* get log write index */
- cmpd r11,r10
- beq+ 33f
- bl accumulate_stolen_time
- REST_GPR(0,r1)
- REST_4GPRS(3,r1)
- REST_2GPRS(7,r1)
- addi r9,r1,STACK_FRAME_OVERHEAD
-33:
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
-
- /*
- * A syscall should always be called with interrupts enabled
- * so we just unconditionally hard-enable here. When some kind
- * of irq tracing is used, we additionally check that condition
- * is correct
- */
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
- lbz r10,PACAIRQSOFTMASK(r13)
-1: tdnei r10,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- li r11,MSR_RI
- ori r11,r11,MSR_EE
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
-
-system_call: /* label this so stack traces look sane */
- /* We do need to set SOFTE in the stack frame or the return
- * from interrupt will be painful
- */
- li r10,IRQS_ENABLED
- std r10,SOFTE(r1)
+ std r11,-16(r10) /* "regshere" marker */
- ld r11, PACA_THREAD_INFO(r13)
- ld r10,TI_FLAGS(r11)
- andi. r11,r10,_TIF_SYSCALL_DOTRACE
- bne .Lsyscall_dotrace /* does not return */
- cmpldi 0,r0,NR_syscalls
- bge- .Lsyscall_enosys
-
-.Lsyscall:
-/*
- * Need to vector to 32 Bit or default sys_call_table here,
- * based on caller's run-mode / personality.
- */
- ld r11,SYS_CALL_TABLE@toc(2)
- andis. r10,r10,_TIF_32BIT@h
- beq 15f
- ld r11,COMPAT_SYS_CALL_TABLE@toc(2)
- clrldi r3,r3,32
- clrldi r4,r4,32
- clrldi r5,r5,32
- clrldi r6,r6,32
- clrldi r7,r7,32
- clrldi r8,r8,32
-15:
- slwi r0,r0,3
-
- barrier_nospec_asm
/*
- * Prevent the load of the handler below (based on the user-passed
- * system call number) being speculatively executed until the test
- * against NR_syscalls and branch to .Lsyscall_enosys above has
- * committed.
+ * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
+ * would clobber syscall parameters. Also we always enter with IRQs
+ * enabled and nothing pending. system_call_exception() will call
+ * trace_hardirqs_off().
*/
+ li r11,IRQS_ALL_DISABLED
+ li r12,PACA_IRQ_HARD_DIS
+ stb r11,PACAIRQSOFTMASK(r13)
+ stb r12,PACAIRQHAPPENED(r13)
- ldx r12,r11,r0 /* Fetch system call handler [ptr] */
- mtctr r12
- bctrl /* Call handler */
+ /* Calling convention has r9 = orig r0, r10 = regs */
+ mr r9,r0
+ bl system_call_exception
- /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
.Lsyscall_exit:
- std r3,RESULT(r1)
-
-#ifdef CONFIG_DEBUG_RSEQ
- /* Check whether the syscall is issued inside a restartable sequence */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl rseq_syscall
- ld r3,RESULT(r1)
-#endif
-
- ld r12, PACA_THREAD_INFO(r13)
-
- ld r8,_MSR(r1)
-
-/*
- * This is a few instructions into the actual syscall exit path (which actually
- * starts at .Lsyscall_exit) to cater to kprobe blacklisting and to reduce the
- * number of visible symbols for profiling purposes.
- *
- * We can probe from system_call until this point as MSR_RI is set. But once it
- * is cleared below, we won't be able to take a trap.
- *
- * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
- */
-system_call_exit:
- /*
- * Disable interrupts so current_thread_info()->flags can't change,
- * and so that we don't get interrupted after loading SRR0/1.
- *
- * Leave MSR_RI enabled for now, because with THREAD_INFO_IN_TASK we
- * could fault on the load of the TI_FLAGS below.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r11,MSR_RI
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
+ addi r4,r1,STACK_FRAME_OVERHEAD
+ bl syscall_exit_prepare
- ld r9,TI_FLAGS(r12)
- li r11,-MAX_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- .Lsyscall_exit_work
+ ld r2,_CCR(r1)
+ ld r4,_NIP(r1)
+ ld r5,_MSR(r1)
+ ld r6,_LINK(r1)
- andi. r0,r8,MSR_FP
- beq 2f
-#ifdef CONFIG_ALTIVEC
- andis. r0,r8,MSR_VEC@h
- bne 3f
-#endif
-2: addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_math
- ld r8,_MSR(r1)
- ld r3,RESULT(r1)
- li r11,-MAX_ERRNO
-
-3: cmpld r3,r11
- ld r5,_CCR(r1)
- bge- .Lsyscall_error
-.Lsyscall_error_cont:
- ld r7,_NIP(r1)
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- andi. r6,r8,MSR_PR
- ld r4,_LINK(r1)
- kuap_check_amr r10, r11
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r5
+ mtlr r6
-#ifdef CONFIG_PPC_BOOK3S
- /*
- * Clear MSR_RI, MSR_EE is already and remains disabled. We could do
- * this later, but testing shows that doing it here causes less slow
- * down than doing it closer to the rfid.
- */
+ cmpdi r3,0
+ bne .Lsyscall_restore_regs
+ /* Zero volatile regs that may contain sensitive kernel data */
+ li r0,0
+ li r4,0
+ li r5,0
+ li r6,0
+ li r7,0
+ li r8,0
+ li r9,0
+ li r10,0
li r11,0
- mtmsrd r11,1
-#endif
-
- beq- 1f
- ACCOUNT_CPU_USER_EXIT(r13, r11, r12)
+ li r12,0
+ mtctr r0
+ mtspr SPRN_XER,r0
+.Lsyscall_restore_regs_cont:
BEGIN_FTR_SECTION
HMT_MEDIUM_LOW
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- std r8, PACATMSCRATCH(r13)
-#endif
-
/*
* We don't need to restore AMR on the way back to userspace for KUAP.
* The value of AMR only matters while we're in the kernel.
*/
- ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
+ mtcr r2
ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r13,GPR13(r1)
ld r1,GPR1(r1)
- mtlr r4
- mtcr r5
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
RFI_TO_USER
b . /* prevent speculative execution */
-1: /* exit to kernel */
- kuap_restore_amr r2
-
- ld r2,GPR2(r1)
- ld r1,GPR1(r1)
- mtlr r4
- mtcr r5
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-.Lsyscall_error:
- oris r5,r5,0x1000 /* Set SO bit in CR */
- neg r3,r3
- std r5,_CCR(r1)
- b .Lsyscall_error_cont
-
-/* Traced system call support */
-.Lsyscall_dotrace:
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_enter
-
- /*
- * We use the return value of do_syscall_trace_enter() as the syscall
- * number. If the syscall was rejected for any reason do_syscall_trace_enter()
- * returns an invalid syscall number and the test below against
- * NR_syscalls will fail.
- */
- mr r0,r3
-
- /* Restore argument registers just clobbered and/or possibly changed. */
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r5,GPR5(r1)
- ld r6,GPR6(r1)
- ld r7,GPR7(r1)
- ld r8,GPR8(r1)
-
- /* Repopulate r9 and r10 for the syscall path */
- addi r9,r1,STACK_FRAME_OVERHEAD
- ld r10, PACA_THREAD_INFO(r13)
- ld r10,TI_FLAGS(r10)
-
- cmpldi r0,NR_syscalls
- blt+ .Lsyscall
-
- /* Return code is already in r3 thanks to do_syscall_trace_enter() */
- b .Lsyscall_exit
-
-
-.Lsyscall_enosys:
- li r3,-ENOSYS
- b .Lsyscall_exit
-
-.Lsyscall_exit_work:
- /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
- If TIF_NOERROR is set, just save r3 as it is. */
-
- andi. r0,r9,_TIF_RESTOREALL
- beq+ 0f
+.Lsyscall_restore_regs:
+ ld r3,_CTR(r1)
+ ld r4,_XER(r1)
REST_NVGPRS(r1)
- b 2f
-0: cmpld r3,r11 /* r11 is -MAX_ERRNO */
- blt+ 1f
- andi. r0,r9,_TIF_NOERROR
- bne- 1f
- ld r5,_CCR(r1)
- neg r3,r3
- oris r5,r5,0x1000 /* Set SO bit in CR */
- std r5,_CCR(r1)
-1: std r3,GPR3(r1)
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
- beq 4f
-
- /* Clear per-syscall TIF flags if any are set. */
-
- li r11,_TIF_PERSYSCALL_MASK
- addi r12,r12,TI_FLAGS
-3: ldarx r10,0,r12
- andc r10,r10,r11
- stdcx. r10,0,r12
- bne- 3b
- subi r12,r12,TI_FLAGS
-
-4: /* Anything else left to do? */
-BEGIN_FTR_SECTION
- lis r3,DEFAULT_PPR@highest /* Set default PPR */
- sldi r3,r3,32 /* bits 11-13 are used for ppr */
- std r3,_PPR(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
- beq ret_from_except_lite
-
- /* Re-enable interrupts */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- li r10,MSR_RI
- ori r10,r10,MSR_EE
- mtmsrd r10,1
-#endif /* CONFIG_PPC_BOOK3E */
-
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_leave
- b ret_from_except
+ mtctr r3
+ mtspr SPRN_XER,r4
+ ld r0,GPR0(r1)
+ REST_8GPRS(4, r1)
+ ld r12,GPR12(r1)
+ b .Lsyscall_restore_regs_cont
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
.Ltabort_syscall:
@@ -439,64 +223,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
RFI_TO_USER
b . /* prevent speculative execution */
#endif
-_ASM_NOKPROBE_SYMBOL(system_call_common);
-_ASM_NOKPROBE_SYMBOL(system_call_exit);
-
-/* Save non-volatile GPRs, if not already saved. */
-_GLOBAL(save_nvgprs)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- beqlr-
- SAVE_NVGPRS(r1)
- clrrdi r0,r11,1
- std r0,_TRAP(r1)
- blr
-_ASM_NOKPROBE_SYMBOL(save_nvgprs);
-
-
-/*
- * The sigsuspend and rt_sigsuspend system calls can call do_signal
- * and thus put the process into the stopped state where we might
- * want to examine its user state with ptrace. Therefore we need
- * to save all the nonvolatile registers (r14 - r31) before calling
- * the C code. Similarly, fork, vfork and clone need the full
- * register state on the stack so that it can be copied to the child.
- */
-
-_GLOBAL(ppc_fork)
- bl save_nvgprs
- bl sys_fork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_vfork)
- bl save_nvgprs
- bl sys_vfork
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone)
- bl save_nvgprs
- bl sys_clone
- b .Lsyscall_exit
-
-_GLOBAL(ppc_clone3)
- bl save_nvgprs
- bl sys_clone3
- b .Lsyscall_exit
-
-_GLOBAL(ppc32_swapcontext)
- bl save_nvgprs
- bl compat_sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc64_swapcontext)
- bl save_nvgprs
- bl sys_swapcontext
- b .Lsyscall_exit
-
-_GLOBAL(ppc_switch_endian)
- bl save_nvgprs
- bl sys_switch_endian
- b .Lsyscall_exit
_GLOBAL(ret_from_fork)
bl schedule_tail
@@ -516,6 +242,19 @@ _GLOBAL(ret_from_kernel_thread)
li r3,0
b .Lsyscall_exit
+#ifdef CONFIG_PPC_BOOK3E
+/* Save non-volatile GPRs, if not already saved. */
+_GLOBAL(save_nvgprs)
+ ld r11,_TRAP(r1)
+ andi. r0,r11,1
+ beqlr-
+ SAVE_NVGPRS(r1)
+ clrrdi r0,r11,1
+ std r0,_TRAP(r1)
+ blr
+_ASM_NOKPROBE_SYMBOL(save_nvgprs);
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
#define FLUSH_COUNT_CACHE \
@@ -578,7 +317,7 @@ flush_count_cache:
* state of one is saved on its kernel stack. Then the state
* of the other is restored from its kernel stack. The memory
* management hardware is updated to the second process's state.
- * Finally, we can return to the second process, via ret_from_except.
+ * Finally, we can return to the second process, via interrupt_return.
* On entry, r3 points to the THREAD for the current task, r4
* points to the THREAD for the new task.
*
@@ -730,408 +469,146 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
addi r1,r1,SWITCH_FRAME_SIZE
blr
- .align 7
-_GLOBAL(ret_from_except)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- bne ret_from_except_lite
- REST_NVGPRS(r1)
-
-_GLOBAL(ret_from_except_lite)
- /*
- * Disable interrupts so that current_thread_info()->flags
- * can't change between when we test it and when we return
- * from the interrupt.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r10,MSR_RI
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-
- ld r9, PACA_THREAD_INFO(r13)
- ld r3,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3E
- ld r10,PACACURRENT(r13)
-#endif /* CONFIG_PPC_BOOK3E */
- ld r4,TI_FLAGS(r9)
- andi. r3,r3,MSR_PR
- beq resume_kernel
-#ifdef CONFIG_PPC_BOOK3E
- lwz r3,(THREAD+THREAD_DBCR0)(r10)
-#endif /* CONFIG_PPC_BOOK3E */
-
- /* Check current_thread_info()->flags */
- andi. r0,r4,_TIF_USER_WORK_MASK
- bne 1f
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3S
/*
- * Check to see if the dbcr0 register is set up to debug.
- * Use the internal debug mode bit to do this.
- */
- andis. r0,r3,DBCR0_IDM@h
- beq restore
- mfmsr r0
- rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
- mtmsr r0
- mtspr SPRN_DBCR0,r3
- li r10, -1
- mtspr SPRN_DBSR,r10
- b restore
-#else
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_math
- b restore
-#endif
-1: andi. r0,r4,_TIF_NEED_RESCHED
- beq 2f
- bl restore_interrupts
- SCHEDULE_USER
- b ret_from_except_lite
-2:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
- bne 3f /* only restore TM if nothing else to do */
+ * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
+ * touched, AMR not set, no exit work created, then this can be used.
+ */
+ .balign IFETCH_ALIGN_BYTES
+ .globl fast_interrupt_return
+fast_interrupt_return:
+_ASM_NOKPROBE_SYMBOL(fast_interrupt_return)
+ ld r4,_MSR(r1)
+ andi. r0,r4,MSR_PR
+ bne .Lfast_user_interrupt_return
+ andi. r0,r4,MSR_RI
+ li r3,0 /* 0 return value, no EMULATE_STACK_STORE */
+ bne+ .Lfast_kernel_interrupt_return
addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_tm_state
- b restore
-3:
-#endif
- bl save_nvgprs
- /*
- * Use a non volatile GPR to save and restore our thread_info flags
- * across the call to restore_interrupts.
- */
- mr r30,r4
- bl restore_interrupts
- mr r4,r30
+ bl unrecoverable_exception
+ b . /* should not get here */
+
+ .balign IFETCH_ALIGN_BYTES
+ .globl interrupt_return
+interrupt_return:
+_ASM_NOKPROBE_SYMBOL(interrupt_return)
+ ld r4,_MSR(r1)
+ andi. r0,r4,MSR_PR
+ beq .Lkernel_interrupt_return
addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_notify_resume
- b ret_from_except
+ bl interrupt_exit_user_prepare
+ cmpdi r3,0
+ bne- .Lrestore_nvgprs
-resume_kernel:
- /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- andis. r8,r4,_TIF_EMULATE_STACK_STORE@h
- beq+ 1f
-
- addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+.Lfast_user_interrupt_return:
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+BEGIN_FTR_SECTION
+ ld r10,_PPR(r1)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
- ld r3,GPR1(r1)
- subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
- mr r4,r1 /* src: current exception frame */
- mr r1,r3 /* Reroute the trampoline frame to r1 */
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- /* Copy from the original to the trampoline. */
- li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
- li r6,0 /* start offset: 0 */
- mtctr r5
-2: ldx r0,r6,r4
- stdx r0,r6,r3
- addi r6,r6,8
- bdnz 2b
-
- /* Do real store operation to complete stdu */
- ld r5,GPR1(r1)
- std r8,0(r5)
-
- /* Clear _TIF_EMULATE_STACK_STORE flag */
- lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r9,TI_FLAGS
-0: ldarx r4,0,r5
- andc r4,r4,r11
- stdcx. r4,0,r5
- bne- 0b
-1:
-
-#ifdef CONFIG_PREEMPTION
- /* Check if we need to preempt */
- andi. r0,r4,_TIF_NEED_RESCHED
- beq+ restore
- /* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
- cmpwi cr0,r8,0
- bne restore
- ld r0,SOFTE(r1)
- andi. r0,r0,IRQS_DISABLED
- bne restore
+ ld r3,_CCR(r1)
+ ld r4,_LINK(r1)
+ ld r5,_CTR(r1)
+ ld r6,_XER(r1)
+ li r0,0
- /*
- * Here we are preempting the current task. We want to make
- * sure we are soft-disabled first and reconcile irq state.
- */
- RECONCILE_IRQ_STATE(r3,r4)
- bl preempt_schedule_irq
+ REST_4GPRS(7, r1)
+ REST_2GPRS(11, r1)
+ REST_GPR(13, r1)
- /*
- * arch_local_irq_restore() from preempt_schedule_irq above may
- * enable hard interrupt but we really should disable interrupts
- * when we return from the interrupt, and so that we don't get
- * interrupted after loading SRR0/1.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- li r10,MSR_RI
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-#endif /* CONFIG_PREEMPTION */
+ mtcr r3
+ mtlr r4
+ mtctr r5
+ mtspr SPRN_XER,r6
- .globl fast_exc_return_irq
-fast_exc_return_irq:
-restore:
- /*
- * This is the main kernel exit path. First we check if we
- * are about to re-enable interrupts
- */
- ld r5,SOFTE(r1)
- lbz r6,PACAIRQSOFTMASK(r13)
- andi. r5,r5,IRQS_DISABLED
- bne .Lrestore_irq_off
+ REST_4GPRS(2, r1)
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ RFI_TO_USER
+ b . /* prevent speculative execution */
- /* We are enabling, were we already enabled ? Yes, just return */
- andi. r6,r6,IRQS_DISABLED
- beq cr0,.Ldo_restore
+.Lrestore_nvgprs:
+ REST_NVGPRS(r1)
+ b .Lfast_user_interrupt_return
- /*
- * We are about to soft-enable interrupts (we are hard disabled
- * at this point). We check if there's anything that needs to
- * be replayed first.
- */
- lbz r0,PACAIRQHAPPENED(r13)
- cmpwi cr0,r0,0
- bne- .Lrestore_check_irq_replay
+ .balign IFETCH_ALIGN_BYTES
+.Lkernel_interrupt_return:
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl interrupt_exit_kernel_prepare
- /*
- * Get here when nothing happened while soft-disabled, just
- * soft-enable and move-on. We will hard-enable as a side
- * effect of rfi
- */
-.Lrestore_no_replay:
- TRACE_ENABLE_INTS
- li r0,IRQS_ENABLED
- stb r0,PACAIRQSOFTMASK(r13);
+.Lfast_kernel_interrupt_return:
+ cmpdi cr1,r3,0
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
- /*
- * Final return path. BookE is handled in a different file
- */
-.Ldo_restore:
-#ifdef CONFIG_PPC_BOOK3E
- b exception_return_book3e
-#else
- /*
- * Clear the reservation. If we know the CPU tracks the address of
- * the reservation then we can potentially save some cycles and use
- * a larx. On POWER6 and POWER7 this is significantly faster.
- */
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
FTR_SECTION_ELSE
- ldarx r4,0,r1
+ ldarx r0,0,r1
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- /*
- * Some code path such as load_up_fpu or altivec return directly
- * here. They run entirely hard disabled and do not alter the
- * interrupt state. They also don't use lwarx/stwcx. and thus
- * are known not to leave dangling reservations.
- */
- .globl fast_exception_return
-fast_exception_return:
- ld r3,_MSR(r1)
+ ld r3,_LINK(r1)
ld r4,_CTR(r1)
- ld r0,_LINK(r1)
- mtctr r4
- mtlr r0
- ld r4,_XER(r1)
- mtspr SPRN_XER,r4
-
- kuap_check_amr r5, r6
-
- REST_8GPRS(5, r1)
-
- andi. r0,r3,MSR_RI
- beq- .Lunrecov_restore
-
- /*
- * Clear RI before restoring r13. If we are returning to
- * userspace and we take an exception after restoring r13,
- * we end up corrupting the userspace r13 value.
- */
- li r4,0
- mtmsrd r4,1
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* TM debug */
- std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */
-#endif
- /*
- * r13 is our per cpu area, only restore it if we are returning to
- * userspace the value stored in the stack frame may belong to
- * another CPU.
- */
- andi. r0,r3,MSR_PR
- beq 1f
-BEGIN_FTR_SECTION
- /* Restore PPR */
- ld r2,_PPR(r1)
- mtspr SPRN_PPR,r2
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
- REST_GPR(13, r1)
-
- /*
- * We don't need to restore AMR on the way back to userspace for KUAP.
- * The value of AMR only matters while we're in the kernel.
- */
- mtspr SPRN_SRR1,r3
-
- ld r2,_CCR(r1)
- mtcrf 0xFF,r2
- ld r2,_NIP(r1)
- mtspr SPRN_SRR0,r2
-
- ld r0,GPR0(r1)
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r1,GPR1(r1)
- RFI_TO_USER
- b . /* prevent speculative execution */
+ ld r5,_XER(r1)
+ ld r6,_CCR(r1)
+ li r0,0
-1: mtspr SPRN_SRR1,r3
+ REST_4GPRS(7, r1)
+ REST_2GPRS(11, r1)
- ld r2,_CCR(r1)
- mtcrf 0xFF,r2
- ld r2,_NIP(r1)
- mtspr SPRN_SRR0,r2
+ mtlr r3
+ mtctr r4
+ mtspr SPRN_XER,r5
/*
* Leaving a stale exception_marker on the stack can confuse
* the reliable stack unwinder later on. Clear it.
*/
- li r2,0
- std r2,STACK_FRAME_OVERHEAD-16(r1)
-
- ld r0,GPR0(r1)
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
+ std r0,STACK_FRAME_OVERHEAD-16(r1)
- kuap_restore_amr r4
+ REST_4GPRS(2, r1)
- ld r4,GPR4(r1)
- ld r1,GPR1(r1)
+ bne- cr1,1f /* emulate stack store */
+ mtcr r6
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
RFI_TO_KERNEL
b . /* prevent speculative execution */
-#endif /* CONFIG_PPC_BOOK3E */
-
- /*
- * We are returning to a context with interrupts soft disabled.
- *
- * However, we may also about to hard enable, so we need to
- * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
- * or that bit can get out of sync and bad things will happen
- */
-.Lrestore_irq_off:
- ld r3,_MSR(r1)
- lbz r7,PACAIRQHAPPENED(r13)
- andi. r0,r3,MSR_EE
- beq 1f
- rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
- stb r7,PACAIRQHAPPENED(r13)
-1:
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
- /* The interrupt should not have soft enabled. */
- lbz r7,PACAIRQSOFTMASK(r13)
-1: tdeqi r7,IRQS_ENABLED
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
- b .Ldo_restore
-
- /*
- * Something did happen, check if a re-emit is needed
- * (this also clears paca->irq_happened)
- */
-.Lrestore_check_irq_replay:
- /* XXX: We could implement a fast path here where we check
- * for irq_happened being just 0x01, in which case we can
- * clear it and return. That means that we would potentially
- * miss a decrementer having wrapped all the way around.
- *
- * Still, this might be useful for things like hash_page
- */
- bl __check_irq_replay
- cmpwi cr0,r3,0
- beq .Lrestore_no_replay
-
- /*
- * We need to re-emit an interrupt. We do so by re-using our
- * existing exception frame. We first change the trap value,
- * but we need to ensure we preserve the low nibble of it
- */
- ld r4,_TRAP(r1)
- clrldi r4,r4,60
- or r4,r4,r3
- std r4,_TRAP(r1)
-
- /*
- * PACA_IRQ_HARD_DIS won't always be set here, so set it now
- * to reconcile the IRQ state. Tracing is already accounted for.
- */
- lbz r4,PACAIRQHAPPENED(r13)
- ori r4,r4,PACA_IRQ_HARD_DIS
- stb r4,PACAIRQHAPPENED(r13)
-
- /*
- * Then find the right handler and call it. Interrupts are
- * still soft-disabled and we keep them that way.
- */
- cmpwi cr0,r3,0x500
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl do_IRQ
- b ret_from_except
-1: cmpwi cr0,r3,0xf00
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl performance_monitor_exception
- b ret_from_except
-1: cmpwi cr0,r3,0xe60
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl handle_hmi_exception
- b ret_from_except
-1: cmpwi cr0,r3,0x900
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl timer_interrupt
- b ret_from_except
-#ifdef CONFIG_PPC_DOORBELL
-1:
-#ifdef CONFIG_PPC_BOOK3E
- cmpwi cr0,r3,0x280
-#else
- cmpwi cr0,r3,0xa00
-#endif /* CONFIG_PPC_BOOK3E */
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl doorbell_exception
-#endif /* CONFIG_PPC_DOORBELL */
-1: b ret_from_except /* What else to do here ? */
-
-.Lunrecov_restore:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b .Lunrecov_restore
-
-_ASM_NOKPROBE_SYMBOL(ret_from_except);
-_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
-_ASM_NOKPROBE_SYMBOL(resume_kernel);
-_ASM_NOKPROBE_SYMBOL(fast_exc_return_irq);
-_ASM_NOKPROBE_SYMBOL(restore);
-_ASM_NOKPROBE_SYMBOL(fast_exception_return);
+1: /*
+ * Emulate stack store with update. New r1 value was already calculated
+ * and updated in our interrupt regs by emulate_loadstore, but we can't
+ * store the previous value of r1 to the stack before re-loading our
+ * registers from it, otherwise they could be clobbered. Use
+ * PACA_EXGEN as temporary storage to hold the store data, as
+ * interrupts are disabled here so it won't be clobbered.
+ */
+ mtcr r6
+ std r9,PACA_EXGEN+0(r13)
+ addi r9,r1,INT_FRAME_SIZE /* get original r1 */
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ std r9,0(r1) /* perform store component of stdu */
+ ld r9,PACA_EXGEN+0(r13)
+ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_RTAS
/*
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index e4076e3c072d..d9ed79415100 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -24,6 +24,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
#include <asm/feature-fixups.h>
+#include <asm/context_tracking.h>
/* XXX This will ultimately add space for a special exception save
* structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
@@ -1003,38 +1004,6 @@ masked_interrupt_book3e_0x2c0:
masked_interrupt_book3e PACA_IRQ_DBELL 0
/*
- * Called from arch_local_irq_enable when an interrupt needs
- * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280
- * to indicate the kind of interrupt. MSR:EE is already off.
- * We generate a stackframe like if a real interrupt had happened.
- *
- * Note: While MSR:EE is off, we need to make sure that _MSR
- * in the generated frame has EE set to 1 or the exception
- * handler will not properly re-enable them.
- */
-_GLOBAL(__replay_interrupt)
- /* We are going to jump to the exception common code which
- * will retrieve various register values from the PACA which
- * we don't give a damn about.
- */
- mflr r10
- mfmsr r11
- mfcr r4
- mtspr SPRN_SPRG_GEN_SCRATCH,r13;
- std r1,PACA_EXGEN+EX_R1(r13);
- stw r4,PACA_EXGEN+EX_CR(r13);
- ori r11,r11,MSR_EE
- subi r1,r1,INT_FRAME_SIZE;
- cmpwi cr0,r3,0x500
- beq exc_0x500_common
- cmpwi cr0,r3,0x900
- beq exc_0x900_common
- cmpwi cr0,r3,0x280
- beq exc_0x280_common
- blr
-
-
-/*
* This is called from 0x300 and 0x400 handlers after the prologs with
* r14 and r15 containing the fault address and error code, with the
* original values stashed away in the PACA
@@ -1073,17 +1042,161 @@ alignment_more:
bl alignment_exception
b ret_from_except
-/*
- * We branch here from entry_64.S for the last stage of the exception
- * return code path. MSR:EE is expected to be off at that point
- */
-_GLOBAL(exception_return_book3e)
- b 1f
+ .align 7
+_GLOBAL(ret_from_except)
+ ld r11,_TRAP(r1)
+ andi. r0,r11,1
+ bne ret_from_except_lite
+ REST_NVGPRS(r1)
+
+_GLOBAL(ret_from_except_lite)
+ /*
+ * Disable interrupts so that current_thread_info()->flags
+ * can't change between when we test it and when we return
+ * from the interrupt.
+ */
+ wrteei 0
+
+ ld r9, PACA_THREAD_INFO(r13)
+ ld r3,_MSR(r1)
+ ld r10,PACACURRENT(r13)
+ ld r4,TI_FLAGS(r9)
+ andi. r3,r3,MSR_PR
+ beq resume_kernel
+ lwz r3,(THREAD+THREAD_DBCR0)(r10)
+
+ /* Check current_thread_info()->flags */
+ andi. r0,r4,_TIF_USER_WORK_MASK
+ bne 1f
+ /*
+ * Check to see if the dbcr0 register is set up to debug.
+ * Use the internal debug mode bit to do this.
+ */
+ andis. r0,r3,DBCR0_IDM@h
+ beq restore
+ mfmsr r0
+ rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
+ mtmsr r0
+ mtspr SPRN_DBCR0,r3
+ li r10, -1
+ mtspr SPRN_DBSR,r10
+ b restore
+1: andi. r0,r4,_TIF_NEED_RESCHED
+ beq 2f
+ bl restore_interrupts
+ SCHEDULE_USER
+ b ret_from_except_lite
+2:
+ bl save_nvgprs
+ /*
+ * Use a non volatile GPR to save and restore our thread_info flags
+ * across the call to restore_interrupts.
+ */
+ mr r30,r4
+ bl restore_interrupts
+ mr r4,r30
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_notify_resume
+ b ret_from_except
+
+resume_kernel:
+ /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+ andis. r8,r4,_TIF_EMULATE_STACK_STORE@h
+ beq+ 1f
+
+ addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+
+ ld r3,GPR1(r1)
+ subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
+ mr r4,r1 /* src: current exception frame */
+ mr r1,r3 /* Reroute the trampoline frame to r1 */
+
+ /* Copy from the original to the trampoline. */
+ li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
+ li r6,0 /* start offset: 0 */
+ mtctr r5
+2: ldx r0,r6,r4
+ stdx r0,r6,r3
+ addi r6,r6,8
+ bdnz 2b
+
+ /* Do real store operation to complete stdu */
+ ld r5,GPR1(r1)
+ std r8,0(r5)
+
+ /* Clear _TIF_EMULATE_STACK_STORE flag */
+ lis r11,_TIF_EMULATE_STACK_STORE@h
+ addi r5,r9,TI_FLAGS
+0: ldarx r4,0,r5
+ andc r4,r4,r11
+ stdcx. r4,0,r5
+ bne- 0b
+1:
+
+#ifdef CONFIG_PREEMPT
+ /* Check if we need to preempt */
+ andi. r0,r4,_TIF_NEED_RESCHED
+ beq+ restore
+ /* Check that preempt_count() == 0 and interrupts are enabled */
+ lwz r8,TI_PREEMPT(r9)
+ cmpwi cr0,r8,0
+ bne restore
+ ld r0,SOFTE(r1)
+ andi. r0,r0,IRQS_DISABLED
+ bne restore
+
+ /*
+ * Here we are preempting the current task. We want to make
+ * sure we are soft-disabled first and reconcile irq state.
+ */
+ RECONCILE_IRQ_STATE(r3,r4)
+ bl preempt_schedule_irq
+
+ /*
+ * arch_local_irq_restore() from preempt_schedule_irq above may
+ * enable hard interrupt but we really should disable interrupts
+ * when we return from the interrupt, and so that we don't get
+ * interrupted after loading SRR0/1.
+ */
+ wrteei 0
+#endif /* CONFIG_PREEMPT */
+
+restore:
+ /*
+ * This is the main kernel exit path. First we check if we
+ * are about to re-enable interrupts
+ */
+ ld r5,SOFTE(r1)
+ lbz r6,PACAIRQSOFTMASK(r13)
+ andi. r5,r5,IRQS_DISABLED
+ bne .Lrestore_irq_off
+
+ /* We are enabling, were we already enabled ? Yes, just return */
+ andi. r6,r6,IRQS_DISABLED
+ beq cr0,fast_exception_return
+
+ /*
+ * We are about to soft-enable interrupts (we are hard disabled
+ * at this point). We check if there's anything that needs to
+ * be replayed first.
+ */
+ lbz r0,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r0,0
+ bne- .Lrestore_check_irq_replay
+
+ /*
+ * Get here when nothing happened while soft-disabled, just
+ * soft-enable and move-on. We will hard-enable as a side
+ * effect of rfi
+ */
+.Lrestore_no_replay:
+ TRACE_ENABLE_INTS
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13);
/* This is the return from load_up_fpu fast path which could do with
* less GPR restores in fact, but for now we have a single return path
*/
- .globl fast_exception_return
fast_exception_return:
wrteei 0
1: mr r0,r13
@@ -1124,6 +1237,102 @@ fast_exception_return:
mfspr r13,SPRN_SPRG_GEN_SCRATCH
rfi
+ /*
+ * We are returning to a context with interrupts soft disabled.
+ *
+ * However, we may also about to hard enable, so we need to
+ * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
+ * or that bit can get out of sync and bad things will happen
+ */
+.Lrestore_irq_off:
+ ld r3,_MSR(r1)
+ lbz r7,PACAIRQHAPPENED(r13)
+ andi. r0,r3,MSR_EE
+ beq 1f
+ rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
+ stb r7,PACAIRQHAPPENED(r13)
+1:
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+ /* The interrupt should not have soft enabled. */
+ lbz r7,PACAIRQSOFTMASK(r13)
+1: tdeqi r7,IRQS_ENABLED
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
+ b fast_exception_return
+
+ /*
+ * Something did happen, check if a re-emit is needed
+ * (this also clears paca->irq_happened)
+ */
+.Lrestore_check_irq_replay:
+ /* XXX: We could implement a fast path here where we check
+ * for irq_happened being just 0x01, in which case we can
+ * clear it and return. That means that we would potentially
+ * miss a decrementer having wrapped all the way around.
+ *
+ * Still, this might be useful for things like hash_page
+ */
+ bl __check_irq_replay
+ cmpwi cr0,r3,0
+ beq .Lrestore_no_replay
+
+ /*
+ * We need to re-emit an interrupt. We do so by re-using our
+ * existing exception frame. We first change the trap value,
+ * but we need to ensure we preserve the low nibble of it
+ */
+ ld r4,_TRAP(r1)
+ clrldi r4,r4,60
+ or r4,r4,r3
+ std r4,_TRAP(r1)
+
+ /*
+ * PACA_IRQ_HARD_DIS won't always be set here, so set it now
+ * to reconcile the IRQ state. Tracing is already accounted for.
+ */
+ lbz r4,PACAIRQHAPPENED(r13)
+ ori r4,r4,PACA_IRQ_HARD_DIS
+ stb r4,PACAIRQHAPPENED(r13)
+
+ /*
+ * Then find the right handler and call it. Interrupts are
+ * still soft-disabled and we keep them that way.
+ */
+ cmpwi cr0,r3,0x500
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl do_IRQ
+ b ret_from_except
+1: cmpwi cr0,r3,0xf00
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl performance_monitor_exception
+ b ret_from_except
+1: cmpwi cr0,r3,0xe60
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl handle_hmi_exception
+ b ret_from_except
+1: cmpwi cr0,r3,0x900
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl timer_interrupt
+ b ret_from_except
+#ifdef CONFIG_PPC_DOORBELL
+1:
+ cmpwi cr0,r3,0x280
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl doorbell_exception
+#endif /* CONFIG_PPC_DOORBELL */
+1: b ret_from_except /* What else to do here ? */
+
+_ASM_NOKPROBE_SYMBOL(ret_from_except);
+_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
+_ASM_NOKPROBE_SYMBOL(resume_kernel);
+_ASM_NOKPROBE_SYMBOL(restore);
+_ASM_NOKPROBE_SYMBOL(fast_exception_return);
+
/*
* Trampolines used when spotting a bad kernel stack pointer in
* the exception entry code.
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ffc15f4f079d..728ccb0f560c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -32,16 +32,10 @@
#define EX_CCR 52
#define EX_CFAR 56
#define EX_PPR 64
-#if defined(CONFIG_RELOCATABLE)
#define EX_CTR 72
.if EX_SIZE != 10
.error "EX_SIZE is wrong"
.endif
-#else
-.if EX_SIZE != 9
- .error "EX_SIZE is wrong"
-.endif
-#endif
/*
* Following are fixed section helper macros.
@@ -50,7 +44,6 @@
* EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors
* TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these)
* TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use)
- * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated
* EXC_COMMON - After switching to virtual, relocated mode.
*/
@@ -80,13 +73,6 @@ name:
#define TRAMP_VIRT_BEGIN(name) \
FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define TRAMP_KVM_BEGIN(name) \
- TRAMP_VIRT_BEGIN(name)
-#else
-#define TRAMP_KVM_BEGIN(name)
-#endif
-
#define EXC_REAL_NONE(start, size) \
FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
@@ -119,67 +105,6 @@ name:
ori reg,reg,(ABS_ADDR(label))@l; \
addis reg,reg,(ABS_ADDR(label))@h
-/* Exception register prefixes */
-#define EXC_HV_OR_STD 2 /* depends on HVMODE */
-#define EXC_HV 1
-#define EXC_STD 0
-
-#if defined(CONFIG_RELOCATABLE)
-/*
- * If we support interrupts with relocation on AND we're a relocatable kernel,
- * we need to use CTR to get to the 2nd level handler. So, save/restore it
- * when required.
- */
-#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13)
-#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13)
-#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg
-#else
-/* ...else CTR is unused and in register. */
-#define SAVE_CTR(reg, area)
-#define GET_CTR(reg, area) mfctr reg
-#define RESTORE_CTR(reg, area)
-#endif
-
-/*
- * PPR save/restore macros used in exceptions-64s.S
- * Used for P7 or later processors
- */
-#define SAVE_PPR(area, ra) \
-BEGIN_FTR_SECTION_NESTED(940) \
- ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \
- std ra,_PPR(r1); \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
-
-#define RESTORE_PPR_PACA(area, ra) \
-BEGIN_FTR_SECTION_NESTED(941) \
- ld ra,area+EX_PPR(r13); \
- mtspr SPRN_PPR,ra; \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
-
-/*
- * Get an SPR into a register if the CPU has the given feature
- */
-#define OPT_GET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mfspr ra,spr; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Set an SPR from a register if the CPU has the given feature
- */
-#define OPT_SET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mtspr spr,ra; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Save a register to the PACA if the CPU has the given feature
- */
-#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- std ra,offset(r13); \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
/*
* Branch to label using its 0xC000 address. This results in instruction
* address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
@@ -193,89 +118,199 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr reg; \
bctr
-.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
- TRAMP_KVM_BEGIN(\name\()_kvm)
- KVM_HANDLER \vec, \hsrr, \area, \skip
+/*
+ * Interrupt code generation macros
+ */
+#define IVEC .L_IVEC_\name\() /* Interrupt vector address */
+#define IHSRR .L_IHSRR_\name\() /* Sets SRR or HSRR registers */
+#define IHSRR_IF_HVMODE .L_IHSRR_IF_HVMODE_\name\() /* HSRR if HV else SRR */
+#define IAREA .L_IAREA_\name\() /* PACA save area */
+#define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */
+#define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */
+#define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */
+#define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */
+#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */
+#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
+#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
+#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
+#define IKVM_SKIP .L_IKVM_SKIP_\name\() /* Generate KVM skip handler */
+#define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */
+#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name
+#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
+#define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */
+#define __ISTACK(name) .L_ISTACK_ ## name
+#define IRECONCILE .L_IRECONCILE_\name\() /* Do RECONCILE_IRQ_STATE */
+#define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */
+
+#define INT_DEFINE_BEGIN(n) \
+.macro int_define_ ## n name
+
+#define INT_DEFINE_END(n) \
+.endm ; \
+int_define_ ## n n ; \
+do_define_int n
+
+.macro do_define_int name
+ .ifndef IVEC
+ .error "IVEC not defined"
+ .endif
+ .ifndef IHSRR
+ IHSRR=0
+ .endif
+ .ifndef IHSRR_IF_HVMODE
+ IHSRR_IF_HVMODE=0
+ .endif
+ .ifndef IAREA
+ IAREA=PACA_EXGEN
+ .endif
+ .ifndef IVIRT
+ IVIRT=1
+ .endif
+ .ifndef IISIDE
+ IISIDE=0
+ .endif
+ .ifndef IDAR
+ IDAR=0
+ .endif
+ .ifndef IDSISR
+ IDSISR=0
+ .endif
+ .ifndef ISET_RI
+ ISET_RI=1
+ .endif
+ .ifndef IBRANCH_TO_COMMON
+ IBRANCH_TO_COMMON=1
+ .endif
+ .ifndef IREALMODE_COMMON
+ IREALMODE_COMMON=0
+ .else
+ .if ! IBRANCH_TO_COMMON
+ .error "IREALMODE_COMMON=1 but IBRANCH_TO_COMMON=0"
+ .endif
+ .endif
+ .ifndef IMASK
+ IMASK=0
+ .endif
+ .ifndef IKVM_SKIP
+ IKVM_SKIP=0
+ .endif
+ .ifndef IKVM_REAL
+ IKVM_REAL=0
+ .endif
+ .ifndef IKVM_VIRT
+ IKVM_VIRT=0
+ .endif
+ .ifndef ISTACK
+ ISTACK=1
+ .endif
+ .ifndef IRECONCILE
+ IRECONCILE=1
+ .endif
+ .ifndef IKUAP
+ IKUAP=1
+ .endif
.endm
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
- * If hv is possible, interrupts come into to the hv version
- * of the kvmppc_interrupt code, which then jumps to the PR handler,
- * kvmppc_interrupt_pr, if the guest is a PR guest.
+ * All interrupts which set HSRR registers, as well as SRESET and MCE and
+ * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
+ * so they all generally need to test whether they were taken in guest context.
+ *
+ * Note: SRESET and MCE may also be sent to the guest by the hypervisor, and be
+ * taken with MSR[HV]=0.
+ *
+ * Interrupts which set SRR registers (with the above exceptions) do not
+ * elevate to MSR[HV]=1 mode, though most can be taken when running with
+ * MSR[HV]=1 (e.g., bare metal kernel and userspace). So these interrupts do
+ * not need to test whether a guest is running because they get delivered to
+ * the guest directly, including nested HV KVM guests.
+ *
+ * The exception is PR KVM, where the guest runs with MSR[PR]=1 and the host
+ * runs with MSR[HV]=0, so the host takes all interrupts on behalf of the
+ * guest. PR KVM runs with LPCR[AIL]=0 which causes interrupts to always be
+ * delivered to the real-mode entry point, therefore such interrupts only test
+ * KVM in their real mode handlers, and only when PR KVM is possible.
+ *
+ * Interrupts that are taken in MSR[HV]=0 and escalate to MSR[HV]=1 are always
+ * delivered in real-mode when the MMU is in hash mode because the MMU
+ * registers are not set appropriately to translate host addresses. In nested
+ * radix mode these can be delivered in virt-mode as the host translations are
+ * used implicitly (see: effective LPID, effective PID).
+ */
+
+/*
+ * If an interrupt is taken while a guest is running, it is immediately routed
+ * to KVM to handle. If both HV and PR KVM arepossible, KVM interrupts go first
+ * to kvmppc_interrupt_hv, which handles the PR guest case.
*/
#define kvmppc_interrupt kvmppc_interrupt_hv
#else
#define kvmppc_interrupt kvmppc_interrupt_pr
#endif
-.macro KVMTEST name, hsrr, n
+.macro KVMTEST name
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,0
bne \name\()_kvm
.endm
-.macro KVM_HANDLER vec, hsrr, area, skip
- .if \skip
+.macro GEN_KVM name
+ .balign IFETCH_ALIGN_BYTES
+\name\()_kvm:
+
+ .if IKVM_SKIP
cmpwi r10,KVM_GUEST_MODE_SKIP
beq 89f
.else
-BEGIN_FTR_SECTION_NESTED(947)
- ld r10,\area+EX_CFAR(r13)
+BEGIN_FTR_SECTION
+ ld r10,IAREA+EX_CFAR(r13)
std r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.endif
-BEGIN_FTR_SECTION_NESTED(948)
- ld r10,\area+EX_PPR(r13)
+ ld r10,PACA_EXGEN+EX_CTR(r13)
+ mtctr r10
+BEGIN_FTR_SECTION
+ ld r10,IAREA+EX_PPR(r13)
std r10,HSTATE_PPR(r13)
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
- ld r10,\area+EX_R10(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ld r11,IAREA+EX_R11(r13)
+ ld r12,IAREA+EX_R12(r13)
std r12,HSTATE_SCRATCH0(r13)
sldi r12,r9,32
+ ld r9,IAREA+EX_R9(r13)
+ ld r10,IAREA+EX_R10(r13)
/* HSRR variants have the 0x2 bit added to their trap number */
- .if \hsrr == EXC_HV_OR_STD
+ .if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
- ori r12,r12,(\vec + 0x2)
+ ori r12,r12,(IVEC + 0x2)
FTR_SECTION_ELSE
- ori r12,r12,(\vec)
+ ori r12,r12,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- ori r12,r12,(\vec + 0x2)
+ .elseif IHSRR
+ ori r12,r12,(IVEC+ 0x2)
.else
- ori r12,r12,(\vec)
+ ori r12,r12,(IVEC)
.endif
-
-#ifdef CONFIG_RELOCATABLE
- /*
- * KVM requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
- * outside the head section. CONFIG_RELOCATABLE KVM expects CTR
- * to be saved in HSTATE_SCRATCH1.
- */
- mfctr r9
- std r9,HSTATE_SCRATCH1(r13)
- __LOAD_FAR_HANDLER(r9, kvmppc_interrupt)
- mtctr r9
- ld r9,\area+EX_R9(r13)
- bctr
-#else
- ld r9,\area+EX_R9(r13)
b kvmppc_interrupt
-#endif
-
- .if \skip
+ .if IKVM_SKIP
89: mtocrf 0x80,r9
- ld r9,\area+EX_R9(r13)
- ld r10,\area+EX_R10(r13)
- .if \hsrr == EXC_HV_OR_STD
+ ld r10,PACA_EXGEN+EX_CTR(r13)
+ mtctr r10
+ ld r9,IAREA+EX_R9(r13)
+ ld r10,IAREA+EX_R10(r13)
+ ld r11,IAREA+EX_R11(r13)
+ ld r12,IAREA+EX_R12(r13)
+ .if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
b kvmppc_skip_Hinterrupt
FTR_SECTION_ELSE
b kvmppc_skip_interrupt
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
+ .elseif IHSRR
b kvmppc_skip_Hinterrupt
.else
b kvmppc_skip_interrupt
@@ -284,107 +319,12 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endm
#else
-.macro KVMTEST name, hsrr, n
+.macro KVMTEST name
.endm
-.macro KVM_HANDLER name, vec, hsrr, area, skip
+.macro GEN_KVM name
.endm
#endif
-.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri
- ld r10,PACAKMSR(r13) /* get MSR value for kernel */
- .if ! \set_ri
- xori r10,r10,MSR_RI /* Clear MSR_RI */
- .endif
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- mtspr SPRN_HSRR1,r10
- FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- mtspr SPRN_SRR1,r10
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- mtspr SPRN_HSRR1,r10
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- mtspr SPRN_SRR1,r10
- .endif
- LOAD_HANDLER(r10, \label\())
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mtspr SPRN_HSRR0,r10
- HRFI_TO_KERNEL
- FTR_SECTION_ELSE
- mtspr SPRN_SRR0,r10
- RFI_TO_KERNEL
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mtspr SPRN_HSRR0,r10
- HRFI_TO_KERNEL
- .else
- mtspr SPRN_SRR0,r10
- RFI_TO_KERNEL
- .endif
- b . /* prevent speculative execution */
-.endm
-
-/* INT_SAVE_SRR_AND_JUMP works for real or virt, this is faster but virt only */
-.macro INT_VIRT_SAVE_SRR_AND_JUMP label, hsrr
-#ifdef CONFIG_RELOCATABLE
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- .endif
- LOAD_HANDLER(r12, \label\())
- mtctr r12
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- FTR_SECTION_ELSE
- mfspr r12,SPRN_SRR1 /* and HSRR1 */
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r12,SPRN_SRR1 /* and HSRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- bctr
-#else
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- FTR_SECTION_ELSE
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- b \label
-#endif
-.endm
-
/*
* This is the BOOK3S interrupt entry code macro.
*
@@ -405,14 +345,41 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
* - Fall through and continue executing in real, unrelocated mode.
* This is done if early=2.
*/
-.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, ri=1, dar=0, dsisr=0, bitmask=0, kvm=0
+
+.macro GEN_BRANCH_TO_COMMON name, virt
+ .if IREALMODE_COMMON
+ LOAD_HANDLER(r10, \name\()_common)
+ mtctr r10
+ bctr
+ .else
+ .if \virt
+#ifndef CONFIG_RELOCATABLE
+ b \name\()_common_virt
+#else
+ LOAD_HANDLER(r10, \name\()_common_virt)
+ mtctr r10
+ bctr
+#endif
+ .else
+ LOAD_HANDLER(r10, \name\()_common_real)
+ mtctr r10
+ bctr
+ .endif
+ .endif
+.endm
+
+.macro GEN_INT_ENTRY name, virt, ool=0
SET_SCRATCH0(r13) /* save r13 */
GET_PACA(r13)
- std r9,\area\()+EX_R9(r13) /* save r9 */
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+ std r9,IAREA+EX_R9(r13) /* save r9 */
+BEGIN_FTR_SECTION
+ mfspr r9,SPRN_PPR
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
- std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
- OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+ std r10,IAREA+EX_R10(r13) /* save r10 - r12 */
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_CFAR
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.if \ool
.if !\virt
b tramp_real_\name
@@ -425,47 +392,18 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endif
.endif
- OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
- OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ std r9,IAREA+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+ std r10,IAREA+EX_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
INTERRUPT_TO_KERNEL
- SAVE_CTR(r10, \area\())
+ mfctr r10
+ std r10,IAREA+EX_CTR(r13)
mfcr r9
- .if \kvm
- KVMTEST \name \hsrr \vec
- .endif
- .if \bitmask
- lbz r10,PACAIRQSOFTMASK(r13)
- andi. r10,r10,\bitmask
- /* Associate vector numbers with bits in paca->irq_happened */
- .if \vec == 0x500 || \vec == 0xea0
- li r10,PACA_IRQ_EE
- .elseif \vec == 0x900
- li r10,PACA_IRQ_DEC
- .elseif \vec == 0xa00 || \vec == 0xe80
- li r10,PACA_IRQ_DBELL
- .elseif \vec == 0xe60
- li r10,PACA_IRQ_HMI
- .elseif \vec == 0xf00
- li r10,PACA_IRQ_PMI
- .else
- .abort "Bad maskable vector"
- .endif
-
- .if \hsrr == EXC_HV_OR_STD
- BEGIN_FTR_SECTION
- bne masked_Hinterrupt
- FTR_SECTION_ELSE
- bne masked_interrupt
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif \hsrr
- bne masked_Hinterrupt
- .else
- bne masked_interrupt
- .endif
- .endif
-
- std r11,\area\()+EX_R11(r13)
- std r12,\area\()+EX_R12(r13)
+ std r11,IAREA+EX_R11(r13)
+ std r12,IAREA+EX_R12(r13)
/*
* DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
@@ -473,49 +411,134 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
* not recoverable if they are live.
*/
GET_SCRATCH0(r10)
- std r10,\area\()+EX_R13(r13)
- .if \dar
- .if \hsrr
+ std r10,IAREA+EX_R13(r13)
+ .if IDAR && !IISIDE
+ .if IHSRR
mfspr r10,SPRN_HDAR
.else
mfspr r10,SPRN_DAR
.endif
- std r10,\area\()+EX_DAR(r13)
+ std r10,IAREA+EX_DAR(r13)
.endif
- .if \dsisr
- .if \hsrr
+ .if IDSISR && !IISIDE
+ .if IHSRR
mfspr r10,SPRN_HDSISR
.else
mfspr r10,SPRN_DSISR
.endif
- stw r10,\area\()+EX_DSISR(r13)
+ stw r10,IAREA+EX_DSISR(r13)
.endif
- .if \early == 2
- /* nothing more */
- .elseif \early
- mfctr r10 /* save ctr, even for !RELOCATABLE */
- BRANCH_TO_C000(r11, \name\()_early_common)
- .elseif !\virt
- INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
.else
- INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
.endif
+
+ .if IBRANCH_TO_COMMON
+ GEN_BRANCH_TO_COMMON \name \virt
+ .endif
+
.if \ool
.popsection
.endif
.endm
/*
- * On entry r13 points to the paca, r9-r13 are saved in the paca,
- * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
- * SRR1, and relocation is on.
+ * __GEN_COMMON_ENTRY is required to receive the branch from interrupt
+ * entry, except in the case of the real-mode handlers which require
+ * __GEN_REALMODE_COMMON_ENTRY.
*
- * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
- * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
+ * This switches to virtual mode and sets MSR[RI].
+ */
+.macro __GEN_COMMON_ENTRY name
+DEFINE_FIXED_SYMBOL(\name\()_common_real)
+\name\()_common_real:
+ .if IKVM_REAL
+ KVMTEST \name
+ .endif
+
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ /* MSR[RI] is clear iff using SRR regs */
+ .if IHSRR == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ xori r10,r10,MSR_RI
+ END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
+ .elseif ! IHSRR
+ xori r10,r10,MSR_RI
+ .endif
+ mtmsrd r10
+
+ .if IVIRT
+ .if IKVM_VIRT
+ b 1f /* skip the virt test coming from real */
+ .endif
+
+ .balign IFETCH_ALIGN_BYTES
+DEFINE_FIXED_SYMBOL(\name\()_common_virt)
+\name\()_common_virt:
+ .if IKVM_VIRT
+ KVMTEST \name
+1:
+ .endif
+ .endif /* IVIRT */
+.endm
+
+/*
+ * Don't switch to virt mode. Used for early MCE and HMI handlers that
+ * want to run in real mode.
*/
-.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr
- .if \stack
+.macro __GEN_REALMODE_COMMON_ENTRY name
+DEFINE_FIXED_SYMBOL(\name\()_common_real)
+\name\()_common_real:
+ .if IKVM_REAL
+ KVMTEST \name
+ .endif
+.endm
+
+.macro __GEN_COMMON_BODY name
+ .if IMASK
+ lbz r10,PACAIRQSOFTMASK(r13)
+ andi. r10,r10,IMASK
+ /* Associate vector numbers with bits in paca->irq_happened */
+ .if IVEC == 0x500 || IVEC == 0xea0
+ li r10,PACA_IRQ_EE
+ .elseif IVEC == 0x900
+ li r10,PACA_IRQ_DEC
+ .elseif IVEC == 0xa00 || IVEC == 0xe80
+ li r10,PACA_IRQ_DBELL
+ .elseif IVEC == 0xe60
+ li r10,PACA_IRQ_HMI
+ .elseif IVEC == 0xf00
+ li r10,PACA_IRQ_PMI
+ .else
+ .abort "Bad maskable vector"
+ .endif
+
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ bne masked_Hinterrupt
+ FTR_SECTION_ELSE
+ bne masked_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ bne masked_Hinterrupt
+ .else
+ bne masked_interrupt
+ .endif
+ .endif
+
+ .if ISTACK
andi. r10,r12,MSR_PR /* See if coming from user */
mr r10,r1 /* Save r1 */
subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
@@ -532,54 +555,67 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r0,GPR0(r1) /* save r0 in stackframe */
std r10,GPR1(r1) /* save r1 in stackframe */
- .if \stack
- .if \kuap
+ .if ISET_RI
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set MSR_RI */
+ .endif
+
+ .if ISTACK
+ .if IKUAP
kuap_save_amr_and_lock r9, r10, cr1, cr0
.endif
beq 101f /* if from kernel mode */
ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
- SAVE_PPR(\area, r9)
+BEGIN_FTR_SECTION
+ ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */
+ std r9,_PPR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
101:
.else
- .if \kuap
+ .if IKUAP
kuap_save_amr_and_lock r9, r10, cr1
.endif
.endif
/* Save original regs values from save area to stack frame. */
- ld r9,\area+EX_R9(r13) /* move r9, r10 to stackframe */
- ld r10,\area+EX_R10(r13)
+ ld r9,IAREA+EX_R9(r13) /* move r9, r10 to stackframe */
+ ld r10,IAREA+EX_R10(r13)
std r9,GPR9(r1)
std r10,GPR10(r1)
- ld r9,\area+EX_R11(r13) /* move r11 - r13 to stackframe */
- ld r10,\area+EX_R12(r13)
- ld r11,\area+EX_R13(r13)
+ ld r9,IAREA+EX_R11(r13) /* move r11 - r13 to stackframe */
+ ld r10,IAREA+EX_R12(r13)
+ ld r11,IAREA+EX_R13(r13)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
- .if \dar
- .if \dar == 2
+
+ SAVE_NVGPRS(r1)
+
+ .if IDAR
+ .if IISIDE
ld r10,_NIP(r1)
.else
- ld r10,\area+EX_DAR(r13)
+ ld r10,IAREA+EX_DAR(r13)
.endif
std r10,_DAR(r1)
.endif
- .if \dsisr
- .if \dsisr == 2
+
+ .if IDSISR
+ .if IISIDE
ld r10,_MSR(r1)
lis r11,DSISR_SRR1_MATCH_64S@h
and r10,r10,r11
.else
- lwz r10,\area+EX_DSISR(r13)
+ lwz r10,IAREA+EX_DSISR(r13)
.endif
std r10,_DSISR(r1)
.endif
-BEGIN_FTR_SECTION_NESTED(66)
- ld r10,\area+EX_CFAR(r13)
+
+BEGIN_FTR_SECTION
+ ld r10,IAREA+EX_CFAR(r13)
std r10,ORIG_GPR3(r1)
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
- GET_CTR(r10, \area)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
std r2,GPR2(r1) /* save r2 in stackframe */
SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
@@ -591,32 +627,42 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
mfspr r11,SPRN_XER /* save XER in stackframe */
std r10,SOFTE(r1)
std r11,_XER(r1)
- li r9,(\vec)+1
+ li r9,IVEC
std r9,_TRAP(r1) /* set trap number */
li r10,0
ld r11,exception_marker@toc(r2)
std r10,RESULT(r1) /* clear regs->result */
std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
- .if \stack
+ .if ISTACK
ACCOUNT_STOLEN_TIME
.endif
- .if \reconcile
+ .if IRECONCILE
RECONCILE_IRQ_STATE(r10, r11)
.endif
.endm
/*
+ * On entry r13 points to the paca, r9-r13 are saved in the paca,
+ * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
+ * SRR1, and relocation is on.
+ *
+ * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
+ * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
+ */
+.macro GEN_COMMON name
+ __GEN_COMMON_ENTRY \name
+ __GEN_COMMON_BODY \name
+.endm
+
+/*
* Restore all registers including H/SRR0/1 saved in a stack frame of a
* standard exception.
*/
-.macro EXCEPTION_RESTORE_REGS hsrr
+.macro EXCEPTION_RESTORE_REGS hsrr=0
/* Move original SRR0 and SRR1 into the respective regs */
ld r9,_MSR(r1)
- .if \hsrr == EXC_HV_OR_STD
- .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS"
- .endif
.if \hsrr
mtspr SPRN_HSRR1,r9
.else
@@ -670,28 +716,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
#define FINISH_NAP
#endif
-#define EXC_COMMON(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
- bl save_nvgprs; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b ret_from_except
-
-/*
- * Like EXC_COMMON, but for exceptions that can occur in the idle task and
- * therefore need the special idle handling (finish nap and runlatch)
- */
-#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
- FINISH_NAP; \
- RUNLATCH_ON; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b ret_from_except_lite
-
-
/*
* There are a few constraints to be concerned with.
* - Real mode exceptions code/data must be located at their physical location.
@@ -778,6 +802,53 @@ __start_interrupts:
EXC_VIRT_NONE(0x4000, 0x100)
+/**
+ * Interrupt 0x100 - System Reset Interrupt (SRESET aka NMI).
+ * This is a non-maskable, asynchronous interrupt always taken in real-mode.
+ * It is caused by:
+ * - Wake from power-saving state, on powernv.
+ * - An NMI from another CPU, triggered by firmware or hypercall.
+ * - As crash/debug signal injected from BMC, firmware or hypervisor.
+ *
+ * Handling:
+ * Power-save wakeup is the only performance critical path, so this is
+ * determined quickly as possible first. In this case volatile registers
+ * can be discarded and SPRs like CFAR don't need to be read.
+ *
+ * If not a powersave wakeup, then it's run as a regular interrupt, however
+ * it uses its own stack and PACA save area to preserve the regular kernel
+ * environment for debugging.
+ *
+ * This interrupt is not maskable, so triggering it when MSR[RI] is clear,
+ * or SCRATCH0 is in use, etc. may cause a crash. It's also not entirely
+ * correct to switch to virtual mode to run the regular interrupt handler
+ * because it might be interrupted when the MMU is in a bad state (e.g., SLB
+ * is clear).
+ *
+ * FWNMI:
+ * PAPR specifies a "fwnmi" facility which sends the sreset to a different
+ * entry point with a different register set up. Some hypervisors will
+ * send the sreset to 0x100 in the guest if it is not fwnmi capable.
+ *
+ * KVM:
+ * Unlike most SRR interrupts, this may be taken by the host while executing
+ * in a guest, so a KVM test is required. KVM will pull the CPU out of guest
+ * mode and then raise the sreset.
+ */
+INT_DEFINE_BEGIN(system_reset)
+ IVEC=0x100
+ IAREA=PACA_EXNMI
+ IVIRT=0 /* no virt entry point */
+ /*
+ * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
+ * being used, so a nested NMI exception would corrupt it.
+ */
+ ISET_RI=0
+ ISTACK=0
+ IRECONCILE=0
+ IKVM_REAL=1
+INT_DEFINE_END(system_reset)
+
EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
/*
@@ -815,11 +886,8 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif
- INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1
+ GEN_INT_ENTRY system_reset, virt=0
/*
- * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
- * being used, so a nested NMI exception would corrupt it.
- *
* In theory, we should not enable relocation here if it was disabled
* in SRR1, because the MMU may not be configured to support it (e.g.,
* SLB may have been cleared). In practice, there should only be a few
@@ -828,7 +896,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
*/
EXC_REAL_END(system_reset, 0x100, 0x100)
EXC_VIRT_NONE(0x4100, 0x100)
-INT_KVM_HANDLER system_reset 0x100, EXC_STD, PACA_EXNMI, 0
#ifdef CONFIG_PPC_P7_NAP
TRAMP_REAL_BEGIN(system_reset_idle_wake)
@@ -843,12 +910,14 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
* Vectors for the FWNMI option. Share common code.
*/
TRAMP_REAL_BEGIN(system_reset_fwnmi)
- /* See comment at system_reset exception, don't turn on RI */
- INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0
+ /* XXX: fwnmi guest could run a nested/PR guest, so why no test? */
+ __IKVM_REAL(system_reset)=0
+ GEN_INT_ENTRY system_reset, virt=0
#endif /* CONFIG_PPC_PSERIES */
EXC_COMMON_BEGIN(system_reset_common)
+ __GEN_COMMON_ENTRY system_reset
/*
* Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
* to recover, but nested NMI will notice in_nmi and not recover
@@ -864,21 +933,21 @@ EXC_COMMON_BEGIN(system_reset_common)
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
- INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0
- bl save_nvgprs
+ __GEN_COMMON_BODY system_reset
/*
- * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
+ * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
* the right thing. We do not want to reconcile because that goes
* through irq tracing which we don't want in NMI.
*
- * Save PACAIRQHAPPENED because some code will do a hard disable
- * (e.g., xmon). So we want to restore this back to where it was
- * when we return. DAR is unused in the stack, so save it there.
+ * Save PACAIRQHAPPENED to _DAR (otherwise unused), and set HARD_DIS
+ * as we are running with MSR[EE]=0.
*/
li r10,IRQS_ALL_DISABLED
stb r10,PACAIRQSOFTMASK(r13)
lbz r10,PACAIRQHAPPENED(r13)
std r10,_DAR(r1)
+ ori r10,r10,PACA_IRQ_HARD_DIS
+ stb r10,PACAIRQHAPPENED(r13)
addi r3,r1,STACK_FRAME_OVERHEAD
bl system_reset_exception
@@ -902,28 +971,95 @@ EXC_COMMON_BEGIN(system_reset_common)
ld r10,SOFTE(r1)
stb r10,PACAIRQSOFTMASK(r13)
- EXCEPTION_RESTORE_REGS EXC_STD
+ EXCEPTION_RESTORE_REGS
RFI_TO_USER_OR_KERNEL
+ GEN_KVM system_reset
-EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
- INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+
+/**
+ * Interrupt 0x200 - Machine Check Interrupt (MCE).
+ * This is a non-maskable interrupt always taken in real-mode. It can be
+ * synchronous or asynchronous, caused by hardware or software, and it may be
+ * taken in a power-saving state.
+ *
+ * Handling:
+ * Similarly to system reset, this uses its own stack and PACA save area,
+ * the difference is re-entrancy is allowed on the machine check stack.
+ *
+ * machine_check_early is run in real mode, and carefully decodes the
+ * machine check and tries to handle it (e.g., flush the SLB if there was an
+ * error detected there), determines if it was recoverable and logs the
+ * event.
+ *
+ * This early code does not "reconcile" irq soft-mask state like SRESET or
+ * regular interrupts do, so irqs_disabled() among other things may not work
+ * properly (irq disable/enable already doesn't work because irq tracing can
+ * not work in real mode).
+ *
+ * Then, depending on the execution context when the interrupt is taken, there
+ * are 3 main actions:
+ * - Executing in kernel mode. The event is queued with irq_work, which means
+ * it is handled when it is next safe to do so (i.e., the kernel has enabled
+ * interrupts), which could be immediately when the interrupt returns. This
+ * avoids nasty issues like switching to virtual mode when the MMU is in a
+ * bad state, or when executing OPAL code. (SRESET is exposed to such issues,
+ * but it has different priorities). Check to see if the CPU was in power
+ * save, and return via the wake up code if it was.
+ *
+ * - Executing in user mode. machine_check_exception is run like a normal
+ * interrupt handler, which processes the data generated by the early handler.
+ *
+ * - Executing in guest mode. The interrupt is run with its KVM test, and
+ * branches to KVM to deal with. KVM may queue the event for the host
+ * to report later.
+ *
+ * This interrupt is not maskable, so if it triggers when MSR[RI] is clear,
+ * or SCRATCH0 is in use, it may cause a crash.
+ *
+ * KVM:
+ * See SRESET.
+ */
+INT_DEFINE_BEGIN(machine_check_early)
+ IVEC=0x200
+ IAREA=PACA_EXMC
+ IVIRT=0 /* no virt entry point */
+ IREALMODE_COMMON=1
/*
* MSR_RI is not enabled, because PACA_EXMC is being used, so a
* nested machine check corrupts it. machine_check_common enables
* MSR_RI.
*/
+ ISET_RI=0
+ ISTACK=0
+ IDAR=1
+ IDSISR=1
+ IRECONCILE=0
+ IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+INT_DEFINE_END(machine_check_early)
+
+INT_DEFINE_BEGIN(machine_check)
+ IVEC=0x200
+ IAREA=PACA_EXMC
+ IVIRT=0 /* no virt entry point */
+ ISET_RI=0
+ IDAR=1
+ IDSISR=1
+ IKVM_SKIP=1
+ IKVM_REAL=1
+INT_DEFINE_END(machine_check)
+
+EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
+ GEN_INT_ENTRY machine_check_early, virt=0
EXC_REAL_END(machine_check, 0x200, 0x100)
EXC_VIRT_NONE(0x4200, 0x100)
#ifdef CONFIG_PPC_PSERIES
TRAMP_REAL_BEGIN(machine_check_fwnmi)
/* See comment at machine_check exception, don't turn on RI */
- INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+ GEN_INT_ENTRY machine_check_early, virt=0
#endif
-INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
-
#define MACHINE_CHECK_HANDLER_WINDUP \
/* Clear MSR_RI before setting SRR0 and SRR1. */\
li r9,0; \
@@ -932,12 +1068,10 @@ INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
lhz r12,PACA_IN_MCE(r13); \
subi r12,r12,1; \
sth r12,PACA_IN_MCE(r13); \
- EXCEPTION_RESTORE_REGS EXC_STD
+ EXCEPTION_RESTORE_REGS
EXC_COMMON_BEGIN(machine_check_early_common)
- mtctr r10 /* Restore ctr */
- mfspr r11,SPRN_SRR0
- mfspr r12,SPRN_SRR1
+ __GEN_REALMODE_COMMON_ENTRY machine_check_early
/*
* Switch to mc_emergency stack and handle re-entrancy (we limit
@@ -974,8 +1108,7 @@ EXC_COMMON_BEGIN(machine_check_early_common)
bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- /* We don't touch AMR here, we never go to virtual mode */
- INT_COMMON 0x200, PACA_EXMC, 0, 0, 0, 1, 1
+ __GEN_COMMON_BODY machine_check_early
BEGIN_FTR_SECTION
bl enable_machine_check
@@ -983,7 +1116,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
li r10,MSR_RI
mtmsrd r10,1
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_early
std r3,RESULT(r1) /* Save result */
@@ -1063,23 +1195,25 @@ BEGIN_FTR_SECTION
mtspr SPRN_CFAR,r10
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
MACHINE_CHECK_HANDLER_WINDUP
- /* See comment at machine_check exception, don't turn on RI */
- INT_HANDLER machine_check, 0x200, area=PACA_EXMC, ri=0, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY machine_check, virt=0
EXC_COMMON_BEGIN(machine_check_common)
/*
* Machine check is different because we use a different
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
- INT_COMMON 0x200, PACA_EXMC, 1, 1, 1, 1, 1
+ GEN_COMMON machine_check
+
FINISH_NAP
/* Enable MSR_RI when finished with PACA_EXMC */
li r10,MSR_RI
mtmsrd r10,1
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
- b ret_from_except
+ b interrupt_return
+
+ GEN_KVM machine_check
+
#ifdef CONFIG_PPC_P7_NAP
/*
@@ -1144,21 +1278,48 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
b .
+/**
+ * Interrupt 0x300 - Data Storage Interrupt (DSI).
+ * This is a synchronous interrupt generated due to a data access exception,
+ * e.g., a load orstore which does not have a valid page table entry with
+ * permissions. DAWR matches also fault here, as do RC updates, and minor misc
+ * errors e.g., copy/paste, AMO, certain invalid CI accesses, etc.
+ *
+ * Handling:
+ * - Hash MMU
+ * Go to do_hash_page first to see if the HPT can be filled from an entry in
+ * the Linux page table. Hash faults can hit in kernel mode in a fairly
+ * arbitrary state (e.g., interrupts disabled, locks held) when accessing
+ * "non-bolted" regions, e.g., vmalloc space. However these should always be
+ * backed by Linux page tables.
+ *
+ * If none is found, do a Linux page fault. Linux page faults can happen in
+ * kernel mode due to user copy operations of course.
+ *
+ * - Radix MMU
+ * The hardware loads from the Linux page table directly, so a fault goes
+ * immediately to Linux page fault.
+ *
+ * Conditions like DAWR match are handled on the way in to Linux page fault.
+ */
+INT_DEFINE_BEGIN(data_access)
+ IVEC=0x300
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_SKIP=1
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(data_access)
+
EXC_REAL_BEGIN(data_access, 0x300, 0x80)
- INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY data_access, virt=0
EXC_REAL_END(data_access, 0x300, 0x80)
EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
- INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
+ GEN_INT_ENTRY data_access, virt=1
EXC_VIRT_END(data_access, 0x4300, 0x80)
-INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON_BEGIN(data_access_common)
- /*
- * Here r13 points to the paca, r9 contains the saved CR,
- * SRR0 and SRR1 are saved in r11 and r12,
- * r9 - r13 are saved in paca->exgen.
- * EX_DAR and EX_DSISR have saved DAR/DSISR
- */
- INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1
+ GEN_COMMON data_access
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
@@ -1169,16 +1330,46 @@ MMU_FTR_SECTION_ELSE
b handle_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ GEN_KVM data_access
+
+
+/**
+ * Interrupt 0x380 - Data Segment Interrupt (DSLB).
+ * This is a synchronous interrupt in response to an MMU fault missing SLB
+ * entry for HPT, or an address outside RPT translation range.
+ *
+ * Handling:
+ * - HPT:
+ * This refills the SLB, or reports an access fault similarly to a bad page
+ * fault. When coming from user-mode, the SLB handler may access any kernel
+ * data, though it may itself take a DSLB. When coming from kernel mode,
+ * recursive faults must be avoided so access is restricted to the kernel
+ * image text/data, kernel stack, and any data allocated below
+ * ppc64_bolted_size (first segment). The kernel handler must avoid stomping
+ * on user-handler data structures.
+ *
+ * A dedicated save area EXSLB is used (XXX: but it actually need not be
+ * these days, we could use EXGEN).
+ */
+INT_DEFINE_BEGIN(data_access_slb)
+ IVEC=0x380
+ IAREA=PACA_EXSLB
+ IRECONCILE=0
+ IDAR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_SKIP=1
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(data_access_slb)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
- INT_HANDLER data_access_slb, 0x380, ool=1, area=PACA_EXSLB, dar=1, kvm=1
+ GEN_INT_ENTRY data_access_slb, virt=0
EXC_REAL_END(data_access_slb, 0x380, 0x80)
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
- INT_HANDLER data_access_slb, 0x380, virt=1, area=PACA_EXSLB, dar=1
+ GEN_INT_ENTRY data_access_slb, virt=1
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
-INT_KVM_HANDLER data_access_slb, 0x380, EXC_STD, PACA_EXSLB, 1
EXC_COMMON_BEGIN(data_access_slb_common)
- INT_COMMON 0x380, PACA_EXSLB, 1, 1, 0, 1, 0
+ GEN_COMMON data_access_slb
ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
@@ -1186,31 +1377,50 @@ BEGIN_MMU_FTR_SECTION
bl do_slb_fault
cmpdi r3,0
bne- 1f
- b fast_exception_return
+ b fast_interrupt_return
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
ld r4,_DAR(r1)
ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
- b ret_from_except
+ b interrupt_return
+
+ GEN_KVM data_access_slb
+/**
+ * Interrupt 0x400 - Instruction Storage Interrupt (ISI).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSI, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR), and status in SRR1 (rather than DSISR).
+ */
+INT_DEFINE_BEGIN(instruction_access)
+ IVEC=0x400
+ IISIDE=1
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_access)
+
EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
- INT_HANDLER instruction_access, 0x400, kvm=1
+ GEN_INT_ENTRY instruction_access, virt=0
EXC_REAL_END(instruction_access, 0x400, 0x80)
EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
- INT_HANDLER instruction_access, 0x400, virt=1
+ GEN_INT_ENTRY instruction_access, virt=1
EXC_VIRT_END(instruction_access, 0x4400, 0x80)
-INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(instruction_access_common)
- INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2
+ GEN_COMMON instruction_access
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
@@ -1221,16 +1431,37 @@ MMU_FTR_SECTION_ELSE
b handle_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ GEN_KVM instruction_access
+
+
+/**
+ * Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSLB, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR).
+ */
+INT_DEFINE_BEGIN(instruction_access_slb)
+ IVEC=0x480
+ IAREA=PACA_EXSLB
+ IRECONCILE=0
+ IISIDE=1
+ IDAR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_access_slb)
EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
- INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1
+ GEN_INT_ENTRY instruction_access_slb, virt=0
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
- INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB
+ GEN_INT_ENTRY instruction_access_slb, virt=1
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
-INT_KVM_HANDLER instruction_access_slb, 0x480, EXC_STD, PACA_EXSLB, 0
EXC_COMMON_BEGIN(instruction_access_slb_common)
- INT_COMMON 0x480, PACA_EXSLB, 1, 1, 0, 2, 0
+ GEN_COMMON instruction_access_slb
ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
@@ -1238,54 +1469,125 @@ BEGIN_MMU_FTR_SECTION
bl do_slb_fault
cmpdi r3,0
bne- 1f
- b fast_exception_return
+ b fast_interrupt_return
1: /* Error case */
MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
ld r4,_DAR(r1)
ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
- b ret_from_except
+ b interrupt_return
+
+ GEN_KVM instruction_access_slb
+
+
+/**
+ * Interrupt 0x500 - External Interrupt.
+ * This is an asynchronous maskable interrupt in response to an "external
+ * exception" from the interrupt controller or hypervisor (e.g., device
+ * interrupt). It is maskable in hardware by clearing MSR[EE], and
+ * soft-maskable with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * When running in HV mode, Linux sets up the LPCR[LPES] bit such that
+ * interrupts are delivered with HSRR registers, guests use SRRs, which
+ * reqiures IHSRR_IF_HVMODE.
+ *
+ * On bare metal POWER9 and later, Linux sets the LPCR[HVICE] bit such that
+ * external interrupts are delivered as Hypervisor Virtualization Interrupts
+ * rather than External Interrupts.
+ *
+ * Handling:
+ * This calls into Linux IRQ handler. NVGPRs are not saved to reduce overhead,
+ * because registers at the time of the interrupt are not so important as it is
+ * asynchronous.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ */
+INT_DEFINE_BEGIN(hardware_interrupt)
+ IVEC=0x500
+ IHSRR_IF_HVMODE=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(hardware_interrupt)
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
- INT_HANDLER hardware_interrupt, 0x500, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY hardware_interrupt, virt=0
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
- INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY hardware_interrupt, virt=1
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
-INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
+EXC_COMMON_BEGIN(hardware_interrupt_common)
+ GEN_COMMON hardware_interrupt
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_IRQ
+ b interrupt_return
+
+ GEN_KVM hardware_interrupt
+/**
+ * Interrupt 0x600 - Alignment Interrupt
+ * This is a synchronous interrupt in response to data alignment fault.
+ */
+INT_DEFINE_BEGIN(alignment)
+ IVEC=0x600
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(alignment)
+
EXC_REAL_BEGIN(alignment, 0x600, 0x100)
- INT_HANDLER alignment, 0x600, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY alignment, virt=0
EXC_REAL_END(alignment, 0x600, 0x100)
EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
- INT_HANDLER alignment, 0x600, virt=1, dar=1, dsisr=1
+ GEN_INT_ENTRY alignment, virt=1
EXC_VIRT_END(alignment, 0x4600, 0x100)
-INT_KVM_HANDLER alignment, 0x600, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(alignment_common)
- INT_COMMON 0x600, PACA_EXGEN, 1, 1, 1, 1, 1
- bl save_nvgprs
+ GEN_COMMON alignment
addi r3,r1,STACK_FRAME_OVERHEAD
bl alignment_exception
- b ret_from_except
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ b interrupt_return
+
+ GEN_KVM alignment
+/**
+ * Interrupt 0x700 - Program Interrupt (program check).
+ * This is a synchronous interrupt in response to various instruction faults:
+ * traps, privilege errors, TM errors, floating point exceptions.
+ *
+ * Handling:
+ * This interrupt may use the "emergency stack" in some cases when being taken
+ * from kernel context, which complicates handling.
+ */
+INT_DEFINE_BEGIN(program_check)
+ IVEC=0x700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(program_check)
+
EXC_REAL_BEGIN(program_check, 0x700, 0x100)
- INT_HANDLER program_check, 0x700, kvm=1
+ GEN_INT_ENTRY program_check, virt=0
EXC_REAL_END(program_check, 0x700, 0x100)
EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
- INT_HANDLER program_check, 0x700, virt=1
+ GEN_INT_ENTRY program_check, virt=1
EXC_VIRT_END(program_check, 0x4700, 0x100)
-INT_KVM_HANDLER program_check, 0x700, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(program_check_common)
+ __GEN_COMMON_ENTRY program_check
+
/*
* It's possible to receive a TM Bad Thing type program check with
* userspace register values (in particular r1), but with SRR1 reporting
@@ -1310,28 +1612,47 @@ EXC_COMMON_BEGIN(program_check_common)
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- INT_COMMON 0x700, PACA_EXGEN, 0, 1, 1, 0, 0
+ __ISTACK(program_check)=0
+ __GEN_COMMON_BODY program_check
b 3f
2:
- INT_COMMON 0x700, PACA_EXGEN, 1, 1, 1, 0, 0
+ __ISTACK(program_check)=1
+ __GEN_COMMON_BODY program_check
3:
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
- b ret_from_except
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ b interrupt_return
+
+ GEN_KVM program_check
+/*
+ * Interrupt 0x800 - Floating-Point Unavailable Interrupt.
+ * This is a synchronous interrupt in response to executing an fp instruction
+ * with MSR[FP]=0.
+ *
+ * Handling:
+ * This will load FP registers and enable the FP bit if coming from userspace,
+ * otherwise report a bad kernel use of FP.
+ */
+INT_DEFINE_BEGIN(fp_unavailable)
+ IVEC=0x800
+ IRECONCILE=0
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(fp_unavailable)
+
EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
- INT_HANDLER fp_unavailable, 0x800, kvm=1
+ GEN_INT_ENTRY fp_unavailable, virt=0
EXC_REAL_END(fp_unavailable, 0x800, 0x100)
EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)
- INT_HANDLER fp_unavailable, 0x800, virt=1
+ GEN_INT_ENTRY fp_unavailable, virt=1
EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
-INT_KVM_HANDLER fp_unavailable, 0x800, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(fp_unavailable_common)
- INT_COMMON 0x800, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON fp_unavailable
bne 1f /* if from user, just load it up */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
@@ -1348,64 +1669,168 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
bl load_up_fpu
- b fast_exception_return
+ b fast_interrupt_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl fp_unavailable_tm
- b ret_from_except
+ b interrupt_return
#endif
+ GEN_KVM fp_unavailable
+
+
+/**
+ * Interrupt 0x900 - Decrementer Interrupt.
+ * This is an asynchronous interrupt in response to a decrementer exception
+ * (e.g., DEC has wrapped below zero). It is maskable in hardware by clearing
+ * MSR[EE], and soft-maskable with IRQS_DISABLED mask (i.e.,
+ * local_irq_disable()).
+ *
+ * Handling:
+ * This calls into Linux timer handler. NVGPRs are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and bump the decrementer to a high value, leaving MSR[EE] enabled
+ * in the interrupted context.
+ * If PPC_WATCHDOG is configured, the soft masked handler will actually set
+ * things back up to run soft_nmi_interrupt as a regular interrupt handler
+ * on the emergency stack.
+ */
+INT_DEFINE_BEGIN(decrementer)
+ IVEC=0x900
+ IMASK=IRQS_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(decrementer)
EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
- INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY decrementer, virt=0
EXC_REAL_END(decrementer, 0x900, 0x80)
EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
- INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED
+ GEN_INT_ENTRY decrementer, virt=1
EXC_VIRT_END(decrementer, 0x4900, 0x80)
-INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
+EXC_COMMON_BEGIN(decrementer_common)
+ GEN_COMMON decrementer
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl timer_interrupt
+ b interrupt_return
+ GEN_KVM decrementer
+
+
+/**
+ * Interrupt 0x980 - Hypervisor Decrementer Interrupt.
+ * This is an asynchronous interrupt, similar to 0x900 but for the HDEC
+ * register.
+ *
+ * Handling:
+ * Linux does not use this outside KVM where it's used to keep a host timer
+ * while the guest is given control of DEC. It should normally be caught by
+ * the KVM test and routed there.
+ */
+INT_DEFINE_BEGIN(hdecrementer)
+ IVEC=0x980
+ IHSRR=1
+ ISTACK=0
+ IRECONCILE=0
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(hdecrementer)
EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
- INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY hdecrementer, virt=0
EXC_REAL_END(hdecrementer, 0x980, 0x80)
EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
- INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY hdecrementer, virt=1
EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
-INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
+EXC_COMMON_BEGIN(hdecrementer_common)
+ __GEN_COMMON_ENTRY hdecrementer
+ /*
+ * Hypervisor decrementer interrupts not caught by the KVM test
+ * shouldn't occur but are sometimes left pending on exit from a KVM
+ * guest. We don't need to do anything to clear them, as they are
+ * edge-triggered.
+ *
+ * Be careful to avoid touching the kernel stack.
+ */
+ ld r10,PACA_EXGEN+EX_CTR(r13)
+ mtctr r10
+ mtcrf 0x80,r9
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ HRFI_TO_KERNEL
+ GEN_KVM hdecrementer
+
+
+/**
+ * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsndp doorbell.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * Guests may use this for IPIs between threads in a core if the
+ * hypervisor supports it. NVGPRS are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, leaving MSR[EE] enabled in the interrupted context because the
+ * doorbells are edge triggered.
+ */
+INT_DEFINE_BEGIN(doorbell_super)
+ IVEC=0xa00
+ IMASK=IRQS_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(doorbell_super)
EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
- INT_HANDLER doorbell_super, 0xa00, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY doorbell_super, virt=0
EXC_REAL_END(doorbell_super, 0xa00, 0x100)
EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
- INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED
+ GEN_INT_ENTRY doorbell_super, virt=1
EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
-INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(doorbell_super_common)
+ GEN_COMMON doorbell_super
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
-EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
+ bl doorbell_exception
#else
-EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception)
+ bl unknown_exception
#endif
+ b interrupt_return
+
+ GEN_KVM doorbell_super
EXC_REAL_NONE(0xb00, 0x100)
EXC_VIRT_NONE(0x4b00, 0x100)
-/*
- * system call / hypercall (0xc00, 0x4c00)
- *
- * The system call exception is invoked with "sc 0" and does not alter HV bit.
- *
- * The hypercall is invoked with "sc 1" and sets HV=1.
+/**
+ * Interrupt 0xc00 - System Call Interrupt (syscall, hcall).
+ * This is a synchronous interrupt invoked with the "sc" instruction. The
+ * system call is invoked with "sc 0" and does not alter the HV bit, so it
+ * is directed to the currently running OS. The hypercall is invoked with
+ * "sc 1" and it sets HV=1, so it elevates to hypervisor.
*
* In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to
* 0x4c00 virtual mode.
*
+ * Handling:
+ * If the KVM test fires then it was due to a hypercall and is accordingly
+ * routed to KVM. Otherwise this executes a normal Linux system call.
+ *
* Call convention:
*
* syscall and hypercalls register conventions are documented in
@@ -1417,6 +1842,12 @@ EXC_VIRT_NONE(0x4b00, 0x100)
* without saving, though xer is not a good idea to use, as hardware may
* interpret some bits so it may be costly to change them.
*/
+INT_DEFINE_BEGIN(system_call)
+ IVEC=0xc00
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(system_call)
+
.macro SYSTEM_CALL virt
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
@@ -1431,7 +1862,7 @@ EXC_VIRT_NONE(0x4b00, 0x100)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
- KVMTEST system_call EXC_STD 0xc00 /* uses r10, branch to system_call_kvm */
+ KVMTEST system_call /* uses r10, branch to system_call_kvm */
mfctr r9
#else
mr r9,r13
@@ -1490,6 +1921,7 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
EXC_VIRT_END(system_call, 0x4c00, 0x100)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+TRAMP_REAL_BEGIN(system_call_kvm)
/*
* This is a hcall, so register convention is as above, with these
* differences:
@@ -1497,43 +1929,95 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
* ctr = orig r13
* orig r10 saved in PACA
*/
-TRAMP_KVM_BEGIN(system_call_kvm)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
* guest state (it is the guest's PPR value).
*/
- OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_PPR
+ std r10,HSTATE_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
- OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR)
mfctr r10
SET_SCRATCH0(r10)
- std r9,PACA_EXGEN+EX_R9(r13)
- mfcr r9
- KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0
+ mfcr r10
+ std r12,HSTATE_SCRATCH0(r13)
+ sldi r12,r10,32
+ ori r12,r12,0xc00
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
+ * outside the head section.
+ */
+ __LOAD_FAR_HANDLER(r10, kvmppc_interrupt)
+ mtctr r10
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ bctr
+#else
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ b kvmppc_interrupt
+#endif
#endif
+/**
+ * Interrupt 0xd00 - Trace Interrupt.
+ * This is a synchronous interrupt in response to instruction step or
+ * breakpoint faults.
+ */
+INT_DEFINE_BEGIN(single_step)
+ IVEC=0xd00
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(single_step)
+
EXC_REAL_BEGIN(single_step, 0xd00, 0x100)
- INT_HANDLER single_step, 0xd00, kvm=1
+ GEN_INT_ENTRY single_step, virt=0
EXC_REAL_END(single_step, 0xd00, 0x100)
EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
- INT_HANDLER single_step, 0xd00, virt=1
+ GEN_INT_ENTRY single_step, virt=1
EXC_VIRT_END(single_step, 0x4d00, 0x100)
-INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON(single_step_common, 0xd00, single_step_exception)
+EXC_COMMON_BEGIN(single_step_common)
+ GEN_COMMON single_step
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl single_step_exception
+ b interrupt_return
+ GEN_KVM single_step
+
+
+/**
+ * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest data access.
+ *
+ * Handling:
+ * This should always get routed to KVM. In radix MMU mode, this is caused
+ * by a guest nested radix access that can't be performed due to the
+ * partition scope page table. In hash mode, this can be caused by guests
+ * running with translation disabled (virtual real mode) or with VPM enabled.
+ * KVM will update the page table structures or disallow the access.
+ */
+INT_DEFINE_BEGIN(h_data_storage)
+ IVEC=0xe00
+ IHSRR=1
+ IDAR=1
+ IDSISR=1
+ IKVM_SKIP=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_data_storage)
EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20)
- INT_HANDLER h_data_storage, 0xe00, ool=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY h_data_storage, virt=0, ool=1
EXC_REAL_END(h_data_storage, 0xe00, 0x20)
EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
- INT_HANDLER h_data_storage, 0xe00, ool=1, virt=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+ GEN_INT_ENTRY h_data_storage, virt=1, ool=1
EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
-INT_KVM_HANDLER h_data_storage, 0xe00, EXC_HV, PACA_EXGEN, 1
EXC_COMMON_BEGIN(h_data_storage_common)
- INT_COMMON 0xe00, PACA_EXGEN, 1, 1, 1, 1, 1
- bl save_nvgprs
+ GEN_COMMON h_data_storage
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
ld r4,_DAR(r1)
@@ -1542,56 +2026,125 @@ BEGIN_MMU_FTR_SECTION
MMU_FTR_SECTION_ELSE
bl unknown_exception
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
- b ret_from_except
+ b interrupt_return
+ GEN_KVM h_data_storage
+
+
+/**
+ * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest instruction fetch, similar to HDSI.
+ */
+INT_DEFINE_BEGIN(h_instr_storage)
+ IVEC=0xe20
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_instr_storage)
EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20)
- INT_HANDLER h_instr_storage, 0xe20, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_instr_storage, virt=0, ool=1
EXC_REAL_END(h_instr_storage, 0xe20, 0x20)
EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
- INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_instr_storage, virt=1, ool=1
EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
-INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
+EXC_COMMON_BEGIN(h_instr_storage_common)
+ GEN_COMMON h_instr_storage
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b interrupt_return
+ GEN_KVM h_instr_storage
+
+
+/**
+ * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
+ */
+INT_DEFINE_BEGIN(emulation_assist)
+ IVEC=0xe40
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(emulation_assist)
EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20)
- INT_HANDLER emulation_assist, 0xe40, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY emulation_assist, virt=0, ool=1
EXC_REAL_END(emulation_assist, 0xe40, 0x20)
EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
- INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY emulation_assist, virt=1, ool=1
EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
-INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
+EXC_COMMON_BEGIN(emulation_assist_common)
+ GEN_COMMON emulation_assist
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl emulation_assist_interrupt
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ b interrupt_return
+ GEN_KVM emulation_assist
-/*
- * hmi_exception trampoline is a special case. It jumps to hmi_exception_early
- * first, and then eventaully from there to the trampoline to get into virtual
- * mode.
+
+/**
+ * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
+ * This is an asynchronous interrupt caused by a Hypervisor Maintenance
+ * Exception. It is always taken in real mode but uses HSRR registers
+ * unlike SRESET and MCE.
+ *
+ * It is maskable in hardware by clearing MSR[EE], and partially soft-maskable
+ * with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * This is a special case, this is handled similarly to machine checks, with an
+ * initial real mode handler that is not soft-masked, which attempts to fix the
+ * problem. Then a regular handler which is soft-maskable and reports the
+ * problem.
+ *
+ * The emergency stack is used for the early real mode handler.
+ *
+ * XXX: unclear why MCE and HMI schemes could not be made common, e.g.,
+ * either use soft-masking for the MCE, or use irq_work for the HMI.
+ *
+ * KVM:
+ * Unlike MCE, this calls into KVM without calling the real mode handler
+ * first.
*/
+INT_DEFINE_BEGIN(hmi_exception_early)
+ IVEC=0xe60
+ IHSRR=1
+ IREALMODE_COMMON=1
+ ISTACK=0
+ IRECONCILE=0
+ IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+ IKVM_REAL=1
+INT_DEFINE_END(hmi_exception_early)
+
+INT_DEFINE_BEGIN(hmi_exception)
+ IVEC=0xe60
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+INT_DEFINE_END(hmi_exception)
+
EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20)
- INT_HANDLER hmi_exception, 0xe60, ool=1, early=1, hsrr=EXC_HV, ri=0, kvm=1
+ GEN_INT_ENTRY hmi_exception_early, virt=0, ool=1
EXC_REAL_END(hmi_exception, 0xe60, 0x20)
EXC_VIRT_NONE(0x4e60, 0x20)
-INT_KVM_HANDLER hmi_exception, 0xe60, EXC_HV, PACA_EXGEN, 0
+
EXC_COMMON_BEGIN(hmi_exception_early_common)
- mtctr r10 /* Restore ctr */
- mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
+ __GEN_REALMODE_COMMON_ENTRY hmi_exception_early
+
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- /* We don't touch AMR here, we never go to virtual mode */
- INT_COMMON 0xe60, PACA_EXGEN, 0, 0, 0, 0, 0
+ __GEN_COMMON_BODY hmi_exception_early
addi r3,r1,STACK_FRAME_OVERHEAD
bl hmi_exception_realmode
cmpdi cr0,r3,0
bne 1f
- EXCEPTION_RESTORE_REGS EXC_HV
+ EXCEPTION_RESTORE_REGS hsrr=1
HRFI_TO_USER_OR_KERNEL
1:
@@ -1599,41 +2152,84 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
* Go to virtual mode and pull the HMI event information from
* firmware.
*/
- EXCEPTION_RESTORE_REGS EXC_HV
- INT_HANDLER hmi_exception, 0xe60, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ EXCEPTION_RESTORE_REGS hsrr=1
+ GEN_INT_ENTRY hmi_exception, virt=0
+
+ GEN_KVM hmi_exception_early
EXC_COMMON_BEGIN(hmi_exception_common)
- INT_COMMON 0xe60, PACA_EXGEN, 1, 1, 1, 0, 0
+ GEN_COMMON hmi_exception
FINISH_NAP
RUNLATCH_ON
- bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
- b ret_from_except
+ b interrupt_return
+
+ GEN_KVM hmi_exception
+/**
+ * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsnd doorbell.
+ * Similar to the 0xa00 doorbell but for host rather than guest.
+ */
+INT_DEFINE_BEGIN(h_doorbell)
+ IVEC=0xe80
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_doorbell)
+
EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
- INT_HANDLER h_doorbell, 0xe80, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_doorbell, virt=0, ool=1
EXC_REAL_END(h_doorbell, 0xe80, 0x20)
EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
- INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_doorbell, virt=1, ool=1
EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
-INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(h_doorbell_common)
+ GEN_COMMON h_doorbell
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_DOORBELL
-EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
+ bl doorbell_exception
#else
-EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
+ bl unknown_exception
#endif
+ b interrupt_return
+
+ GEN_KVM h_doorbell
+/**
+ * Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
+ * This is an asynchronous interrupt in response to an "external exception".
+ * Similar to 0x500 but for host only.
+ */
+INT_DEFINE_BEGIN(h_virt_irq)
+ IVEC=0xea0
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_virt_irq)
+
EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
- INT_HANDLER h_virt_irq, 0xea0, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_virt_irq, virt=0, ool=1
EXC_REAL_END(h_virt_irq, 0xea0, 0x20)
EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
- INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+ GEN_INT_ENTRY h_virt_irq, virt=1, ool=1
EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
-INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
+EXC_COMMON_BEGIN(h_virt_irq_common)
+ GEN_COMMON h_virt_irq
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_IRQ
+ b interrupt_return
+
+ GEN_KVM h_virt_irq
EXC_REAL_NONE(0xec0, 0x20)
@@ -1642,25 +2238,69 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
+/*
+ * Interrupt 0xf00 - Performance Monitor Interrupt (PMI, PMU).
+ * This is an asynchronous interrupt in response to a PMU exception.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_PMI_DISABLED mask (NOTE: NOT local_irq_disable()).
+ *
+ * Handling:
+ * This calls into the perf subsystem.
+ *
+ * Like the watchdog soft-nmi, it appears an NMI interrupt to Linux, in that it
+ * runs under local_irq_disable. However it may be soft-masked in
+ * powerpc-specific code.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ */
+INT_DEFINE_BEGIN(performance_monitor)
+ IVEC=0xf00
+ IMASK=IRQS_PMI_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(performance_monitor)
+
EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
- INT_HANDLER performance_monitor, 0xf00, ool=1, bitmask=IRQS_PMI_DISABLED, kvm=1
+ GEN_INT_ENTRY performance_monitor, virt=0, ool=1
EXC_REAL_END(performance_monitor, 0xf00, 0x20)
EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
- INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED
+ GEN_INT_ENTRY performance_monitor, virt=1, ool=1
EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
-INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
+EXC_COMMON_BEGIN(performance_monitor_common)
+ GEN_COMMON performance_monitor
+ FINISH_NAP
+ RUNLATCH_ON
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl performance_monitor_exception
+ b interrupt_return
+
+ GEN_KVM performance_monitor
+/**
+ * Interrupt 0xf20 - Vector Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a vector (or altivec) instruction with MSR[VEC]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(altivec_unavailable)
+ IVEC=0xf20
+ IRECONCILE=0
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(altivec_unavailable)
+
EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
- INT_HANDLER altivec_unavailable, 0xf20, ool=1, kvm=1
+ GEN_INT_ENTRY altivec_unavailable, virt=0, ool=1
EXC_REAL_END(altivec_unavailable, 0xf20, 0x20)
EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20)
- INT_HANDLER altivec_unavailable, 0xf20, ool=1, virt=1
+ GEN_INT_ENTRY altivec_unavailable, virt=1, ool=1
EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20)
-INT_KVM_HANDLER altivec_unavailable, 0xf20, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(altivec_unavailable_common)
- INT_COMMON 0xf20, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON altivec_unavailable
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
beq 1f
@@ -1674,34 +2314,47 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
#endif
bl load_up_altivec
- b fast_exception_return
+ b fast_interrupt_return
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_tm
- b ret_from_except
+ b interrupt_return
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl altivec_unavailable_exception
- b ret_from_except
+ b interrupt_return
+ GEN_KVM altivec_unavailable
+
+
+/**
+ * Interrupt 0xf40 - VSX Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a VSX instruction with MSR[VSX]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(vsx_unavailable)
+ IVEC=0xf40
+ IRECONCILE=0
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(vsx_unavailable)
EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
- INT_HANDLER vsx_unavailable, 0xf40, ool=1, kvm=1
+ GEN_INT_ENTRY vsx_unavailable, virt=0, ool=1
EXC_REAL_END(vsx_unavailable, 0xf40, 0x20)
EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20)
- INT_HANDLER vsx_unavailable, 0xf40, ool=1, virt=1
+ GEN_INT_ENTRY vsx_unavailable, virt=1, ool=1
EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20)
-INT_KVM_HANDLER vsx_unavailable, 0xf40, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(vsx_unavailable_common)
- INT_COMMON 0xf40, PACA_EXGEN, 1, 1, 0, 0, 0
+ GEN_COMMON vsx_unavailable
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
beq 1f
@@ -1717,40 +2370,78 @@ BEGIN_FTR_SECTION
b load_up_vsx
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_tm
- b ret_from_except
+ b interrupt_return
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl vsx_unavailable_exception
- b ret_from_except
+ b interrupt_return
+
+ GEN_KVM vsx_unavailable
+
+/**
+ * Interrupt 0xf60 - Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can be
+ * resolved by the OS (e.g., FSCR, MSR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(facility_unavailable)
+ IVEC=0xf60
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(facility_unavailable)
EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20)
- INT_HANDLER facility_unavailable, 0xf60, ool=1, kvm=1
+ GEN_INT_ENTRY facility_unavailable, virt=0, ool=1
EXC_REAL_END(facility_unavailable, 0xf60, 0x20)
EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
- INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1
+ GEN_INT_ENTRY facility_unavailable, virt=1, ool=1
EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
-INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0
-EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
+EXC_COMMON_BEGIN(facility_unavailable_common)
+ GEN_COMMON facility_unavailable
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl facility_unavailable_exception
+ b interrupt_return
+ GEN_KVM facility_unavailable
+
+
+/**
+ * Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can only
+ * be resolved in HV mode (e.g., HFSCR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(h_facility_unavailable)
+ IVEC=0xf80
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_facility_unavailable)
EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20)
- INT_HANDLER h_facility_unavailable, 0xf80, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_facility_unavailable, virt=0, ool=1
EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20)
EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
- INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY h_facility_unavailable, virt=1, ool=1
EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
-INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0
-EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
+EXC_COMMON_BEGIN(h_facility_unavailable_common)
+ GEN_COMMON h_facility_unavailable
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl facility_unavailable_exception
+ b interrupt_return
+
+ GEN_KVM h_facility_unavailable
EXC_REAL_NONE(0xfa0, 0x20)
@@ -1766,56 +2457,95 @@ EXC_REAL_NONE(0x1100, 0x100)
EXC_VIRT_NONE(0x5100, 0x100)
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_system_error)
+ IVEC=0x1200
+ IHSRR=1
+ IKVM_SKIP=1
+ IKVM_REAL=1
+INT_DEFINE_END(cbe_system_error)
+
EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
- INT_HANDLER cbe_system_error, 0x1200, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_system_error, virt=0
EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
-INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception)
+EXC_COMMON_BEGIN(cbe_system_error_common)
+ GEN_COMMON cbe_system_error
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl cbe_system_error_exception
+ b interrupt_return
+
+ GEN_KVM cbe_system_error
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
#endif
+INT_DEFINE_BEGIN(instruction_breakpoint)
+ IVEC=0x1300
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_SKIP=1
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_breakpoint)
+
EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100)
- INT_HANDLER instruction_breakpoint, 0x1300, kvm=1
+ GEN_INT_ENTRY instruction_breakpoint, virt=0
EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100)
EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
- INT_HANDLER instruction_breakpoint, 0x1300, virt=1
+ GEN_INT_ENTRY instruction_breakpoint, virt=1
EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
-INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1
-EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception)
+EXC_COMMON_BEGIN(instruction_breakpoint_common)
+ GEN_COMMON instruction_breakpoint
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl instruction_breakpoint_exception
+ b interrupt_return
+
+ GEN_KVM instruction_breakpoint
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
-EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
- INT_HANDLER denorm_exception_hv, 0x1500, early=2, hsrr=EXC_HV
+/**
+ * Interrupt 0x1500 - Soft Patch Interrupt
+ *
+ * Handling:
+ * This is an implementation specific interrupt which can be used for a
+ * range of exceptions.
+ *
+ * This interrupt handler is unique in that it runs the denormal assist
+ * code even for guests (and even in guest context) without going to KVM,
+ * for speed. POWER9 does not raise denorm exceptions, so this special case
+ * could be phased out in future to reduce special cases.
+ */
+INT_DEFINE_BEGIN(denorm_exception)
+ IVEC=0x1500
+ IHSRR=1
+ IBRANCH_COMMON=0
+ IKVM_REAL=1
+INT_DEFINE_END(denorm_exception)
+
+EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100)
+ GEN_INT_ENTRY denorm_exception, virt=0
#ifdef CONFIG_PPC_DENORMALISATION
- mfspr r10,SPRN_HSRR1
- andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
#endif
- KVMTEST denorm_exception_hv, EXC_HV 0x1500
- INT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV, 1
-EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
-
+ GEN_BRANCH_TO_COMMON denorm_exception, virt=0
+EXC_REAL_END(denorm_exception, 0x1500, 0x100)
#ifdef CONFIG_PPC_DENORMALISATION
EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
- INT_HANDLER denorm_exception, 0x1500, 0, 2, 1, EXC_HV, PACA_EXGEN, 1, 0, 0, 0, 0
- mfspr r10,SPRN_HSRR1
- andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ GEN_INT_ENTRY denorm_exception, virt=1
+ andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
- INT_VIRT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV
+ GEN_BRANCH_TO_COMMON denorm_exception, virt=1
EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
#else
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-INT_KVM_HANDLER denorm_exception_hv, 0x1500, EXC_HV, PACA_EXGEN, 0
-
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
BEGIN_FTR_SECTION
@@ -1872,7 +2602,10 @@ denorm_done:
mtspr SPRN_HSRR0,r11
mtcrf 0x80,r9
ld r9,PACA_EXGEN+EX_R9(r13)
- RESTORE_PPR_PACA(PACA_EXGEN, r10)
+BEGIN_FTR_SECTION
+ ld r10,PACA_EXGEN+EX_PPR(r13)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
BEGIN_FTR_SECTION
ld r10,PACA_EXGEN+EX_CFAR(r13)
mtspr SPRN_CFAR,r10
@@ -1885,43 +2618,88 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
b .
#endif
-EXC_COMMON(denorm_common, 0x1500, unknown_exception)
+EXC_COMMON_BEGIN(denorm_exception_common)
+ GEN_COMMON denorm_exception
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b interrupt_return
+
+ GEN_KVM denorm_exception
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_maintenance)
+ IVEC=0x1600
+ IHSRR=1
+ IKVM_SKIP=1
+ IKVM_REAL=1
+INT_DEFINE_END(cbe_maintenance)
+
EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
- INT_HANDLER cbe_maintenance, 0x1600, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_maintenance, virt=0
EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
-INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception)
+EXC_COMMON_BEGIN(cbe_maintenance_common)
+ GEN_COMMON cbe_maintenance
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl cbe_maintenance_exception
+ b interrupt_return
+
+ GEN_KVM cbe_maintenance
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
#endif
+INT_DEFINE_BEGIN(altivec_assist)
+ IVEC=0x1700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(altivec_assist)
+
EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100)
- INT_HANDLER altivec_assist, 0x1700, kvm=1
+ GEN_INT_ENTRY altivec_assist, virt=0
EXC_REAL_END(altivec_assist, 0x1700, 0x100)
EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
- INT_HANDLER altivec_assist, 0x1700, virt=1
+ GEN_INT_ENTRY altivec_assist, virt=1
EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
-INT_KVM_HANDLER altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(altivec_assist_common)
+ GEN_COMMON altivec_assist
+ addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_ALTIVEC
-EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception)
+ bl altivec_assist_exception
+ REST_NVGPRS(r1) /* instruction emulation may change GPRs */
#else
-EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception)
+ bl unknown_exception
#endif
+ b interrupt_return
+
+ GEN_KVM altivec_assist
#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_thermal)
+ IVEC=0x1800
+ IHSRR=1
+ IKVM_SKIP=1
+ IKVM_REAL=1
+INT_DEFINE_END(cbe_thermal)
+
EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
- INT_HANDLER cbe_thermal, 0x1800, ool=1, hsrr=EXC_HV, kvm=1
+ GEN_INT_ENTRY cbe_thermal, virt=0
EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
-INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1
-EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception)
+EXC_COMMON_BEGIN(cbe_thermal_common)
+ GEN_COMMON cbe_thermal
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl cbe_thermal_exception
+ b interrupt_return
+
+ GEN_KVM cbe_thermal
+
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
@@ -1930,14 +2708,11 @@ EXC_VIRT_NONE(0x5800, 0x100)
#ifdef CONFIG_PPC_WATCHDOG
-#define MASKED_DEC_HANDLER_LABEL 3f
-
-#define MASKED_DEC_HANDLER(_H) \
-3: /* soft-nmi */ \
- std r12,PACA_EXGEN+EX_R12(r13); \
- GET_SCRATCH0(r10); \
- std r10,PACA_EXGEN+EX_R13(r13); \
- INT_SAVE_SRR_AND_JUMP soft_nmi_common, _H, 1
+INT_DEFINE_BEGIN(soft_nmi)
+ IVEC=0x900
+ ISTACK=0
+ IRECONCILE=0 /* Soft-NMI may fire under local_irq_disable */
+INT_DEFINE_END(soft_nmi)
/*
* Branch to soft_nmi_interrupt using the emergency stack. The emergency
@@ -1949,18 +2724,42 @@ EXC_VIRT_NONE(0x5800, 0x100)
* and run it entirely with interrupts hard disabled.
*/
EXC_COMMON_BEGIN(soft_nmi_common)
+ mfspr r11,SPRN_SRR0
mr r10,r1
ld r1,PACAEMERGSP(r13)
subi r1,r1,INT_FRAME_SIZE
- INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0
- bl save_nvgprs
+ __GEN_COMMON_BODY soft_nmi
+
+ /*
+ * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see
+ * system_reset_common)
+ */
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+ lbz r10,PACAIRQHAPPENED(r13)
+ std r10,_DAR(r1)
+ ori r10,r10,PACA_IRQ_HARD_DIS
+ stb r10,PACAIRQHAPPENED(r13)
+
addi r3,r1,STACK_FRAME_OVERHEAD
bl soft_nmi_interrupt
- b ret_from_except
-#else /* CONFIG_PPC_WATCHDOG */
-#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
-#define MASKED_DEC_HANDLER(_H)
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r9,0
+ mtmsrd r9,1
+
+ /*
+ * Restore soft mask settings.
+ */
+ ld r10,_DAR(r1)
+ stb r10,PACAIRQHAPPENED(r13)
+ ld r10,SOFTE(r1)
+ stb r10,PACAIRQSOFTMASK(r13)
+
+ kuap_restore_amr r10
+ EXCEPTION_RESTORE_REGS hsrr=0
+ RFI_TO_KERNEL
+
#endif /* CONFIG_PPC_WATCHDOG */
/*
@@ -1973,13 +2772,12 @@ EXC_COMMON_BEGIN(soft_nmi_common)
* - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
-.macro MASKED_INTERRUPT hsrr
+.macro MASKED_INTERRUPT hsrr=0
.if \hsrr
masked_Hinterrupt:
.else
masked_interrupt:
.endif
- std r11,PACA_EXGEN+EX_R11(r13)
lbz r11,PACAIRQHAPPENED(r13)
or r11,r11,r10
stb r11,PACAIRQHAPPENED(r13)
@@ -1988,26 +2786,30 @@ masked_interrupt:
lis r10,0x7fff
ori r10,r10,0xffff
mtspr SPRN_DEC,r10
- b MASKED_DEC_HANDLER_LABEL
+#ifdef CONFIG_PPC_WATCHDOG
+ b soft_nmi_common
+#else
+ b 2f
+#endif
1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK
beq 2f
+ xori r12,r12,MSR_EE /* clear MSR_EE */
.if \hsrr
- mfspr r10,SPRN_HSRR1
- xori r10,r10,MSR_EE /* clear MSR_EE */
- mtspr SPRN_HSRR1,r10
+ mtspr SPRN_HSRR1,r12
.else
- mfspr r10,SPRN_SRR1
- xori r10,r10,MSR_EE /* clear MSR_EE */
- mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR1,r12
.endif
ori r11,r11,PACA_IRQ_HARD_DIS
stb r11,PACAIRQHAPPENED(r13)
2: /* done */
+ ld r10,PACA_EXGEN+EX_CTR(r13)
+ mtctr r10
mtcrf 0x80,r9
std r1,PACAR1(r13)
ld r9,PACA_EXGEN+EX_R9(r13)
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
/* returns to kernel where r13 must be set up, so don't restore it */
.if \hsrr
HRFI_TO_KERNEL
@@ -2015,7 +2817,6 @@ masked_interrupt:
RFI_TO_KERNEL
.endif
b .
- MASKED_DEC_HANDLER(\hsrr\())
.endm
TRAMP_REAL_BEGIN(stf_barrier_fallback)
@@ -2117,17 +2918,12 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
GET_SCRATCH0(r13);
hrfid
-/*
- * Real mode exceptions actually use this too, but alternate
- * instruction code patches (which end up in the common .text area)
- * cannot reach these if they are put there.
- */
-USE_FIXED_SECTION(virt_trampolines)
- MASKED_INTERRUPT EXC_STD
- MASKED_INTERRUPT EXC_HV
+USE_TEXT_SECTION()
+ MASKED_INTERRUPT
+ MASKED_INTERRUPT hsrr=1
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
+kvmppc_skip_interrupt:
/*
* Here all GPRs are unchanged from when the interrupt happened
* except for r13, which is saved in SPRG_SCRATCH0.
@@ -2139,7 +2935,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
RFI_TO_KERNEL
b .
-TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
+kvmppc_skip_Hinterrupt:
/*
* Here all GPRs are unchanged from when the interrupt happened
* except for r13, which is saved in SPRG_SCRATCH0.
@@ -2152,16 +2948,6 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
b .
#endif
-/*
- * Ensure that any handlers that get invoked from the exception prologs
- * above are below the first 64KB (0x10000) of the kernel image because
- * the prologs assemble the addresses of these handlers using the
- * LOAD_HANDLER macro, which uses an ori instruction.
- */
-
-/*** Common interrupt handlers ***/
-
-
/*
* Relocation-on interrupts: A subset of the interrupts can be delivered
* with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
@@ -2275,7 +3061,7 @@ do_hash_page:
cmpdi r3,0 /* see if __hash_page succeeded */
/* Success */
- beq fast_exc_return_irq /* Return from exception on success */
+ beq interrupt_return /* Return from exception on success */
/* Error */
blt- 13f
@@ -2292,39 +3078,36 @@ handle_page_fault:
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
cmpdi r3,0
- beq+ ret_from_except_lite
- bl save_nvgprs
+ beq+ interrupt_return
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
ld r4,_DAR(r1)
bl bad_page_fault
- b ret_from_except
+ b interrupt_return
/* We have a data breakpoint exception - handle it */
handle_dabr_fault:
- bl save_nvgprs
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_break
/*
* do_break() may have changed the NV GPRS while handling a breakpoint.
- * If so, we need to restore them with their updated values. Don't use
- * ret_from_except_lite here.
+ * If so, we need to restore them with their updated values.
*/
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
#ifdef CONFIG_PPC_BOOK3S_64
/* We have a page fault that hash_page could handle but HV refused
* the PTE insertion
*/
-13: bl save_nvgprs
- mr r5,r3
+13: mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
ld r4,_DAR(r1)
bl low_hash_fault
- b ret_from_except
+ b interrupt_return
#endif
/*
@@ -2334,74 +3117,7 @@ handle_dabr_fault:
* were soft-disabled. We want to invoke the exception handler for
* the access, or panic if there isn't a handler.
*/
-77: bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+77: addi r3,r1,STACK_FRAME_OVERHEAD
li r5,SIGSEGV
bl bad_page_fault
- b ret_from_except
-
-/*
- * When doorbell is triggered from system reset wakeup, the message is
- * not cleared, so it would fire again when EE is enabled.
- *
- * When coming from local_irq_enable, there may be the same problem if
- * we were hard disabled.
- *
- * Execute msgclr to clear pending exceptions before handling it.
- */
-h_doorbell_common_msgclr:
- LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
- PPC_MSGCLR(3)
- b h_doorbell_common
-
-doorbell_super_common_msgclr:
- LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
- PPC_MSGCLRP(3)
- b doorbell_super_common
-
-/*
- * Called from arch_local_irq_enable when an interrupt needs
- * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
- * which kind of interrupt. MSR:EE is already off. We generate a
- * stackframe like if a real interrupt had happened.
- *
- * Note: While MSR:EE is off, we need to make sure that _MSR
- * in the generated frame has EE set to 1 or the exception
- * handler will not properly re-enable them.
- *
- * Note that we don't specify LR as the NIP (return address) for
- * the interrupt because that would unbalance the return branch
- * predictor.
- */
-_GLOBAL(__replay_interrupt)
- /* We are going to jump to the exception common code which
- * will retrieve various register values from the PACA which
- * we don't give a damn about, so we don't bother storing them.
- */
- mfmsr r12
- LOAD_REG_ADDR(r11, replay_interrupt_return)
- mfcr r9
- ori r12,r12,MSR_EE
- cmpwi r3,0x900
- beq decrementer_common
- cmpwi r3,0x500
-BEGIN_FTR_SECTION
- beq h_virt_irq_common
-FTR_SECTION_ELSE
- beq hardware_interrupt_common
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
- cmpwi r3,0xf00
- beq performance_monitor_common
-BEGIN_FTR_SECTION
- cmpwi r3,0xa00
- beq h_doorbell_common_msgclr
- cmpwi r3,0xe60
- beq hmi_exception_common
-FTR_SECTION_ELSE
- cmpwi r3,0xa00
- beq doorbell_super_common_msgclr
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-replay_interrupt_return:
- blr
-
-_ASM_NOKPROBE_SYMBOL(__replay_interrupt)
+ b interrupt_return
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index ff0114aeba9b..59e60a9a9f5c 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -36,6 +36,8 @@ static struct fw_dump fw_dump;
static void __init fadump_reserve_crash_area(u64 base);
+struct kobject *fadump_kobj;
+
#ifndef CONFIG_PRESERVE_FA_DUMP
static DEFINE_MUTEX(fadump_mutex);
struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
@@ -1323,9 +1325,9 @@ static void fadump_invalidate_release_mem(void)
fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
-static ssize_t fadump_release_memory_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
+static ssize_t release_mem_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
{
int input = -1;
@@ -1350,23 +1352,40 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj,
return count;
}
-static ssize_t fadump_enabled_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+/* Release the reserved memory and disable the FADump */
+static void unregister_fadump(void)
+{
+ fadump_cleanup();
+ fadump_release_memory(fw_dump.reserve_dump_area_start,
+ fw_dump.reserve_dump_area_size);
+ fw_dump.fadump_enabled = 0;
+ kobject_put(fadump_kobj);
+}
+
+static ssize_t enabled_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
}
-static ssize_t fadump_register_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+static ssize_t mem_reserved_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size);
+}
+
+static ssize_t registered_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", fw_dump.dump_registered);
}
-static ssize_t fadump_register_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
+static ssize_t registered_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
{
int ret = 0;
int input = -1;
@@ -1418,45 +1437,82 @@ static int fadump_region_show(struct seq_file *m, void *private)
return 0;
}
-static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
- 0200, NULL,
- fadump_release_memory_store);
-static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
- 0444, fadump_enabled_show,
- NULL);
-static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
- 0644, fadump_register_show,
- fadump_register_store);
+static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
+static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
+static struct kobj_attribute register_attr = __ATTR_RW(registered);
+static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);
+
+static struct attribute *fadump_attrs[] = {
+ &enable_attr.attr,
+ &register_attr.attr,
+ &mem_reserved_attr.attr,
+ NULL,
+};
+
+ATTRIBUTE_GROUPS(fadump);
DEFINE_SHOW_ATTRIBUTE(fadump_region);
static void fadump_init_files(void)
{
- struct dentry *debugfs_file;
int rc = 0;
- rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
- if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_enabled (%d)\n", rc);
+ fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
+ if (!fadump_kobj) {
+ pr_err("failed to create fadump kobject\n");
+ return;
+ }
- rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
- if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_registered (%d)\n", rc);
+ debugfs_create_file("fadump_region", 0444, powerpc_debugfs_root, NULL,
+ &fadump_region_fops);
- debugfs_file = debugfs_create_file("fadump_region", 0444,
- powerpc_debugfs_root, NULL,
- &fadump_region_fops);
- if (!debugfs_file)
- printk(KERN_ERR "fadump: unable to create debugfs file"
- " fadump_region\n");
+ if (fw_dump.dump_active) {
+ rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
+ if (rc)
+ pr_err("unable to create release_mem sysfs file (%d)\n",
+ rc);
+ }
+
+ rc = sysfs_create_groups(fadump_kobj, fadump_groups);
+ if (rc) {
+ pr_err("sysfs group creation failed (%d), unregistering FADump",
+ rc);
+ unregister_fadump();
+ return;
+ }
+
+ /*
+ * The FADump sysfs are moved from kernel_kobj to fadump_kobj need to
+ * create symlink at old location to maintain backward compatibility.
+ *
+ * - fadump_enabled -> fadump/enabled
+ * - fadump_registered -> fadump/registered
+ * - fadump_release_mem -> fadump/release_mem
+ */
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+ "enabled", "fadump_enabled");
+ if (rc) {
+ pr_err("unable to create fadump_enabled symlink (%d)", rc);
+ return;
+ }
+
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+ "registered",
+ "fadump_registered");
+ if (rc) {
+ pr_err("unable to create fadump_registered symlink (%d)", rc);
+ sysfs_remove_link(kernel_kobj, "fadump_enabled");
+ return;
+ }
if (fw_dump.dump_active) {
- rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
+ fadump_kobj,
+ "release_mem",
+ "fadump_release_mem");
if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_release_mem (%d)\n", rc);
+ pr_err("unable to create fadump_release_mem symlink (%d)",
+ rc);
}
return;
}
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 97c887950c3c..daaa153950c2 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -348,7 +348,7 @@ BEGIN_MMU_FTR_SECTION
andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
#endif
bne handle_page_fault_tramp_2 /* if not, try to put a PTE */
- rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */
+ rlwinm r3, r5, 32 - 24, 30, 30 /* DSISR_STORE -> _PAGE_RW */
bl hash_page
b handle_page_fault_tramp_1
FTR_SECTION_ELSE
@@ -497,7 +497,6 @@ InstructionTLBMiss:
andc. r1,r1,r0 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
ori r1, r1, 0xe06 /* clear out reserved bits */
andc r1, r0, r1 /* PP = user? 1 : 0 */
BEGIN_FTR_SECTION
@@ -565,9 +564,8 @@ DataLoadTLBMiss:
* we would need to update the pte atomically with lwarx/stwcx.
*/
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
+ rlwinm r1,r0,0,30,30 /* _PAGE_RW -> PP msb */
+ rlwimi r0,r0,1,30,30 /* _PAGE_USER -> PP msb */
ori r1,r1,0xe04 /* clear out reserved bits */
andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */
BEGIN_FTR_SECTION
@@ -645,7 +643,6 @@ DataStoreTLBMiss:
* we would need to update the pte atomically with lwarx/stwcx.
*/
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
li r1,0xe06 /* clear out reserved bits & PP msb */
andc r1,r0,r1 /* PP = user? 1: 0 */
BEGIN_FTR_SECTION
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 9db162f79fe6..9abec6cd099c 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -130,37 +130,36 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
.macro SYSCALL_ENTRY trapno
mfspr r12,SPRN_SPRG_THREAD
+ mfspr r9, SPRN_SRR1
#ifdef CONFIG_VMAP_STACK
- mfspr r9, SPRN_SRR0
- mfspr r11, SPRN_SRR1
- stw r9, SRR0(r12)
- stw r11, SRR1(r12)
+ mfspr r11, SPRN_SRR0
+ mtctr r11
#endif
- mfcr r10
+ andi. r11, r9, MSR_PR
lwz r11,TASK_STACK-THREAD(r12)
- rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
+ beq- 99f
addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE
#ifdef CONFIG_VMAP_STACK
- li r9, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
- mtmsr r9
+ li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+ mtmsr r10
isync
#endif
tovirt_vmstack r12, r12
tophys_novmstack r11, r11
- mflr r9
- stw r10,_CCR(r11) /* save registers */
- stw r9, _LINK(r11)
+ mflr r10
+ stw r10, _LINK(r11)
#ifdef CONFIG_VMAP_STACK
- lwz r10, SRR0(r12)
- lwz r9, SRR1(r12)
+ mfctr r10
#else
mfspr r10,SPRN_SRR0
- mfspr r9,SPRN_SRR1
#endif
stw r1,GPR1(r11)
stw r1,0(r11)
tovirt_novmstack r1, r11 /* set new kernel sp */
stw r10,_NIP(r11)
+ mfcr r10
+ rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
+ stw r10,_CCR(r11) /* save registers */
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
@@ -228,6 +227,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
mtspr SPRN_SRR0,r11
SYNC
RFI /* jump to handler, enable MMU */
+99: b ret_from_kernel_syscall
.endm
.macro save_dar_dsisr_on_stack reg1, reg2, sp
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index ad79fddb974d..ddfbd02140d9 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -537,6 +537,7 @@ __start_initialization_multiplatform:
b __after_prom_start
#endif /* CONFIG_PPC_BOOK3E */
+__REF
__boot_from_prom:
#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
/* Save parameters */
@@ -574,6 +575,7 @@ __boot_from_prom:
/* We never return. We also hit that trap if trying to boot
* from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
trap
+ .previous
__after_prom_start:
#ifdef CONFIG_RELOCATABLE
@@ -977,7 +979,6 @@ start_here_multiplatform:
RFI
b . /* prevent speculative execution */
- .previous
/* This is where all platforms converge execution */
start_here_common:
@@ -1001,6 +1002,7 @@ start_here_common:
/* Not reached */
trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
+ .previous
/*
* We put a few things here that have to be page-aligned.
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 37fc84ed90e3..bd2e5ed8dd50 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -104,16 +104,18 @@ FTR_SECTION_ELSE
#ifdef CONFIG_KVM_BOOKE_HV
ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
#endif
+ mfspr r9, SPRN_SRR1
BOOKE_CLEAR_BTB(r11)
+ andi. r11, r9, MSR_PR
lwz r11, TASK_STACK - THREAD(r10)
rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
+ beq- 99f
ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
stw r12, _CCR(r11) /* save various registers */
mflr r12
stw r12,_LINK(r11)
mfspr r12,SPRN_SRR0
stw r1, GPR1(r11)
- mfspr r9,SPRN_SRR1
stw r1, 0(r11)
mr r1, r11
stw r12,_NIP(r11)
@@ -176,6 +178,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
mtspr SPRN_SRR0,r11
SYNC
RFI /* jump to handler, enable MMU */
+99: b ret_from_kernel_syscall
.endm
/* To handle the additional exception priority levels on 40x and Book-E
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index d0854320bb50..72f461bd70fb 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -429,3 +429,19 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
+
+void ptrace_triggered(struct perf_event *bp,
+ struct perf_sample_data *data, struct pt_regs *regs)
+{
+ struct perf_event_attr attr;
+
+ /*
+ * Disable the breakpoint request here since ptrace has defined a
+ * one-shot behaviour for breakpoint exceptions in PPC64.
+ * The SIGTRAP signal is generated automatically for us in do_dabr().
+ * We don't have to do anything about that here
+ */
+ attr = bp->attr;
+ attr.disabled = true;
+ modify_user_hw_breakpoint(bp, &attr);
+}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5c9b11878555..1f1169856dc8 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -70,6 +70,7 @@
#include <asm/paca.h>
#include <asm/firmware.h>
#include <asm/lv1call.h>
+#include <asm/dbell.h>
#endif
#define CREATE_TRACE_POINTS
#include <asm/trace.h>
@@ -109,6 +110,8 @@ static inline notrace int decrementer_check_overflow(void)
return now >= *next_tb;
}
+#ifdef CONFIG_PPC_BOOK3E
+
/* This is called whenever we are re-enabling interrupts
* and returns either 0 (nothing to do) or 500/900/280/a00/e80 if
* there's an EE, DEC or DBELL to generate.
@@ -168,6 +171,67 @@ notrace unsigned int __check_irq_replay(void)
}
}
+ if (happened & PACA_IRQ_DEC) {
+ local_paca->irq_happened &= ~PACA_IRQ_DEC;
+ return 0x900;
+ }
+
+ if (happened & PACA_IRQ_EE) {
+ local_paca->irq_happened &= ~PACA_IRQ_EE;
+ return 0x500;
+ }
+
+ /*
+ * Check if an EPR external interrupt happened this bit is typically
+ * set if we need to handle another "edge" interrupt from within the
+ * MPIC "EPR" handler.
+ */
+ if (happened & PACA_IRQ_EE_EDGE) {
+ local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
+ return 0x500;
+ }
+
+ if (happened & PACA_IRQ_DBELL) {
+ local_paca->irq_happened &= ~PACA_IRQ_DBELL;
+ return 0x280;
+ }
+
+ /* There should be nothing left ! */
+ BUG_ON(local_paca->irq_happened != 0);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_BOOK3E */
+
+void replay_soft_interrupts(void)
+{
+ /*
+ * We use local_paca rather than get_paca() to avoid all
+ * the debug_smp_processor_id() business in this low level
+ * function
+ */
+ unsigned char happened = local_paca->irq_happened;
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ regs.softe = IRQS_ALL_DISABLED;
+
+again:
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ if (happened & PACA_IRQ_HARD_DIS) {
+ /*
+ * We may have missed a decrementer interrupt if hard disabled.
+ * Check the decrementer register in case we had a rollover
+ * while hard disabled.
+ */
+ if (!(happened & PACA_IRQ_DEC)) {
+ if (decrementer_check_overflow())
+ happened |= PACA_IRQ_DEC;
+ }
+ }
+
/*
* Force the delivery of pending soft-disabled interrupts on PS3.
* Any HV call will have this side effect.
@@ -182,58 +246,78 @@ notrace unsigned int __check_irq_replay(void)
* This is a higher priority interrupt than the others, so
* replay it first.
*/
- if (happened & PACA_IRQ_HMI) {
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_HMI)) {
local_paca->irq_happened &= ~PACA_IRQ_HMI;
- return 0xe60;
+ regs.trap = 0xe60;
+ handle_hmi_exception(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
}
if (happened & PACA_IRQ_DEC) {
local_paca->irq_happened &= ~PACA_IRQ_DEC;
- return 0x900;
- }
-
- if (happened & PACA_IRQ_PMI) {
- local_paca->irq_happened &= ~PACA_IRQ_PMI;
- return 0xf00;
+ regs.trap = 0x900;
+ timer_interrupt(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
}
if (happened & PACA_IRQ_EE) {
local_paca->irq_happened &= ~PACA_IRQ_EE;
- return 0x500;
+ regs.trap = 0x500;
+ do_IRQ(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
}
-#ifdef CONFIG_PPC_BOOK3E
/*
* Check if an EPR external interrupt happened this bit is typically
* set if we need to handle another "edge" interrupt from within the
* MPIC "EPR" handler.
*/
- if (happened & PACA_IRQ_EE_EDGE) {
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E) && (happened & PACA_IRQ_EE_EDGE)) {
local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
- return 0x500;
+ regs.trap = 0x500;
+ do_IRQ(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
}
- if (happened & PACA_IRQ_DBELL) {
+ if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (happened & PACA_IRQ_DBELL)) {
local_paca->irq_happened &= ~PACA_IRQ_DBELL;
- return 0x280;
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E))
+ regs.trap = 0x280;
+ else
+ regs.trap = 0xa00;
+ doorbell_exception(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
}
-#else
- if (happened & PACA_IRQ_DBELL) {
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
- return 0xa00;
- }
-#endif /* CONFIG_PPC_BOOK3E */
- /* There should be nothing left ! */
- BUG_ON(local_paca->irq_happened != 0);
+ /* Book3E does not support soft-masking PMI interrupts */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_PMI)) {
+ local_paca->irq_happened &= ~PACA_IRQ_PMI;
+ regs.trap = 0xf00;
+ performance_monitor_exception(&regs);
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+ hard_irq_disable();
+ }
- return 0;
+ happened = local_paca->irq_happened;
+ if (happened & ~PACA_IRQ_HARD_DIS) {
+ /*
+ * We are responding to the next interrupt, so interrupt-off
+ * latencies should be reset here.
+ */
+ trace_hardirqs_on();
+ trace_hardirqs_off();
+ goto again;
+ }
}
notrace void arch_local_irq_restore(unsigned long mask)
{
unsigned char irq_happened;
- unsigned int replay;
/* Write the new soft-enabled value */
irq_soft_mask_set(mask);
@@ -255,24 +339,16 @@ notrace void arch_local_irq_restore(unsigned long mask)
*/
irq_happened = get_irq_happened();
if (!irq_happened) {
-#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- WARN_ON_ONCE(!(mfmsr() & MSR_EE));
-#endif
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(!(mfmsr() & MSR_EE));
return;
}
- /*
- * We need to hard disable to get a trusted value from
- * __check_irq_replay(). We also need to soft-disable
- * again to avoid warnings in there due to the use of
- * per-cpu variables.
- */
+ /* We need to hard disable to replay. */
if (!(irq_happened & PACA_IRQ_HARD_DIS)) {
-#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- WARN_ON_ONCE(!(mfmsr() & MSR_EE));
-#endif
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(!(mfmsr() & MSR_EE));
__hard_irq_disable();
-#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
} else {
/*
* We should already be hard disabled here. We had bugs
@@ -280,35 +356,26 @@ notrace void arch_local_irq_restore(unsigned long mask)
* warn if we are wrong. Only do that when IRQ tracing
* is enabled as mfmsr() can be costly.
*/
- if (WARN_ON_ONCE(mfmsr() & MSR_EE))
- __hard_irq_disable();
-#endif
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ if (WARN_ON_ONCE(mfmsr() & MSR_EE))
+ __hard_irq_disable();
+ }
+
+ if (irq_happened == PACA_IRQ_HARD_DIS) {
+ local_paca->irq_happened = 0;
+ __hard_irq_enable();
+ return;
+ }
}
irq_soft_mask_set(IRQS_ALL_DISABLED);
trace_hardirqs_off();
- /*
- * Check if anything needs to be re-emitted. We haven't
- * soft-enabled yet to avoid warnings in decrementer_check_overflow
- * accessing per-cpu variables
- */
- replay = __check_irq_replay();
+ replay_soft_interrupts();
+ local_paca->irq_happened = 0;
- /* We can soft-enable now */
trace_hardirqs_on();
irq_soft_mask_set(IRQS_ENABLED);
-
- /*
- * And replay if we have to. This will return with interrupts
- * hard-enabled.
- */
- if (replay) {
- __replay_interrupt(replay);
- return;
- }
-
- /* Finally, let's ensure we are hard enabled */
__hard_irq_enable();
}
EXPORT_SYMBOL(arch_local_irq_restore);
@@ -460,6 +527,19 @@ void irq_set_pending_from_srr1(unsigned long srr1)
return;
}
+ if (reason == PACA_IRQ_DBELL) {
+ /*
+ * When doorbell triggers a system reset wakeup, the message
+ * is not cleared, so if the doorbell interrupt is replayed
+ * and the IPI handled, the doorbell interrupt would still
+ * fire when EE is enabled.
+ *
+ * To avoid taking the superfluous doorbell interrupt,
+ * execute a msgclr here before the interrupt is replayed.
+ */
+ ppc_msgclr(PPC_DBELL_MSGTYPE);
+ }
+
/*
* The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
* so this can be called unconditionally with the SRR1 wake
@@ -599,17 +679,18 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
static inline void check_stack_overflow(void)
{
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
long sp;
- sp = current_stack_pointer() & (THREAD_SIZE-1);
+ if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW))
+ return;
+
+ sp = current_stack_pointer & (THREAD_SIZE - 1);
/* check for stack overflow: is there less than 2KB free? */
if (unlikely(sp < 2048)) {
pr_err("do_IRQ: stack overflow: %ld\n", sp);
dump_stack();
}
-#endif
}
void __do_irq(struct pt_regs *regs)
@@ -647,7 +728,7 @@ void do_IRQ(struct pt_regs *regs)
void *cursp, *irqsp, *sirqsp;
/* Switch to the irq stack to handle this */
- cursp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+ cursp = (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
irqsp = hardirq_ctx[raw_smp_processor_id()];
sirqsp = softirq_ctx[raw_smp_processor_id()];
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 2d27ec4feee4..81efb605113e 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -264,6 +264,9 @@ int kprobe_handler(struct pt_regs *regs)
if (user_mode(regs))
return 0;
+ if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR))
+ return 0;
+
/*
* We don't want to be preempted for the entire
* duration of kprobe processing
@@ -271,54 +274,6 @@ int kprobe_handler(struct pt_regs *regs)
preempt_disable();
kcb = get_kprobe_ctlblk();
- /* Check we're not actually recursing */
- if (kprobe_running()) {
- p = get_kprobe(addr);
- if (p) {
- kprobe_opcode_t insn = *p->ainsn.insn;
- if (kcb->kprobe_status == KPROBE_HIT_SS &&
- is_trap(insn)) {
- /* Turn off 'trace' bits */
- regs->msr &= ~MSR_SINGLESTEP;
- regs->msr |= kcb->kprobe_saved_msr;
- goto no_kprobe;
- }
- /* We have reentered the kprobe_handler(), since
- * another probe was hit while within the handler.
- * We here save the original kprobes variables and
- * just single step on the instruction of the new probe
- * without calling any user handlers.
- */
- save_previous_kprobe(kcb);
- set_current_kprobe(p, regs, kcb);
- kprobes_inc_nmissed_count(p);
- kcb->kprobe_status = KPROBE_REENTER;
- if (p->ainsn.boostable >= 0) {
- ret = try_to_emulate(p, regs);
-
- if (ret > 0) {
- restore_previous_kprobe(kcb);
- preempt_enable_no_resched();
- return 1;
- }
- }
- prepare_singlestep(p, regs);
- return 1;
- } else if (*addr != BREAKPOINT_INSTRUCTION) {
- /* If trap variant, then it belongs not to us */
- kprobe_opcode_t cur_insn = *addr;
-
- if (is_trap(cur_insn))
- goto no_kprobe;
- /* The breakpoint instruction was removed by
- * another cpu right after we hit, no further
- * handling of this interrupt is appropriate
- */
- ret = 1;
- }
- goto no_kprobe;
- }
-
p = get_kprobe(addr);
if (!p) {
if (*addr != BREAKPOINT_INSTRUCTION) {
@@ -343,6 +298,39 @@ int kprobe_handler(struct pt_regs *regs)
goto no_kprobe;
}
+ /* Check we're not actually recursing */
+ if (kprobe_running()) {
+ kprobe_opcode_t insn = *p->ainsn.insn;
+ if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) {
+ /* Turn off 'trace' bits */
+ regs->msr &= ~MSR_SINGLESTEP;
+ regs->msr |= kcb->kprobe_saved_msr;
+ goto no_kprobe;
+ }
+
+ /*
+ * We have reentered the kprobe_handler(), since another probe
+ * was hit while within the handler. We here save the original
+ * kprobes variables and just single step on the instruction of
+ * the new probe without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kprobes_inc_nmissed_count(p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ if (p->ainsn.boostable >= 0) {
+ ret = try_to_emulate(p, regs);
+
+ if (ret > 0) {
+ restore_previous_kprobe(kcb);
+ preempt_enable_no_resched();
+ return 1;
+ }
+ }
+ prepare_singlestep(p, regs);
+ return 1;
+ }
+
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
set_current_kprobe(p, regs, kcb);
if (p->pre_handler && p->pre_handler(p, regs)) {
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 34c1001e9e8b..8077b5fb18a7 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -15,6 +15,7 @@
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
+#include <linux/extable.h>
#include <asm/machdep.h>
#include <asm/mce.h>
@@ -251,6 +252,19 @@ void machine_check_queue_event(void)
/* Queue irq work to process this event later. */
irq_work_queue(&mce_event_process_work);
}
+
+void mce_common_process_ue(struct pt_regs *regs,
+ struct mce_error_info *mce_err)
+{
+ const struct exception_table_entry *entry;
+
+ entry = search_kernel_exception_table(regs->nip);
+ if (entry) {
+ mce_err->ignore_event = true;
+ regs->nip = extable_fixup(entry);
+ }
+}
+
/*
* process pending MCE event from the mce event queue. This function will be
* called during syscall exit.
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 1cbf7f1a4e3d..067b094bfeff 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -579,14 +579,10 @@ static long mce_handle_ue_error(struct pt_regs *regs,
struct mce_error_info *mce_err)
{
long handled = 0;
- const struct exception_table_entry *entry;
- entry = search_kernel_exception_table(regs->nip);
- if (entry) {
- mce_err->ignore_event = true;
- regs->nip = extable_fixup(entry);
+ mce_common_process_ue(regs, mce_err);
+ if (mce_err->ignore_event)
return 1;
- }
/*
* On specific SCOM read via MMIO we may get a machine check
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 974f65f79a8e..65f9f731c229 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -110,7 +110,7 @@ _GLOBAL(longjmp)
li r3, 1
blr
-_GLOBAL(current_stack_pointer)
+_GLOBAL(current_stack_frame)
PPC_LL r3,0(r1)
blr
-EXPORT_SYMBOL(current_stack_pointer)
+EXPORT_SYMBOL(current_stack_frame)
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 427fc22f72b6..71a3f97dc988 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -62,13 +62,9 @@ static int of_pci_phb_probe(struct platform_device *dev)
/* Init pci_dn data structures */
pci_devs_phb_init_dynamic(phb);
- /* Create EEH devices for the PHB */
+ /* Create EEH PEs for the PHB */
eeh_dev_phb_init_dynamic(phb);
- /* Register devices with EEH */
- if (dev->dev.of_node->child)
- eeh_add_device_tree_early(PCI_DN(dev->dev.of_node));
-
/* Scan the bus */
pcibios_scan_phb(phb);
if (phb->bus == NULL)
@@ -80,15 +76,9 @@ static int of_pci_phb_probe(struct platform_device *dev)
*/
pcibios_claim_one_bus(phb->bus);
- /* Finish EEH setup */
- eeh_add_device_tree_late(phb->bus);
-
/* Add probed PCI devices to the device model */
pci_bus_add_devices(phb->bus);
- /* sysfs files should only be added after devices are added */
- eeh_add_sysfs_files(phb->bus);
-
return 0;
}
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 949eceb254d8..3f91ccaa9c74 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -176,7 +176,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
struct paca_struct **paca_ptrs __read_mostly;
EXPORT_SYMBOL(paca_ptrs);
-void __init initialise_paca(struct paca_struct *new_paca, int cpu)
+void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu)
{
#ifdef CONFIG_PPC_PSERIES
new_paca->lppaca_ptr = NULL;
@@ -205,7 +205,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
}
/* Put the paca pointer into r13 and SPRG_PACA */
-void setup_paca(struct paca_struct *new_paca)
+void __nostackprotector setup_paca(struct paca_struct *new_paca)
{
/* Setup r13 */
local_paca = new_paca;
@@ -214,11 +214,15 @@ void setup_paca(struct paca_struct *new_paca)
/* On Book3E, initialize the TLB miss exception frames */
mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
#else
- /* In HV mode, we setup both HPACA and PACA to avoid problems
+ /*
+ * In HV mode, we setup both HPACA and PACA to avoid problems
* if we do a GET_PACA() before the feature fixups have been
- * applied
+ * applied.
+ *
+ * Normally you should test against CPU_FTR_HVMODE, but CPU features
+ * are not yet set up when we first reach here.
*/
- if (early_cpu_has_feature(CPU_FTR_HVMODE))
+ if (mfmsr() & MSR_HV)
mtspr(SPRN_SPRG_HPACA, local_paca);
#endif
mtspr(SPRN_SPRG_PACA, local_paca);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index c6c03416a151..be108616a721 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -728,7 +728,7 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
" MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
range.cpu_addr, range.cpu_addr + range.size - 1,
range.pci_addr,
- (range.pci_space & 0x40000000) ?
+ (range.flags & IORESOURCE_PREFETCH) ?
"Prefetch" : "");
/* We support only 3 memory ranges */
@@ -1399,14 +1399,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
pci_assign_unassigned_bus_resources(bus);
}
- /* Fixup EEH */
- eeh_add_device_tree_late(bus);
-
/* Add new devices to global lists. Register in proc, sysfs. */
pci_bus_add_devices(bus);
-
- /* sysfs files should only be added after devices are added */
- eeh_add_sysfs_files(bus);
}
EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index d6a67f814983..bf83f76563a3 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -112,8 +112,6 @@ void pci_hp_add_devices(struct pci_bus *bus)
struct pci_controller *phb;
struct device_node *dn = pci_bus_to_OF_node(bus);
- eeh_add_device_tree_early(PCI_DN(dn));
-
phb = pci_bus_to_host(bus);
mode = PCI_PROBE_NORMAL;
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
index f3bd0bbf2ae8..2d4d21bb46a9 100644
--- a/arch/powerpc/kernel/ppc_save_regs.S
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -55,14 +55,17 @@ _GLOBAL(ppc_save_regs)
PPC_STL r29,29*SZL(r3)
PPC_STL r30,30*SZL(r3)
PPC_STL r31,31*SZL(r3)
+ lbz r0,PACAIRQSOFTMASK(r13)
+ PPC_STL r0,SOFTE-STACK_FRAME_OVERHEAD(r3)
#endif
/* go up one stack frame for SP */
PPC_LL r4,0(r1)
PPC_STL r4,1*SZL(r3)
/* get caller's LR */
PPC_LL r0,LRSAVE(r4)
- PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
+ mflr r0
+ PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
mfmsr r0
PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
mfctr r0
@@ -73,4 +76,5 @@ _GLOBAL(ppc_save_regs)
PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
li r0,0
PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,ORIG_GPR3-STACK_FRAME_OVERHEAD(r3)
blr
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index fad50db9dcf2..9c21288f8645 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -236,23 +236,9 @@ void enable_kernel_fp(void)
}
}
EXPORT_SYMBOL(enable_kernel_fp);
-
-static int restore_fp(struct task_struct *tsk)
-{
- if (tsk->thread.load_fp) {
- load_fp_state(&current->thread.fp_state);
- current->thread.load_fp++;
- return 1;
- }
- return 0;
-}
-#else
-static int restore_fp(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_PPC_FPU */
#ifdef CONFIG_ALTIVEC
-#define loadvec(thr) ((thr).load_vec)
-
static void __giveup_altivec(struct task_struct *tsk)
{
unsigned long msr;
@@ -318,21 +304,6 @@ void flush_altivec_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
-
-static int restore_altivec(struct task_struct *tsk)
-{
- if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
- load_vr_state(&tsk->thread.vr_state);
- tsk->thread.used_vr = 1;
- tsk->thread.load_vec++;
-
- return 1;
- }
- return 0;
-}
-#else
-#define loadvec(thr) 0
-static inline int restore_altivec(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
@@ -400,18 +371,6 @@ void flush_vsx_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
-
-static int restore_vsx(struct task_struct *tsk)
-{
- if (cpu_has_feature(CPU_FTR_VSX)) {
- tsk->thread.used_vsr = 1;
- return 1;
- }
-
- return 0;
-}
-#else
-static inline int restore_vsx(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
@@ -511,6 +470,53 @@ void giveup_all(struct task_struct *tsk)
}
EXPORT_SYMBOL(giveup_all);
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_FPU
+static int restore_fp(struct task_struct *tsk)
+{
+ if (tsk->thread.load_fp) {
+ load_fp_state(&current->thread.fp_state);
+ current->thread.load_fp++;
+ return 1;
+ }
+ return 0;
+}
+#else
+static int restore_fp(struct task_struct *tsk) { return 0; }
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+#define loadvec(thr) ((thr).load_vec)
+static int restore_altivec(struct task_struct *tsk)
+{
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
+ load_vr_state(&tsk->thread.vr_state);
+ tsk->thread.used_vr = 1;
+ tsk->thread.load_vec++;
+
+ return 1;
+ }
+ return 0;
+}
+#else
+#define loadvec(thr) 0
+static inline int restore_altivec(struct task_struct *tsk) { return 0; }
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+static int restore_vsx(struct task_struct *tsk)
+{
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ tsk->thread.used_vsr = 1;
+ return 1;
+ }
+
+ return 0;
+}
+#else
+static inline int restore_vsx(struct task_struct *tsk) { return 0; }
+#endif /* CONFIG_VSX */
+
/*
* The exception exit path calls restore_math() with interrupts hard disabled
* but the soft irq state not "reconciled". ftrace code that calls
@@ -551,6 +557,7 @@ void notrace restore_math(struct pt_regs *regs)
regs->msr = msr;
}
+#endif
static void save_all(struct task_struct *tsk)
{
@@ -1634,11 +1641,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
p->thread.regs = childregs;
childregs->gpr[3] = 0; /* Result from fork() */
if (clone_flags & CLONE_SETTLS) {
-#ifdef CONFIG_PPC64
if (!is_32bit_task())
childregs->gpr[13] = tls;
else
-#endif
childregs->gpr[2] = tls;
}
@@ -1976,6 +1981,32 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
return 0;
}
+static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes)
+{
+#ifdef CONFIG_PPC64
+ unsigned long stack_page;
+ unsigned long cpu = task_cpu(p);
+
+ stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+# ifdef CONFIG_PPC_BOOK3S_64
+ stack_page = (unsigned long)paca_ptrs[cpu]->nmi_emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+ stack_page = (unsigned long)paca_ptrs[cpu]->mc_emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+# endif
+#endif
+
+ return 0;
+}
+
+
int validate_sp(unsigned long sp, struct task_struct *p,
unsigned long nbytes)
{
@@ -1987,7 +2018,10 @@ int validate_sp(unsigned long sp, struct task_struct *p,
if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
return 1;
- return valid_irq_stack(sp, p, nbytes);
+ if (valid_irq_stack(sp, p, nbytes))
+ return 1;
+
+ return valid_emergency_stack(sp, p, nbytes);
}
EXPORT_SYMBOL(validate_sp);
@@ -2053,7 +2087,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
sp = (unsigned long) stack;
if (sp == 0) {
if (tsk == current)
- sp = current_stack_pointer();
+ sp = current_stack_frame();
else
sp = tsk->thread.ksp;
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 577345382b23..806be751c336 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1773,6 +1773,9 @@ static void __init prom_rtas_os_term(char *str)
if (token == 0)
prom_panic("Could not get token for ibm,os-term\n");
os_term_args.token = cpu_to_be32(token);
+ os_term_args.nargs = cpu_to_be32(1);
+ os_term_args.nret = cpu_to_be32(1);
+ os_term_args.args[0] = cpu_to_be32(__pa(str));
prom_rtas_hcall((uint64_t)&os_term_args);
}
#endif /* CONFIG_PPC_SVM */
@@ -3474,7 +3477,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*/
hdr = dt_header_start;
- /* Don't print anything after quiesce under OPAL, it crashes OFW */
prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
prom_debug("->dt_header_start=0x%lx\n", hdr);
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
deleted file mode 100644
index 25c0424e8868..000000000000
--- a/arch/powerpc/kernel/ptrace.c
+++ /dev/null
@@ -1,3468 +0,0 @@
-/*
- * PowerPC version
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Derived from "arch/m68k/kernel/ptrace.c"
- * Copyright (C) 1994 by Hamish Macdonald
- * Taken from linux/kernel/ptrace.c and modified for M680x0.
- * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
- *
- * Modified by Cort Dougan (cort@hq.fsmlabs.com)
- * and Paul Mackerras (paulus@samba.org).
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file README.legal in the main directory of
- * this archive for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/regset.h>
-#include <linux/tracehook.h>
-#include <linux/elf.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/signal.h>
-#include <linux/seccomp.h>
-#include <linux/audit.h>
-#include <trace/syscall.h>
-#include <linux/hw_breakpoint.h>
-#include <linux/perf_event.h>
-#include <linux/context_tracking.h>
-#include <linux/nospec.h>
-
-#include <linux/uaccess.h>
-#include <linux/pkeys.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/switch_to.h>
-#include <asm/tm.h>
-#include <asm/asm-prototypes.h>
-#include <asm/debug.h>
-#include <asm/hw_breakpoint.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
-/*
- * The parameter save area on the stack is used to store arguments being passed
- * to callee function and is located at fixed offset from stack pointer.
- */
-#ifdef CONFIG_PPC32
-#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */
-#else /* CONFIG_PPC32 */
-#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */
-#endif
-
-struct pt_regs_offset {
- const char *name;
- int offset;
-};
-
-#define STR(s) #s /* convert to string */
-#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
-#define GPR_OFFSET_NAME(num) \
- {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \
- {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
-#define REG_OFFSET_END {.name = NULL, .offset = 0}
-
-#define TVSO(f) (offsetof(struct thread_vr_state, f))
-#define TFSO(f) (offsetof(struct thread_fp_state, f))
-#define TSO(f) (offsetof(struct thread_struct, f))
-
-static const struct pt_regs_offset regoffset_table[] = {
- GPR_OFFSET_NAME(0),
- GPR_OFFSET_NAME(1),
- GPR_OFFSET_NAME(2),
- GPR_OFFSET_NAME(3),
- GPR_OFFSET_NAME(4),
- GPR_OFFSET_NAME(5),
- GPR_OFFSET_NAME(6),
- GPR_OFFSET_NAME(7),
- GPR_OFFSET_NAME(8),
- GPR_OFFSET_NAME(9),
- GPR_OFFSET_NAME(10),
- GPR_OFFSET_NAME(11),
- GPR_OFFSET_NAME(12),
- GPR_OFFSET_NAME(13),
- GPR_OFFSET_NAME(14),
- GPR_OFFSET_NAME(15),
- GPR_OFFSET_NAME(16),
- GPR_OFFSET_NAME(17),
- GPR_OFFSET_NAME(18),
- GPR_OFFSET_NAME(19),
- GPR_OFFSET_NAME(20),
- GPR_OFFSET_NAME(21),
- GPR_OFFSET_NAME(22),
- GPR_OFFSET_NAME(23),
- GPR_OFFSET_NAME(24),
- GPR_OFFSET_NAME(25),
- GPR_OFFSET_NAME(26),
- GPR_OFFSET_NAME(27),
- GPR_OFFSET_NAME(28),
- GPR_OFFSET_NAME(29),
- GPR_OFFSET_NAME(30),
- GPR_OFFSET_NAME(31),
- REG_OFFSET_NAME(nip),
- REG_OFFSET_NAME(msr),
- REG_OFFSET_NAME(ctr),
- REG_OFFSET_NAME(link),
- REG_OFFSET_NAME(xer),
- REG_OFFSET_NAME(ccr),
-#ifdef CONFIG_PPC64
- REG_OFFSET_NAME(softe),
-#else
- REG_OFFSET_NAME(mq),
-#endif
- REG_OFFSET_NAME(trap),
- REG_OFFSET_NAME(dar),
- REG_OFFSET_NAME(dsisr),
- REG_OFFSET_END,
-};
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static void flush_tmregs_to_thread(struct task_struct *tsk)
-{
- /*
- * If task is not current, it will have been flushed already to
- * it's thread_struct during __switch_to().
- *
- * A reclaim flushes ALL the state or if not in TM save TM SPRs
- * in the appropriate thread structures from live.
- */
-
- if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current))
- return;
-
- if (MSR_TM_SUSPENDED(mfmsr())) {
- tm_reclaim_current(TM_CAUSE_SIGNAL);
- } else {
- tm_enable();
- tm_save_sprs(&(tsk->thread));
- }
-}
-#else
-static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
-#endif
-
-/**
- * regs_query_register_offset() - query register offset from its name
- * @name: the name of a register
- *
- * regs_query_register_offset() returns the offset of a register in struct
- * pt_regs from its name. If the name is invalid, this returns -EINVAL;
- */
-int regs_query_register_offset(const char *name)
-{
- const struct pt_regs_offset *roff;
- for (roff = regoffset_table; roff->name != NULL; roff++)
- if (!strcmp(roff->name, name))
- return roff->offset;
- return -EINVAL;
-}
-
-/**
- * regs_query_register_name() - query register name from its offset
- * @offset: the offset of a register in struct pt_regs.
- *
- * regs_query_register_name() returns the name of a register from its
- * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
- */
-const char *regs_query_register_name(unsigned int offset)
-{
- const struct pt_regs_offset *roff;
- for (roff = regoffset_table; roff->name != NULL; roff++)
- if (roff->offset == offset)
- return roff->name;
- return NULL;
-}
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
-/*
- * Set of msr bits that gdb can change on behalf of a process.
- */
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-#define MSR_DEBUGCHANGE 0
-#else
-#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE)
-#endif
-
-/*
- * Max register writeable via put_reg
- */
-#ifdef CONFIG_PPC32
-#define PT_MAX_PUT_REG PT_MQ
-#else
-#define PT_MAX_PUT_REG PT_CCR
-#endif
-
-static unsigned long get_user_msr(struct task_struct *task)
-{
- return task->thread.regs->msr | task->thread.fpexc_mode;
-}
-
-static int set_user_msr(struct task_struct *task, unsigned long msr)
-{
- task->thread.regs->msr &= ~MSR_DEBUGCHANGE;
- task->thread.regs->msr |= msr & MSR_DEBUGCHANGE;
- return 0;
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static unsigned long get_user_ckpt_msr(struct task_struct *task)
-{
- return task->thread.ckpt_regs.msr | task->thread.fpexc_mode;
-}
-
-static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr)
-{
- task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE;
- task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE;
- return 0;
-}
-
-static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap)
-{
- task->thread.ckpt_regs.trap = trap & 0xfff0;
- return 0;
-}
-#endif
-
-#ifdef CONFIG_PPC64
-static int get_user_dscr(struct task_struct *task, unsigned long *data)
-{
- *data = task->thread.dscr;
- return 0;
-}
-
-static int set_user_dscr(struct task_struct *task, unsigned long dscr)
-{
- task->thread.dscr = dscr;
- task->thread.dscr_inherit = 1;
- return 0;
-}
-#else
-static int get_user_dscr(struct task_struct *task, unsigned long *data)
-{
- return -EIO;
-}
-
-static int set_user_dscr(struct task_struct *task, unsigned long dscr)
-{
- return -EIO;
-}
-#endif
-
-/*
- * We prevent mucking around with the reserved area of trap
- * which are used internally by the kernel.
- */
-static int set_user_trap(struct task_struct *task, unsigned long trap)
-{
- task->thread.regs->trap = trap & 0xfff0;
- return 0;
-}
-
-/*
- * Get contents of register REGNO in task TASK.
- */
-int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
-{
- unsigned int regs_max;
-
- if ((task->thread.regs == NULL) || !data)
- return -EIO;
-
- if (regno == PT_MSR) {
- *data = get_user_msr(task);
- return 0;
- }
-
- if (regno == PT_DSCR)
- return get_user_dscr(task, data);
-
-#ifdef CONFIG_PPC64
- /*
- * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is
- * no more used as a flag, lets force usr to alway see the softe value as 1
- * which means interrupts are not soft disabled.
- */
- if (regno == PT_SOFTE) {
- *data = 1;
- return 0;
- }
-#endif
-
- regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long);
- if (regno < regs_max) {
- regno = array_index_nospec(regno, regs_max);
- *data = ((unsigned long *)task->thread.regs)[regno];
- return 0;
- }
-
- return -EIO;
-}
-
-/*
- * Write contents of register REGNO in task TASK.
- */
-int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
-{
- if (task->thread.regs == NULL)
- return -EIO;
-
- if (regno == PT_MSR)
- return set_user_msr(task, data);
- if (regno == PT_TRAP)
- return set_user_trap(task, data);
- if (regno == PT_DSCR)
- return set_user_dscr(task, data);
-
- if (regno <= PT_MAX_PUT_REG) {
- regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1);
- ((unsigned long *)task->thread.regs)[regno] = data;
- return 0;
- }
- return -EIO;
-}
-
-static int gpr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int i, ret;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- if (!FULL_REGS(target->thread.regs)) {
- /* We have a partial register set. Fill 14-31 with bogus values */
- for (i = 14; i < 32; i++)
- target->thread.regs->gpr[i] = NV_REG_POISON;
- }
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- target->thread.regs,
- 0, offsetof(struct pt_regs, msr));
- if (!ret) {
- unsigned long msr = get_user_msr(target);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
- offsetof(struct pt_regs, msr),
- offsetof(struct pt_regs, msr) +
- sizeof(msr));
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->orig_gpr3,
- offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct user_pt_regs));
- if (!ret)
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct user_pt_regs), -1);
-
- return ret;
-}
-
-static int gpr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- unsigned long reg;
- int ret;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- CHECK_FULL_REGS(target->thread.regs);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.regs,
- 0, PT_MSR * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_MSR * sizeof(reg),
- (PT_MSR + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_msr(target, reg);
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->orig_gpr3,
- PT_ORIG_R3 * sizeof(reg),
- (PT_MAX_PUT_REG + 1) * sizeof(reg));
-
- if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_MAX_PUT_REG + 1) * sizeof(reg),
- PT_TRAP * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_TRAP * sizeof(reg),
- (PT_TRAP + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_trap(target, reg);
- }
-
- if (!ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_TRAP + 1) * sizeof(reg), -1);
-
- return ret;
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last checkpointed
- * value of all FPR registers for the current transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- * };
- */
-static int fpr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
-#ifdef CONFIG_VSX
- u64 buf[33];
- int i;
-
- flush_fp_to_thread(target);
-
- /* copy to local buffer then write that out */
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_FPR(i);
- buf[32] = target->thread.fp_state.fpscr;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
-#else
- BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
- offsetof(struct thread_fp_state, fpr[32]));
-
- flush_fp_to_thread(target);
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fp_state, 0, -1);
-#endif
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last checkpointed
- * value of all FPR registers for the current transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- * };
- *
- */
-static int fpr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
-#ifdef CONFIG_VSX
- u64 buf[33];
- int i;
-
- flush_fp_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_FPR(i);
- buf[32] = target->thread.fp_state.fpscr;
-
- /* copy to local buffer then write that out */
- i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
- if (i)
- return i;
-
- for (i = 0; i < 32 ; i++)
- target->thread.TS_FPR(i) = buf[i];
- target->thread.fp_state.fpscr = buf[32];
- return 0;
-#else
- BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
- offsetof(struct thread_fp_state, fpr[32]));
-
- flush_fp_to_thread(target);
-
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fp_state, 0, -1);
-#endif
-}
-
-#ifdef CONFIG_ALTIVEC
-/*
- * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
- * The transfer totals 34 quadword. Quadwords 0-31 contain the
- * corresponding vector registers. Quadword 32 contains the vscr as the
- * last word (offset 12) within that quadword. Quadword 33 contains the
- * vrsave as the first word (offset 0) within the quadword.
- *
- * This definition of the VMX state is compatible with the current PPC32
- * ptrace interface. This allows signal handling and ptrace to use the
- * same structures. This also simplifies the implementation of a bi-arch
- * (combined (32- and 64-bit) gdb.
- */
-
-static int vr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_altivec_to_thread(target);
- return target->thread.used_vr ? regset->n : 0;
-}
-
-/*
- * Regardless of transactions, 'vr_state' holds the current running
- * value of all the VMX registers and 'ckvr_state' holds the last
- * checkpointed value of all the VMX registers for the current
- * transaction to fall back on in case it aborts.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- * };
- */
-static int vr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- flush_altivec_to_thread(target);
-
- BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
- offsetof(struct thread_vr_state, vr[32]));
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
- 33 * sizeof(vector128));
- if (!ret) {
- /*
- * Copy out only the low-order word of vrsave.
- */
- int start, end;
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
-
- vrsave.word = target->thread.vrsave;
-
- start = 33 * sizeof(vector128);
- end = start + sizeof(vrsave);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
- start, end);
- }
-
- return ret;
-}
-
-/*
- * Regardless of transactions, 'vr_state' holds the current running
- * value of all the VMX registers and 'ckvr_state' holds the last
- * checkpointed value of all the VMX registers for the current
- * transaction to fall back on in case it aborts.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- * };
- */
-static int vr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- flush_altivec_to_thread(target);
-
- BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
- offsetof(struct thread_vr_state, vr[32]));
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
- 33 * sizeof(vector128));
- if (!ret && count > 0) {
- /*
- * We use only the first word of vrsave.
- */
- int start, end;
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
-
- vrsave.word = target->thread.vrsave;
-
- start = 33 * sizeof(vector128);
- end = start + sizeof(vrsave);
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
- start, end);
- if (!ret)
- target->thread.vrsave = vrsave.word;
- }
-
- return ret;
-}
-#endif /* CONFIG_ALTIVEC */
-
-#ifdef CONFIG_VSX
-/*
- * Currently to set and and get all the vsx state, you need to call
- * the fp and VMX calls as well. This only get/sets the lower 32
- * 128bit VSX registers.
- */
-
-static int vsr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_vsx_to_thread(target);
- return target->thread.used_vsr ? regset->n : 0;
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last
- * checkpointed value of all FPR registers for the current
- * transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 vsx[32];
- * };
- */
-static int vsr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- u64 buf[32];
- int ret, i;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
-
- return ret;
-}
-
-/*
- * Regardless of transactions, 'fp_state' holds the current running
- * value of all FPR registers and 'ckfp_state' holds the last
- * checkpointed value of all FPR registers for the current
- * transaction.
- *
- * Userspace interface buffer layout:
- *
- * struct data {
- * u64 vsx[32];
- * };
- */
-static int vsr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 buf[32];
- int ret,i;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
- if (!ret)
- for (i = 0; i < 32 ; i++)
- target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
-
- return ret;
-}
-#endif /* CONFIG_VSX */
-
-#ifdef CONFIG_SPE
-
-/*
- * For get_evrregs/set_evrregs functions 'data' has the following layout:
- *
- * struct {
- * u32 evr[32];
- * u64 acc;
- * u32 spefscr;
- * }
- */
-
-static int evr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_spe_to_thread(target);
- return target->thread.used_spe ? regset->n : 0;
-}
-
-static int evr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- flush_spe_to_thread(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.evr,
- 0, sizeof(target->thread.evr));
-
- BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
- offsetof(struct thread_struct, spefscr));
-
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.acc,
- sizeof(target->thread.evr), -1);
-
- return ret;
-}
-
-static int evr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- flush_spe_to_thread(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.evr,
- 0, sizeof(target->thread.evr));
-
- BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
- offsetof(struct thread_struct, spefscr));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.acc,
- sizeof(target->thread.evr), -1);
-
- return ret;
-}
-#endif /* CONFIG_SPE */
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/**
- * tm_cgpr_active - get active number of registers in CGPR
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in transaction checkpointed GPR category.
- */
-static int tm_cgpr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- return regset->n;
-}
-
-/**
- * tm_cgpr_get - get CGPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets transaction checkpointed GPR registers.
- *
- * When the transaction is active, 'ckpt_regs' holds all the checkpointed
- * GPR register values for the current transaction to fall back on if it
- * aborts in between. This function gets those checkpointed GPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * struct pt_regs ckpt_regs;
- * };
- */
-static int tm_cgpr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs,
- 0, offsetof(struct pt_regs, msr));
- if (!ret) {
- unsigned long msr = get_user_ckpt_msr(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
- offsetof(struct pt_regs, msr),
- offsetof(struct pt_regs, msr) +
- sizeof(msr));
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs.orig_gpr3,
- offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct user_pt_regs));
- if (!ret)
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct user_pt_regs), -1);
-
- return ret;
-}
-
-/*
- * tm_cgpr_set - set the CGPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed GPR registers.
- *
- * When the transaction is active, 'ckpt_regs' holds the checkpointed
- * GPR register values for the current transaction to fall back on if it
- * aborts in between. This function sets those checkpointed GPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * struct pt_regs ckpt_regs;
- * };
- */
-static int tm_cgpr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- unsigned long reg;
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs,
- 0, PT_MSR * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_MSR * sizeof(reg),
- (PT_MSR + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_ckpt_msr(target, reg);
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckpt_regs.orig_gpr3,
- PT_ORIG_R3 * sizeof(reg),
- (PT_MAX_PUT_REG + 1) * sizeof(reg));
-
- if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_MAX_PUT_REG + 1) * sizeof(reg),
- PT_TRAP * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_TRAP * sizeof(reg),
- (PT_TRAP + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_ckpt_trap(target, reg);
- }
-
- if (!ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_TRAP + 1) * sizeof(reg), -1);
-
- return ret;
-}
-
-/**
- * tm_cfpr_active - get active number of registers in CFPR
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in transaction checkpointed FPR category.
- */
-static int tm_cfpr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- return regset->n;
-}
-
-/**
- * tm_cfpr_get - get CFPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets in transaction checkpointed FPR registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * values for the current transaction to fall back on if it aborts
- * in between. This function gets those checkpointed FPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- *};
- */
-static int tm_cfpr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- u64 buf[33];
- int i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- /* copy to local buffer then write that out */
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_CKFPR(i);
- buf[32] = target->thread.ckfp_state.fpscr;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
-}
-
-/**
- * tm_cfpr_set - set CFPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed FPR registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * FPR register values for the current transaction to fall back on
- * if it aborts in between. This function sets these checkpointed
- * FPR registers. The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 fpr[32];
- * u64 fpscr;
- *};
- */
-static int tm_cfpr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 buf[33];
- int i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- for (i = 0; i < 32; i++)
- buf[i] = target->thread.TS_CKFPR(i);
- buf[32] = target->thread.ckfp_state.fpscr;
-
- /* copy to local buffer then write that out */
- i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
- if (i)
- return i;
- for (i = 0; i < 32 ; i++)
- target->thread.TS_CKFPR(i) = buf[i];
- target->thread.ckfp_state.fpscr = buf[32];
- return 0;
-}
-
-/**
- * tm_cvmx_active - get active number of registers in CVMX
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in checkpointed VMX category.
- */
-static int tm_cvmx_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- return regset->n;
-}
-
-/**
- * tm_cvmx_get - get CMVX registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets in transaction checkpointed VMX registers.
- *
- * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
- * the checkpointed values for the current transaction to fall
- * back on if it aborts in between. The userspace interface buffer
- * layout is as follows.
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- *};
- */
-static int tm_cvmx_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- /* Flush the state */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckvr_state, 0,
- 33 * sizeof(vector128));
- if (!ret) {
- /*
- * Copy out only the low-order word of vrsave.
- */
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
- vrsave.word = target->thread.ckvrsave;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
- }
-
- return ret;
-}
-
-/**
- * tm_cvmx_set - set CMVX registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed VMX registers.
- *
- * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
- * the checkpointed values for the current transaction to fall
- * back on if it aborts in between. The userspace interface buffer
- * layout is as follows.
- *
- * struct data {
- * vector128 vr[32];
- * vector128 vscr;
- * vector128 vrsave;
- *};
- */
-static int tm_cvmx_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ckvr_state, 0,
- 33 * sizeof(vector128));
- if (!ret && count > 0) {
- /*
- * We use only the low-order word of vrsave.
- */
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
- vrsave.word = target->thread.ckvrsave;
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
- if (!ret)
- target->thread.ckvrsave = vrsave.word;
- }
-
- return ret;
-}
-
-/**
- * tm_cvsx_active - get active number of registers in CVSX
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks for the active number of available
- * regisers in transaction checkpointed VSX category.
- */
-static int tm_cvsx_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return 0;
-
- flush_vsx_to_thread(target);
- return target->thread.used_vsr ? regset->n : 0;
-}
-
-/**
- * tm_cvsx_get - get CVSX registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets in transaction checkpointed VSX registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * values for the current transaction to fall back on if it aborts
- * in between. This function gets those checkpointed VSX registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 vsx[32];
- *};
- */
-static int tm_cvsx_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- u64 buf[32];
- int ret, i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- /* Flush the state */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
-
- return ret;
-}
-
-/**
- * tm_cvsx_set - set CFPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets in transaction checkpointed VSX registers.
- *
- * When the transaction is active 'ckfp_state' holds the checkpointed
- * VSX register values for the current transaction to fall back on
- * if it aborts in between. This function sets these checkpointed
- * FPR registers. The userspace interface buffer layout is as follows.
- *
- * struct data {
- * u64 vsx[32];
- *};
- */
-static int tm_cvsx_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 buf[32];
- int ret, i;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- /* Flush the state */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
- if (!ret)
- for (i = 0; i < 32 ; i++)
- target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
-
- return ret;
-}
-
-/**
- * tm_spr_active - get active number of registers in TM SPR
- * @target: The target task.
- * @regset: The user regset structure.
- *
- * This function checks the active number of available
- * regisers in the transactional memory SPR category.
- */
-static int tm_spr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- return regset->n;
-}
-
-/**
- * tm_spr_get - get the TM related SPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy from.
- * @ubuf: User buffer to copy into.
- *
- * This function gets transactional memory related SPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct {
- * u64 tm_tfhar;
- * u64 tm_texasr;
- * u64 tm_tfiar;
- * };
- */
-static int tm_spr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
- BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
- BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- /* Flush the states */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- /* TFHAR register */
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfhar, 0, sizeof(u64));
-
- /* TEXASR register */
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_texasr, sizeof(u64),
- 2 * sizeof(u64));
-
- /* TFIAR register */
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfiar,
- 2 * sizeof(u64), 3 * sizeof(u64));
- return ret;
-}
-
-/**
- * tm_spr_set - set the TM related SPR registers
- * @target: The target task.
- * @regset: The user regset structure.
- * @pos: The buffer position.
- * @count: Number of bytes to copy.
- * @kbuf: Kernel buffer to copy into.
- * @ubuf: User buffer to copy from.
- *
- * This function sets transactional memory related SPR registers.
- * The userspace interface buffer layout is as follows.
- *
- * struct {
- * u64 tm_tfhar;
- * u64 tm_texasr;
- * u64 tm_tfiar;
- * };
- */
-static int tm_spr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
- BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
- BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- /* Flush the states */
- flush_tmregs_to_thread(target);
- flush_fp_to_thread(target);
- flush_altivec_to_thread(target);
-
- /* TFHAR register */
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfhar, 0, sizeof(u64));
-
- /* TEXASR register */
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_texasr, sizeof(u64),
- 2 * sizeof(u64));
-
- /* TFIAR register */
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tfiar,
- 2 * sizeof(u64), 3 * sizeof(u64));
- return ret;
-}
-
-static int tm_tar_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- return regset->n;
-
- return 0;
-}
-
-static int tm_tar_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tar, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_tar_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_tar, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_ppr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- return regset->n;
-
- return 0;
-}
-
-
-static int tm_ppr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_ppr, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_ppr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_ppr, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_dscr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- return regset->n;
-
- return 0;
-}
-
-static int tm_dscr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_dscr, 0, sizeof(u64));
- return ret;
-}
-
-static int tm_dscr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_TM))
- return -ENODEV;
-
- if (!MSR_TM_ACTIVE(target->thread.regs->msr))
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tm_dscr, 0, sizeof(u64));
- return ret;
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
-#ifdef CONFIG_PPC64
-static int ppr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->ppr, 0, sizeof(u64));
-}
-
-static int ppr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->ppr, 0, sizeof(u64));
-}
-
-static int dscr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
-}
-static int dscr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
-}
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
-static int tar_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
-}
-static int tar_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
-}
-
-static int ebb_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- if (target->thread.used_ebb)
- return regset->n;
-
- return 0;
-}
-
-static int ebb_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- /* Build tests */
- BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
- BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- if (!target->thread.used_ebb)
- return -ENODATA;
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ebbrr, 0, 3 * sizeof(unsigned long));
-}
-
-static int ebb_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret = 0;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
- BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- if (target->thread.used_ebb)
- return -ENODATA;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ebbrr, 0, sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ebbhr, sizeof(unsigned long),
- 2 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.bescr,
- 2 * sizeof(unsigned long), 3 * sizeof(unsigned long));
-
- return ret;
-}
-static int pmu_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- return regset->n;
-}
-
-static int pmu_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- /* Build tests */
- BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
- BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
- BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
- BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.siar, 0,
- 5 * sizeof(unsigned long));
-}
-
-static int pmu_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret = 0;
-
- /* Build tests */
- BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
- BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
- BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
- BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.siar, 0,
- sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.sdar, sizeof(unsigned long),
- 2 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.sier, 2 * sizeof(unsigned long),
- 3 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.mmcr2, 3 * sizeof(unsigned long),
- 4 * sizeof(unsigned long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.mmcr0, 4 * sizeof(unsigned long),
- 5 * sizeof(unsigned long));
- return ret;
-}
-#endif
-
-#ifdef CONFIG_PPC_MEM_KEYS
-static int pkey_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- if (!arch_pkeys_enabled())
- return -ENODEV;
-
- return regset->n;
-}
-
-static int pkey_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
- BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
-
- if (!arch_pkeys_enabled())
- return -ENODEV;
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.amr, 0,
- ELF_NPKEY * sizeof(unsigned long));
-}
-
-static int pkey_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 new_amr;
- int ret;
-
- if (!arch_pkeys_enabled())
- return -ENODEV;
-
- /* Only the AMR can be set from userspace */
- if (pos != 0 || count != sizeof(new_amr))
- return -EINVAL;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &new_amr, 0, sizeof(new_amr));
- if (ret)
- return ret;
-
- /* UAMOR determines which bits of the AMR can be set from userspace. */
- target->thread.amr = (new_amr & target->thread.uamor) |
- (target->thread.amr & ~target->thread.uamor);
-
- return 0;
-}
-#endif /* CONFIG_PPC_MEM_KEYS */
-
-/*
- * These are our native regset flavors.
- */
-enum powerpc_regset {
- REGSET_GPR,
- REGSET_FPR,
-#ifdef CONFIG_ALTIVEC
- REGSET_VMX,
-#endif
-#ifdef CONFIG_VSX
- REGSET_VSX,
-#endif
-#ifdef CONFIG_SPE
- REGSET_SPE,
-#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- REGSET_TM_CGPR, /* TM checkpointed GPR registers */
- REGSET_TM_CFPR, /* TM checkpointed FPR registers */
- REGSET_TM_CVMX, /* TM checkpointed VMX registers */
- REGSET_TM_CVSX, /* TM checkpointed VSX registers */
- REGSET_TM_SPR, /* TM specific SPR registers */
- REGSET_TM_CTAR, /* TM checkpointed TAR register */
- REGSET_TM_CPPR, /* TM checkpointed PPR register */
- REGSET_TM_CDSCR, /* TM checkpointed DSCR register */
-#endif
-#ifdef CONFIG_PPC64
- REGSET_PPR, /* PPR register */
- REGSET_DSCR, /* DSCR register */
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- REGSET_TAR, /* TAR register */
- REGSET_EBB, /* EBB registers */
- REGSET_PMR, /* Performance Monitor Registers */
-#endif
-#ifdef CONFIG_PPC_MEM_KEYS
- REGSET_PKEY, /* AMR register */
-#endif
-};
-
-static const struct user_regset native_regsets[] = {
- [REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
- .size = sizeof(long), .align = sizeof(long),
- .get = gpr_get, .set = gpr_set
- },
- [REGSET_FPR] = {
- .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .get = fpr_get, .set = fpr_set
- },
-#ifdef CONFIG_ALTIVEC
- [REGSET_VMX] = {
- .core_note_type = NT_PPC_VMX, .n = 34,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = vr_active, .get = vr_get, .set = vr_set
- },
-#endif
-#ifdef CONFIG_VSX
- [REGSET_VSX] = {
- .core_note_type = NT_PPC_VSX, .n = 32,
- .size = sizeof(double), .align = sizeof(double),
- .active = vsr_active, .get = vsr_get, .set = vsr_set
- },
-#endif
-#ifdef CONFIG_SPE
- [REGSET_SPE] = {
- .core_note_type = NT_PPC_SPE, .n = 35,
- .size = sizeof(u32), .align = sizeof(u32),
- .active = evr_active, .get = evr_get, .set = evr_set
- },
-#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- [REGSET_TM_CGPR] = {
- .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
- .size = sizeof(long), .align = sizeof(long),
- .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set
- },
- [REGSET_TM_CFPR] = {
- .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
- },
- [REGSET_TM_CVMX] = {
- .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
- },
- [REGSET_TM_CVSX] = {
- .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
- },
- [REGSET_TM_SPR] = {
- .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
- },
- [REGSET_TM_CTAR] = {
- .core_note_type = NT_PPC_TM_CTAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
- },
- [REGSET_TM_CPPR] = {
- .core_note_type = NT_PPC_TM_CPPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
- },
- [REGSET_TM_CDSCR] = {
- .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
- },
-#endif
-#ifdef CONFIG_PPC64
- [REGSET_PPR] = {
- .core_note_type = NT_PPC_PPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = ppr_get, .set = ppr_set
- },
- [REGSET_DSCR] = {
- .core_note_type = NT_PPC_DSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = dscr_get, .set = dscr_set
- },
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- [REGSET_TAR] = {
- .core_note_type = NT_PPC_TAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = tar_get, .set = tar_set
- },
- [REGSET_EBB] = {
- .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = ebb_active, .get = ebb_get, .set = ebb_set
- },
- [REGSET_PMR] = {
- .core_note_type = NT_PPC_PMU, .n = ELF_NPMU,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = pmu_active, .get = pmu_get, .set = pmu_set
- },
-#endif
-#ifdef CONFIG_PPC_MEM_KEYS
- [REGSET_PKEY] = {
- .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = pkey_active, .get = pkey_get, .set = pkey_set
- },
-#endif
-};
-
-static const struct user_regset_view user_ppc_native_view = {
- .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
- .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
-};
-
-#ifdef CONFIG_PPC64
-#include <linux/compat.h>
-
-static int gpr32_get_common(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf,
- unsigned long *regs)
-{
- compat_ulong_t *k = kbuf;
- compat_ulong_t __user *u = ubuf;
- compat_ulong_t reg;
-
- pos /= sizeof(reg);
- count /= sizeof(reg);
-
- if (kbuf)
- for (; count > 0 && pos < PT_MSR; --count)
- *k++ = regs[pos++];
- else
- for (; count > 0 && pos < PT_MSR; --count)
- if (__put_user((compat_ulong_t) regs[pos++], u++))
- return -EFAULT;
-
- if (count > 0 && pos == PT_MSR) {
- reg = get_user_msr(target);
- if (kbuf)
- *k++ = reg;
- else if (__put_user(reg, u++))
- return -EFAULT;
- ++pos;
- --count;
- }
-
- if (kbuf)
- for (; count > 0 && pos < PT_REGS_COUNT; --count)
- *k++ = regs[pos++];
- else
- for (; count > 0 && pos < PT_REGS_COUNT; --count)
- if (__put_user((compat_ulong_t) regs[pos++], u++))
- return -EFAULT;
-
- kbuf = k;
- ubuf = u;
- pos *= sizeof(reg);
- count *= sizeof(reg);
- return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- PT_REGS_COUNT * sizeof(reg), -1);
-}
-
-static int gpr32_set_common(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf,
- unsigned long *regs)
-{
- const compat_ulong_t *k = kbuf;
- const compat_ulong_t __user *u = ubuf;
- compat_ulong_t reg;
-
- pos /= sizeof(reg);
- count /= sizeof(reg);
-
- if (kbuf)
- for (; count > 0 && pos < PT_MSR; --count)
- regs[pos++] = *k++;
- else
- for (; count > 0 && pos < PT_MSR; --count) {
- if (__get_user(reg, u++))
- return -EFAULT;
- regs[pos++] = reg;
- }
-
-
- if (count > 0 && pos == PT_MSR) {
- if (kbuf)
- reg = *k++;
- else if (__get_user(reg, u++))
- return -EFAULT;
- set_user_msr(target, reg);
- ++pos;
- --count;
- }
-
- if (kbuf) {
- for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
- regs[pos++] = *k++;
- for (; count > 0 && pos < PT_TRAP; --count, ++pos)
- ++k;
- } else {
- for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
- if (__get_user(reg, u++))
- return -EFAULT;
- regs[pos++] = reg;
- }
- for (; count > 0 && pos < PT_TRAP; --count, ++pos)
- if (__get_user(reg, u++))
- return -EFAULT;
- }
-
- if (count > 0 && pos == PT_TRAP) {
- if (kbuf)
- reg = *k++;
- else if (__get_user(reg, u++))
- return -EFAULT;
- set_user_trap(target, reg);
- ++pos;
- --count;
- }
-
- kbuf = k;
- ubuf = u;
- pos *= sizeof(reg);
- count *= sizeof(reg);
- return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
- (PT_TRAP + 1) * sizeof(reg), -1);
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static int tm_cgpr32_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.ckpt_regs.gpr[0]);
-}
-
-static int tm_cgpr32_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.ckpt_regs.gpr[0]);
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
-static int gpr32_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int i;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- if (!FULL_REGS(target->thread.regs)) {
- /*
- * We have a partial register set.
- * Fill 14-31 with bogus values.
- */
- for (i = 14; i < 32; i++)
- target->thread.regs->gpr[i] = NV_REG_POISON;
- }
- return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.regs->gpr[0]);
-}
-
-static int gpr32_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- if (target->thread.regs == NULL)
- return -EIO;
-
- CHECK_FULL_REGS(target->thread.regs);
- return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
- &target->thread.regs->gpr[0]);
-}
-
-/*
- * These are the regset flavors matching the CONFIG_PPC32 native set.
- */
-static const struct user_regset compat_regsets[] = {
- [REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
- .size = sizeof(compat_long_t), .align = sizeof(compat_long_t),
- .get = gpr32_get, .set = gpr32_set
- },
- [REGSET_FPR] = {
- .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .get = fpr_get, .set = fpr_set
- },
-#ifdef CONFIG_ALTIVEC
- [REGSET_VMX] = {
- .core_note_type = NT_PPC_VMX, .n = 34,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = vr_active, .get = vr_get, .set = vr_set
- },
-#endif
-#ifdef CONFIG_SPE
- [REGSET_SPE] = {
- .core_note_type = NT_PPC_SPE, .n = 35,
- .size = sizeof(u32), .align = sizeof(u32),
- .active = evr_active, .get = evr_get, .set = evr_set
- },
-#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- [REGSET_TM_CGPR] = {
- .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
- .size = sizeof(long), .align = sizeof(long),
- .active = tm_cgpr_active,
- .get = tm_cgpr32_get, .set = tm_cgpr32_set
- },
- [REGSET_TM_CFPR] = {
- .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
- },
- [REGSET_TM_CVMX] = {
- .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
- },
- [REGSET_TM_CVSX] = {
- .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
- .size = sizeof(double), .align = sizeof(double),
- .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
- },
- [REGSET_TM_SPR] = {
- .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
- },
- [REGSET_TM_CTAR] = {
- .core_note_type = NT_PPC_TM_CTAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
- },
- [REGSET_TM_CPPR] = {
- .core_note_type = NT_PPC_TM_CPPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
- },
- [REGSET_TM_CDSCR] = {
- .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
- },
-#endif
-#ifdef CONFIG_PPC64
- [REGSET_PPR] = {
- .core_note_type = NT_PPC_PPR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = ppr_get, .set = ppr_set
- },
- [REGSET_DSCR] = {
- .core_note_type = NT_PPC_DSCR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = dscr_get, .set = dscr_set
- },
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- [REGSET_TAR] = {
- .core_note_type = NT_PPC_TAR, .n = 1,
- .size = sizeof(u64), .align = sizeof(u64),
- .get = tar_get, .set = tar_set
- },
- [REGSET_EBB] = {
- .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
- .size = sizeof(u64), .align = sizeof(u64),
- .active = ebb_active, .get = ebb_get, .set = ebb_set
- },
-#endif
-};
-
-static const struct user_regset_view user_ppc_compat_view = {
- .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI,
- .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets)
-};
-#endif /* CONFIG_PPC64 */
-
-const struct user_regset_view *task_user_regset_view(struct task_struct *task)
-{
-#ifdef CONFIG_PPC64
- if (test_tsk_thread_flag(task, TIF_32BIT))
- return &user_ppc_compat_view;
-#endif
- return &user_ppc_native_view;
-}
-
-
-void user_enable_single_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- task->thread.debug.dbcr0 &= ~DBCR0_BT;
- task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
- regs->msr |= MSR_DE;
-#else
- regs->msr &= ~MSR_BE;
- regs->msr |= MSR_SE;
-#endif
- }
- set_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-void user_enable_block_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- task->thread.debug.dbcr0 &= ~DBCR0_IC;
- task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
- regs->msr |= MSR_DE;
-#else
- regs->msr &= ~MSR_SE;
- regs->msr |= MSR_BE;
-#endif
- }
- set_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-void user_disable_single_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- /*
- * The logic to disable single stepping should be as
- * simple as turning off the Instruction Complete flag.
- * And, after doing so, if all debug flags are off, turn
- * off DBCR0(IDM) and MSR(DE) .... Torez
- */
- task->thread.debug.dbcr0 &= ~(DBCR0_IC|DBCR0_BT);
- /*
- * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
- */
- if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
- task->thread.debug.dbcr1)) {
- /*
- * All debug events were off.....
- */
- task->thread.debug.dbcr0 &= ~DBCR0_IDM;
- regs->msr &= ~MSR_DE;
- }
-#else
- regs->msr &= ~(MSR_SE | MSR_BE);
-#endif
- }
- clear_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
-void ptrace_triggered(struct perf_event *bp,
- struct perf_sample_data *data, struct pt_regs *regs)
-{
- struct perf_event_attr attr;
-
- /*
- * Disable the breakpoint request here since ptrace has defined a
- * one-shot behaviour for breakpoint exceptions in PPC64.
- * The SIGTRAP signal is generated automatically for us in do_dabr().
- * We don't have to do anything about that here
- */
- attr = bp->attr;
- attr.disabled = true;
- modify_user_hw_breakpoint(bp, &attr);
-}
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
-static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
- unsigned long data)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int ret;
- struct thread_struct *thread = &(task->thread);
- struct perf_event *bp;
- struct perf_event_attr attr;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- bool set_bp = true;
- struct arch_hw_breakpoint hw_brk;
-#endif
-
- /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
- * For embedded processors we support one DAC and no IAC's at the
- * moment.
- */
- if (addr > 0)
- return -EINVAL;
-
- /* The bottom 3 bits in dabr are flags */
- if ((data & ~0x7UL) >= TASK_SIZE)
- return -EIO;
-
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
- * It was assumed, on previous implementations, that 3 bits were
- * passed together with the data address, fitting the design of the
- * DABR register, as follows:
- *
- * bit 0: Read flag
- * bit 1: Write flag
- * bit 2: Breakpoint translation
- *
- * Thus, we use them here as so.
- */
-
- /* Ensure breakpoint translation bit is set */
- if (data && !(data & HW_BRK_TYPE_TRANSLATE))
- return -EIO;
- hw_brk.address = data & (~HW_BRK_TYPE_DABR);
- hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
- hw_brk.len = DABR_MAX_LEN;
- hw_brk.hw_len = DABR_MAX_LEN;
- set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- bp = thread->ptrace_bps[0];
- if (!set_bp) {
- if (bp) {
- unregister_hw_breakpoint(bp);
- thread->ptrace_bps[0] = NULL;
- }
- return 0;
- }
- if (bp) {
- attr = bp->attr;
- attr.bp_addr = hw_brk.address;
- attr.bp_len = DABR_MAX_LEN;
- arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
-
- /* Enable breakpoint */
- attr.disabled = false;
-
- ret = modify_user_hw_breakpoint(bp, &attr);
- if (ret) {
- return ret;
- }
- thread->ptrace_bps[0] = bp;
- thread->hw_brk = hw_brk;
- return 0;
- }
-
- /* Create a new breakpoint request if one doesn't exist already */
- hw_breakpoint_init(&attr);
- attr.bp_addr = hw_brk.address;
- attr.bp_len = DABR_MAX_LEN;
- arch_bp_generic_fields(hw_brk.type,
- &attr.bp_type);
-
- thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
- ptrace_triggered, NULL, task);
- if (IS_ERR(bp)) {
- thread->ptrace_bps[0] = NULL;
- return PTR_ERR(bp);
- }
-
-#else /* !CONFIG_HAVE_HW_BREAKPOINT */
- if (set_bp && (!ppc_breakpoint_available()))
- return -ENODEV;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
- task->thread.hw_brk = hw_brk;
-#else /* CONFIG_PPC_ADV_DEBUG_REGS */
- /* As described above, it was assumed 3 bits were passed with the data
- * address, but we will assume only the mode bits will be passed
- * as to not cause alignment restrictions for DAC-based processors.
- */
-
- /* DAC's hold the whole address without any mode flags */
- task->thread.debug.dac1 = data & ~0x3UL;
-
- if (task->thread.debug.dac1 == 0) {
- dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
- if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
- task->thread.debug.dbcr1)) {
- task->thread.regs->msr &= ~MSR_DE;
- task->thread.debug.dbcr0 &= ~DBCR0_IDM;
- }
- return 0;
- }
-
- /* Read or Write bits must be set */
-
- if (!(data & 0x3UL))
- return -EINVAL;
-
- /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
- register */
- task->thread.debug.dbcr0 |= DBCR0_IDM;
-
- /* Check for write and read flags and set DBCR0
- accordingly */
- dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W);
- if (data & 0x1UL)
- dbcr_dac(task) |= DBCR_DAC1R;
- if (data & 0x2UL)
- dbcr_dac(task) |= DBCR_DAC1W;
- task->thread.regs->msr |= MSR_DE;
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
- return 0;
-}
-
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure single step bits etc are not set.
- */
-void ptrace_disable(struct task_struct *child)
-{
- /* make sure the single step bit is not set. */
- user_disable_single_step(child);
-}
-
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-static long set_instruction_bp(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
- int slot;
- int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
- int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
- int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
- int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE)
- slot2_in_use = 1;
- if (dbcr_iac_range(child) & DBCR_IAC34MODE)
- slot4_in_use = 1;
-
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
-
- /* Make sure range is valid. */
- if (bp_info->addr2 >= TASK_SIZE)
- return -EIO;
-
- /* We need a pair of IAC regsisters */
- if ((!slot1_in_use) && (!slot2_in_use)) {
- slot = 1;
- child->thread.debug.iac1 = bp_info->addr;
- child->thread.debug.iac2 = bp_info->addr2;
- child->thread.debug.dbcr0 |= DBCR0_IAC1;
- if (bp_info->addr_mode ==
- PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- dbcr_iac_range(child) |= DBCR_IAC12X;
- else
- dbcr_iac_range(child) |= DBCR_IAC12I;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- } else if ((!slot3_in_use) && (!slot4_in_use)) {
- slot = 3;
- child->thread.debug.iac3 = bp_info->addr;
- child->thread.debug.iac4 = bp_info->addr2;
- child->thread.debug.dbcr0 |= DBCR0_IAC3;
- if (bp_info->addr_mode ==
- PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- dbcr_iac_range(child) |= DBCR_IAC34X;
- else
- dbcr_iac_range(child) |= DBCR_IAC34I;
-#endif
- } else
- return -ENOSPC;
- } else {
- /* We only need one. If possible leave a pair free in
- * case a range is needed later
- */
- if (!slot1_in_use) {
- /*
- * Don't use iac1 if iac1-iac2 are free and either
- * iac3 or iac4 (but not both) are free
- */
- if (slot2_in_use || (slot3_in_use == slot4_in_use)) {
- slot = 1;
- child->thread.debug.iac1 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC1;
- goto out;
- }
- }
- if (!slot2_in_use) {
- slot = 2;
- child->thread.debug.iac2 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC2;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- } else if (!slot3_in_use) {
- slot = 3;
- child->thread.debug.iac3 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC3;
- } else if (!slot4_in_use) {
- slot = 4;
- child->thread.debug.iac4 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC4;
-#endif
- } else
- return -ENOSPC;
- }
-out:
- child->thread.debug.dbcr0 |= DBCR0_IDM;
- child->thread.regs->msr |= MSR_DE;
-
- return slot;
-}
-
-static int del_instruction_bp(struct task_struct *child, int slot)
-{
- switch (slot) {
- case 1:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
- /* address range - clear slots 1 & 2 */
- child->thread.debug.iac2 = 0;
- dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
- }
- child->thread.debug.iac1 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
- break;
- case 2:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE)
- /* used in a range */
- return -EINVAL;
- child->thread.debug.iac2 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
- break;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- case 3:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
- /* address range - clear slots 3 & 4 */
- child->thread.debug.iac4 = 0;
- dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
- }
- child->thread.debug.iac3 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
- break;
- case 4:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC34MODE)
- /* Used in a range */
- return -EINVAL;
- child->thread.debug.iac4 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
- break;
-#endif
- default:
- return -EINVAL;
- }
- return 0;
-}
-
-static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
-{
- int byte_enable =
- (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT)
- & 0xf;
- int condition_mode =
- bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE;
- int slot;
-
- if (byte_enable && (condition_mode == 0))
- return -EINVAL;
-
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) {
- slot = 1;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- dbcr_dac(child) |= DBCR_DAC1R;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- dbcr_dac(child) |= DBCR_DAC1W;
- child->thread.debug.dac1 = (unsigned long)bp_info->addr;
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- if (byte_enable) {
- child->thread.debug.dvc1 =
- (unsigned long)bp_info->condition_value;
- child->thread.debug.dbcr2 |=
- ((byte_enable << DBCR2_DVC1BE_SHIFT) |
- (condition_mode << DBCR2_DVC1M_SHIFT));
- }
-#endif
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
- /* Both dac1 and dac2 are part of a range */
- return -ENOSPC;
-#endif
- } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) {
- slot = 2;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- dbcr_dac(child) |= DBCR_DAC2R;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- dbcr_dac(child) |= DBCR_DAC2W;
- child->thread.debug.dac2 = (unsigned long)bp_info->addr;
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- if (byte_enable) {
- child->thread.debug.dvc2 =
- (unsigned long)bp_info->condition_value;
- child->thread.debug.dbcr2 |=
- ((byte_enable << DBCR2_DVC2BE_SHIFT) |
- (condition_mode << DBCR2_DVC2M_SHIFT));
- }
-#endif
- } else
- return -ENOSPC;
- child->thread.debug.dbcr0 |= DBCR0_IDM;
- child->thread.regs->msr |= MSR_DE;
-
- return slot + 4;
-}
-
-static int del_dac(struct task_struct *child, int slot)
-{
- if (slot == 1) {
- if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
- return -ENOENT;
-
- child->thread.debug.dac1 = 0;
- dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
- child->thread.debug.dac2 = 0;
- child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
- }
- child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
-#endif
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- child->thread.debug.dvc1 = 0;
-#endif
- } else if (slot == 2) {
- if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
- return -ENOENT;
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
- /* Part of a range */
- return -EINVAL;
- child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
-#endif
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- child->thread.debug.dvc2 = 0;
-#endif
- child->thread.debug.dac2 = 0;
- dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
- } else
- return -EINVAL;
-
- return 0;
-}
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-static int set_dac_range(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
- int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK;
-
- /* We don't allow range watchpoints to be used with DVC */
- if (bp_info->condition_mode)
- return -EINVAL;
-
- /*
- * Best effort to verify the address range. The user/supervisor bits
- * prevent trapping in kernel space, but let's fail on an obvious bad
- * range. The simple test on the mask is not fool-proof, and any
- * exclusive range will spill over into kernel space.
- */
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
- if (mode == PPC_BREAKPOINT_MODE_MASK) {
- /*
- * dac2 is a bitmask. Don't allow a mask that makes a
- * kernel space address from a valid dac1 value
- */
- if (~((unsigned long)bp_info->addr2) >= TASK_SIZE)
- return -EIO;
- } else {
- /*
- * For range breakpoints, addr2 must also be a valid address
- */
- if (bp_info->addr2 >= TASK_SIZE)
- return -EIO;
- }
-
- if (child->thread.debug.dbcr0 &
- (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
- return -ENOSPC;
-
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
- child->thread.debug.dac1 = bp_info->addr;
- child->thread.debug.dac2 = bp_info->addr2;
- if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
- child->thread.debug.dbcr2 |= DBCR2_DAC12M;
- else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- child->thread.debug.dbcr2 |= DBCR2_DAC12MX;
- else /* PPC_BREAKPOINT_MODE_MASK */
- child->thread.debug.dbcr2 |= DBCR2_DAC12MM;
- child->thread.regs->msr |= MSR_DE;
-
- return 5;
-}
-#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
-
-static long ppc_set_hwdebug(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int len = 0;
- struct thread_struct *thread = &(child->thread);
- struct perf_event *bp;
- struct perf_event_attr attr;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- struct arch_hw_breakpoint brk;
-#endif
-
- if (bp_info->version != 1)
- return -ENOTSUPP;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- /*
- * Check for invalid flags and combinations
- */
- if ((bp_info->trigger_type == 0) ||
- (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE |
- PPC_BREAKPOINT_TRIGGER_RW)) ||
- (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) ||
- (bp_info->condition_mode &
- ~(PPC_BREAKPOINT_CONDITION_MODE |
- PPC_BREAKPOINT_CONDITION_BE_ALL)))
- return -EINVAL;
-#if CONFIG_PPC_ADV_DEBUG_DVCS == 0
- if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
- return -EINVAL;
-#endif
-
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) {
- if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) ||
- (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE))
- return -EINVAL;
- return set_instruction_bp(child, bp_info);
- }
- if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
- return set_dac(child, bp_info);
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- return set_dac_range(child, bp_info);
-#else
- return -EINVAL;
-#endif
-#else /* !CONFIG_PPC_ADV_DEBUG_DVCS */
- /*
- * We only support one data breakpoint
- */
- if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
- (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
- bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
- return -EINVAL;
-
- if ((unsigned long)bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN;
- brk.type = HW_BRK_TYPE_TRANSLATE;
- brk.len = DABR_MAX_LEN;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- brk.type |= HW_BRK_TYPE_READ;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- brk.type |= HW_BRK_TYPE_WRITE;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
- len = bp_info->addr2 - bp_info->addr;
- else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
- len = 1;
- else
- return -EINVAL;
- bp = thread->ptrace_bps[0];
- if (bp)
- return -ENOSPC;
-
- /* Create a new breakpoint request if one doesn't exist already */
- hw_breakpoint_init(&attr);
- attr.bp_addr = (unsigned long)bp_info->addr;
- attr.bp_len = len;
- arch_bp_generic_fields(brk.type, &attr.bp_type);
-
- thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
- ptrace_triggered, NULL, child);
- if (IS_ERR(bp)) {
- thread->ptrace_bps[0] = NULL;
- return PTR_ERR(bp);
- }
-
- return 1;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
- if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
- return -EINVAL;
-
- if (child->thread.hw_brk.address)
- return -ENOSPC;
-
- if (!ppc_breakpoint_available())
- return -ENODEV;
-
- child->thread.hw_brk = brk;
-
- return 1;
-#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */
-}
-
-static long ppc_del_hwdebug(struct task_struct *child, long data)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int ret = 0;
- struct thread_struct *thread = &(child->thread);
- struct perf_event *bp;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- int rc;
-
- if (data <= 4)
- rc = del_instruction_bp(child, (int)data);
- else
- rc = del_dac(child, (int)data - 4);
-
- if (!rc) {
- if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
- child->thread.debug.dbcr1)) {
- child->thread.debug.dbcr0 &= ~DBCR0_IDM;
- child->thread.regs->msr &= ~MSR_DE;
- }
- }
- return rc;
-#else
- if (data != 1)
- return -EINVAL;
-
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- bp = thread->ptrace_bps[0];
- if (bp) {
- unregister_hw_breakpoint(bp);
- thread->ptrace_bps[0] = NULL;
- } else
- ret = -ENOENT;
- return ret;
-#else /* CONFIG_HAVE_HW_BREAKPOINT */
- if (child->thread.hw_brk.address == 0)
- return -ENOENT;
-
- child->thread.hw_brk.address = 0;
- child->thread.hw_brk.type = 0;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
- return 0;
-#endif
-}
-
-long arch_ptrace(struct task_struct *child, long request,
- unsigned long addr, unsigned long data)
-{
- int ret = -EPERM;
- void __user *datavp = (void __user *) data;
- unsigned long __user *datalp = datavp;
-
- switch (request) {
- /* read the word at location addr in the USER area. */
- case PTRACE_PEEKUSR: {
- unsigned long index, tmp;
-
- ret = -EIO;
- /* convert to index and check */
-#ifdef CONFIG_PPC32
- index = addr >> 2;
- if ((addr & 3) || (index > PT_FPSCR)
- || (child->thread.regs == NULL))
-#else
- index = addr >> 3;
- if ((addr & 7) || (index > PT_FPSCR))
-#endif
- break;
-
- CHECK_FULL_REGS(child->thread.regs);
- if (index < PT_FPR0) {
- ret = ptrace_get_reg(child, (int) index, &tmp);
- if (ret)
- break;
- } else {
- unsigned int fpidx = index - PT_FPR0;
-
- flush_fp_to_thread(child);
- if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(&tmp, &child->thread.TS_FPR(fpidx),
- sizeof(long));
- else
- tmp = child->thread.fp_state.fpscr;
- }
- ret = put_user(tmp, datalp);
- break;
- }
-
- /* write the word at location addr in the USER area */
- case PTRACE_POKEUSR: {
- unsigned long index;
-
- ret = -EIO;
- /* convert to index and check */
-#ifdef CONFIG_PPC32
- index = addr >> 2;
- if ((addr & 3) || (index > PT_FPSCR)
- || (child->thread.regs == NULL))
-#else
- index = addr >> 3;
- if ((addr & 7) || (index > PT_FPSCR))
-#endif
- break;
-
- CHECK_FULL_REGS(child->thread.regs);
- if (index < PT_FPR0) {
- ret = ptrace_put_reg(child, index, data);
- } else {
- unsigned int fpidx = index - PT_FPR0;
-
- flush_fp_to_thread(child);
- if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(&child->thread.TS_FPR(fpidx), &data,
- sizeof(long));
- else
- child->thread.fp_state.fpscr = data;
- ret = 0;
- }
- break;
- }
-
- case PPC_PTRACE_GETHWDBGINFO: {
- struct ppc_debug_info dbginfo;
-
- dbginfo.version = 1;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS;
- dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS;
- dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS;
- dbginfo.data_bp_alignment = 4;
- dbginfo.sizeof_condition = 4;
- dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE |
- PPC_DEBUG_FEATURE_INSN_BP_MASK;
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- dbginfo.features |=
- PPC_DEBUG_FEATURE_DATA_BP_RANGE |
- PPC_DEBUG_FEATURE_DATA_BP_MASK;
-#endif
-#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
- dbginfo.num_instruction_bps = 0;
- if (ppc_breakpoint_available())
- dbginfo.num_data_bps = 1;
- else
- dbginfo.num_data_bps = 0;
- dbginfo.num_condition_regs = 0;
-#ifdef CONFIG_PPC64
- dbginfo.data_bp_alignment = 8;
-#else
- dbginfo.data_bp_alignment = 4;
-#endif
- dbginfo.sizeof_condition = 0;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
- if (dawr_enabled())
- dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
-#else
- dbginfo.features = 0;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-
- if (copy_to_user(datavp, &dbginfo,
- sizeof(struct ppc_debug_info)))
- return -EFAULT;
- return 0;
- }
-
- case PPC_PTRACE_SETHWDEBUG: {
- struct ppc_hw_breakpoint bp_info;
-
- if (copy_from_user(&bp_info, datavp,
- sizeof(struct ppc_hw_breakpoint)))
- return -EFAULT;
- return ppc_set_hwdebug(child, &bp_info);
- }
-
- case PPC_PTRACE_DELHWDEBUG: {
- ret = ppc_del_hwdebug(child, data);
- break;
- }
-
- case PTRACE_GET_DEBUGREG: {
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- unsigned long dabr_fake;
-#endif
- ret = -EINVAL;
- /* We only support one DABR and no IABRS at the moment */
- if (addr > 0)
- break;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- ret = put_user(child->thread.debug.dac1, datalp);
-#else
- dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
- (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
- ret = put_user(dabr_fake, datalp);
-#endif
- break;
- }
-
- case PTRACE_SET_DEBUGREG:
- ret = ptrace_set_debugreg(child, addr, data);
- break;
-
-#ifdef CONFIG_PPC64
- case PTRACE_GETREGS64:
-#endif
- case PTRACE_GETREGS: /* Get all pt_regs from the child. */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_GPR,
- 0, sizeof(struct user_pt_regs),
- datavp);
-
-#ifdef CONFIG_PPC64
- case PTRACE_SETREGS64:
-#endif
- case PTRACE_SETREGS: /* Set all gp regs in the child. */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_GPR,
- 0, sizeof(struct user_pt_regs),
- datavp);
-
- case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_FPR,
- 0, sizeof(elf_fpregset_t),
- datavp);
-
- case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_FPR,
- 0, sizeof(elf_fpregset_t),
- datavp);
-
-#ifdef CONFIG_ALTIVEC
- case PTRACE_GETVRREGS:
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_VMX,
- 0, (33 * sizeof(vector128) +
- sizeof(u32)),
- datavp);
-
- case PTRACE_SETVRREGS:
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_VMX,
- 0, (33 * sizeof(vector128) +
- sizeof(u32)),
- datavp);
-#endif
-#ifdef CONFIG_VSX
- case PTRACE_GETVSRREGS:
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_VSX,
- 0, 32 * sizeof(double),
- datavp);
-
- case PTRACE_SETVSRREGS:
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_VSX,
- 0, 32 * sizeof(double),
- datavp);
-#endif
-#ifdef CONFIG_SPE
- case PTRACE_GETEVRREGS:
- /* Get the child spe register state. */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_SPE, 0, 35 * sizeof(u32),
- datavp);
-
- case PTRACE_SETEVRREGS:
- /* Set the child spe register state. */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_SPE, 0, 35 * sizeof(u32),
- datavp);
-#endif
-
- default:
- ret = ptrace_request(child, request, addr, data);
- break;
- }
- return ret;
-}
-
-#ifdef CONFIG_SECCOMP
-static int do_seccomp(struct pt_regs *regs)
-{
- if (!test_thread_flag(TIF_SECCOMP))
- return 0;
-
- /*
- * The ABI we present to seccomp tracers is that r3 contains
- * the syscall return value and orig_gpr3 contains the first
- * syscall parameter. This is different to the ptrace ABI where
- * both r3 and orig_gpr3 contain the first syscall parameter.
- */
- regs->gpr[3] = -ENOSYS;
-
- /*
- * We use the __ version here because we have already checked
- * TIF_SECCOMP. If this fails, there is nothing left to do, we
- * have already loaded -ENOSYS into r3, or seccomp has put
- * something else in r3 (via SECCOMP_RET_ERRNO/TRACE).
- */
- if (__secure_computing(NULL))
- return -1;
-
- /*
- * The syscall was allowed by seccomp, restore the register
- * state to what audit expects.
- * Note that we use orig_gpr3, which means a seccomp tracer can
- * modify the first syscall parameter (in orig_gpr3) and also
- * allow the syscall to proceed.
- */
- regs->gpr[3] = regs->orig_gpr3;
-
- return 0;
-}
-#else
-static inline int do_seccomp(struct pt_regs *regs) { return 0; }
-#endif /* CONFIG_SECCOMP */
-
-/**
- * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
- * @regs: the pt_regs of the task to trace (current)
- *
- * Performs various types of tracing on syscall entry. This includes seccomp,
- * ptrace, syscall tracepoints and audit.
- *
- * The pt_regs are potentially visible to userspace via ptrace, so their
- * contents is ABI.
- *
- * One or more of the tracers may modify the contents of pt_regs, in particular
- * to modify arguments or even the syscall number itself.
- *
- * It's also possible that a tracer can choose to reject the system call. In
- * that case this function will return an illegal syscall number, and will put
- * an appropriate return value in regs->r3.
- *
- * Return: the (possibly changed) syscall number.
- */
-long do_syscall_trace_enter(struct pt_regs *regs)
-{
- u32 flags;
-
- user_exit();
-
- flags = READ_ONCE(current_thread_info()->flags) &
- (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
-
- if (flags) {
- int rc = tracehook_report_syscall_entry(regs);
-
- if (unlikely(flags & _TIF_SYSCALL_EMU)) {
- /*
- * A nonzero return code from
- * tracehook_report_syscall_entry() tells us to prevent
- * the syscall execution, but we are not going to
- * execute it anyway.
- *
- * Returning -1 will skip the syscall execution. We want
- * to avoid clobbering any registers, so we don't goto
- * the skip label below.
- */
- return -1;
- }
-
- if (rc) {
- /*
- * The tracer decided to abort the syscall. Note that
- * the tracer may also just change regs->gpr[0] to an
- * invalid syscall number, that is handled below on the
- * exit path.
- */
- goto skip;
- }
- }
-
- /* Run seccomp after ptrace; allow it to set gpr[3]. */
- if (do_seccomp(regs))
- return -1;
-
- /* Avoid trace and audit when syscall is invalid. */
- if (regs->gpr[0] >= NR_syscalls)
- goto skip;
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_enter(regs, regs->gpr[0]);
-
-#ifdef CONFIG_PPC64
- if (!is_32bit_task())
- audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
- regs->gpr[5], regs->gpr[6]);
- else
-#endif
- audit_syscall_entry(regs->gpr[0],
- regs->gpr[3] & 0xffffffff,
- regs->gpr[4] & 0xffffffff,
- regs->gpr[5] & 0xffffffff,
- regs->gpr[6] & 0xffffffff);
-
- /* Return the possibly modified but valid syscall number */
- return regs->gpr[0];
-
-skip:
- /*
- * If we are aborting explicitly, or if the syscall number is
- * now invalid, set the return value to -ENOSYS.
- */
- regs->gpr[3] = -ENOSYS;
- return -1;
-}
-
-void do_syscall_trace_leave(struct pt_regs *regs)
-{
- int step;
-
- audit_syscall_exit(regs);
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_exit(regs, regs->result);
-
- step = test_thread_flag(TIF_SINGLESTEP);
- if (step || test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, step);
-
- user_enter();
-}
-
-void __init pt_regs_check(void);
-
-/*
- * Dummy function, its purpose is to break the build if struct pt_regs and
- * struct user_pt_regs don't match.
- */
-void __init pt_regs_check(void)
-{
- BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
- offsetof(struct user_pt_regs, gpr));
- BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
- offsetof(struct user_pt_regs, nip));
- BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
- offsetof(struct user_pt_regs, msr));
- BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
- offsetof(struct user_pt_regs, msr));
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct user_pt_regs, orig_gpr3));
- BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
- offsetof(struct user_pt_regs, ctr));
- BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
- offsetof(struct user_pt_regs, link));
- BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
- offsetof(struct user_pt_regs, xer));
- BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
- offsetof(struct user_pt_regs, ccr));
-#ifdef __powerpc64__
- BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
- offsetof(struct user_pt_regs, softe));
-#else
- BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
- offsetof(struct user_pt_regs, mq));
-#endif
- BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
- offsetof(struct user_pt_regs, trap));
- BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
- offsetof(struct user_pt_regs, dar));
- BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
- offsetof(struct user_pt_regs, dsisr));
- BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
- offsetof(struct user_pt_regs, result));
-
- BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
-
- // Now check that the pt_regs offsets match the uapi #defines
- #define CHECK_REG(_pt, _reg) \
- BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
- sizeof(unsigned long)));
-
- CHECK_REG(PT_R0, gpr[0]);
- CHECK_REG(PT_R1, gpr[1]);
- CHECK_REG(PT_R2, gpr[2]);
- CHECK_REG(PT_R3, gpr[3]);
- CHECK_REG(PT_R4, gpr[4]);
- CHECK_REG(PT_R5, gpr[5]);
- CHECK_REG(PT_R6, gpr[6]);
- CHECK_REG(PT_R7, gpr[7]);
- CHECK_REG(PT_R8, gpr[8]);
- CHECK_REG(PT_R9, gpr[9]);
- CHECK_REG(PT_R10, gpr[10]);
- CHECK_REG(PT_R11, gpr[11]);
- CHECK_REG(PT_R12, gpr[12]);
- CHECK_REG(PT_R13, gpr[13]);
- CHECK_REG(PT_R14, gpr[14]);
- CHECK_REG(PT_R15, gpr[15]);
- CHECK_REG(PT_R16, gpr[16]);
- CHECK_REG(PT_R17, gpr[17]);
- CHECK_REG(PT_R18, gpr[18]);
- CHECK_REG(PT_R19, gpr[19]);
- CHECK_REG(PT_R20, gpr[20]);
- CHECK_REG(PT_R21, gpr[21]);
- CHECK_REG(PT_R22, gpr[22]);
- CHECK_REG(PT_R23, gpr[23]);
- CHECK_REG(PT_R24, gpr[24]);
- CHECK_REG(PT_R25, gpr[25]);
- CHECK_REG(PT_R26, gpr[26]);
- CHECK_REG(PT_R27, gpr[27]);
- CHECK_REG(PT_R28, gpr[28]);
- CHECK_REG(PT_R29, gpr[29]);
- CHECK_REG(PT_R30, gpr[30]);
- CHECK_REG(PT_R31, gpr[31]);
- CHECK_REG(PT_NIP, nip);
- CHECK_REG(PT_MSR, msr);
- CHECK_REG(PT_ORIG_R3, orig_gpr3);
- CHECK_REG(PT_CTR, ctr);
- CHECK_REG(PT_LNK, link);
- CHECK_REG(PT_XER, xer);
- CHECK_REG(PT_CCR, ccr);
-#ifdef CONFIG_PPC64
- CHECK_REG(PT_SOFTE, softe);
-#else
- CHECK_REG(PT_MQ, mq);
-#endif
- CHECK_REG(PT_TRAP, trap);
- CHECK_REG(PT_DAR, dar);
- CHECK_REG(PT_DSISR, dsisr);
- CHECK_REG(PT_RESULT, result);
- #undef CHECK_REG
-
- BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
-
- /*
- * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
- * real registers.
- */
- BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
-}
diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile
new file mode 100644
index 000000000000..c2f2402ebc8c
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+obj-y += ptrace.o ptrace-view.o
+obj-$(CONFIG_COMPAT) += ptrace32.o
+obj-$(CONFIG_VSX) += ptrace-vsx.o
+ifneq ($(CONFIG_VSX),y)
+obj-y += ptrace-novsx.o
+endif
+obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o
+obj-$(CONFIG_SPE) += ptrace-spe.o
+obj-$(CONFIG_PPC_TRANSACTIONAL_MEM) += ptrace-tm.o
+obj-$(CONFIG_PPC_ADV_DEBUG_REGS) += ptrace-adv.o
+ifneq ($(CONFIG_PPC_ADV_DEBUG_REGS),y)
+obj-y += ptrace-noadv.o
+endif
diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c
new file mode 100644
index 000000000000..3990c01ef8cf
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c
@@ -0,0 +1,492 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/hw_breakpoint.h>
+
+#include "ptrace-decl.h"
+
+void user_enable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ task->thread.debug.dbcr0 &= ~DBCR0_BT;
+ task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+ regs->msr |= MSR_DE;
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ task->thread.debug.dbcr0 &= ~DBCR0_IC;
+ task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
+ regs->msr |= MSR_DE;
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ /*
+ * The logic to disable single stepping should be as
+ * simple as turning off the Instruction Complete flag.
+ * And, after doing so, if all debug flags are off, turn
+ * off DBCR0(IDM) and MSR(DE) .... Torez
+ */
+ task->thread.debug.dbcr0 &= ~(DBCR0_IC | DBCR0_BT);
+ /*
+ * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
+ */
+ if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+ task->thread.debug.dbcr1)) {
+ /*
+ * All debug events were off.....
+ */
+ task->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ regs->msr &= ~MSR_DE;
+ }
+ }
+ clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
+{
+ dbginfo->version = 1;
+ dbginfo->num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS;
+ dbginfo->num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS;
+ dbginfo->num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS;
+ dbginfo->data_bp_alignment = 4;
+ dbginfo->sizeof_condition = 4;
+ dbginfo->features = PPC_DEBUG_FEATURE_INSN_BP_RANGE |
+ PPC_DEBUG_FEATURE_INSN_BP_MASK;
+ if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_DAC_RANGE))
+ dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_RANGE |
+ PPC_DEBUG_FEATURE_DATA_BP_MASK;
+}
+
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+ unsigned long __user *datalp)
+{
+ /* We only support one DABR and no IABRS at the moment */
+ if (addr > 0)
+ return -EINVAL;
+ return put_user(child->thread.debug.dac1, datalp);
+}
+
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret;
+ struct thread_struct *thread = &task->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
+ * For embedded processors we support one DAC and no IAC's at the
+ * moment.
+ */
+ if (addr > 0)
+ return -EINVAL;
+
+ /* The bottom 3 bits in dabr are flags */
+ if ((data & ~0x7UL) >= TASK_SIZE)
+ return -EIO;
+
+ /* As described above, it was assumed 3 bits were passed with the data
+ * address, but we will assume only the mode bits will be passed
+ * as to not cause alignment restrictions for DAC-based processors.
+ */
+
+ /* DAC's hold the whole address without any mode flags */
+ task->thread.debug.dac1 = data & ~0x3UL;
+
+ if (task->thread.debug.dac1 == 0) {
+ dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+ if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+ task->thread.debug.dbcr1)) {
+ task->thread.regs->msr &= ~MSR_DE;
+ task->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ }
+ return 0;
+ }
+
+ /* Read or Write bits must be set */
+
+ if (!(data & 0x3UL))
+ return -EINVAL;
+
+ /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */
+ task->thread.debug.dbcr0 |= DBCR0_IDM;
+
+ /* Check for write and read flags and set DBCR0 accordingly */
+ dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+ if (data & 0x1UL)
+ dbcr_dac(task) |= DBCR_DAC1R;
+ if (data & 0x2UL)
+ dbcr_dac(task) |= DBCR_DAC1W;
+ task->thread.regs->msr |= MSR_DE;
+ return 0;
+}
+
+static long set_instruction_bp(struct task_struct *child,
+ struct ppc_hw_breakpoint *bp_info)
+{
+ int slot;
+ int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
+ int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
+ int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
+ int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+ slot2_in_use = 1;
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+ slot4_in_use = 1;
+
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
+ /* Make sure range is valid. */
+ if (bp_info->addr2 >= TASK_SIZE)
+ return -EIO;
+
+ /* We need a pair of IAC regsisters */
+ if (!slot1_in_use && !slot2_in_use) {
+ slot = 1;
+ child->thread.debug.iac1 = bp_info->addr;
+ child->thread.debug.iac2 = bp_info->addr2;
+ child->thread.debug.dbcr0 |= DBCR0_IAC1;
+ if (bp_info->addr_mode ==
+ PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ dbcr_iac_range(child) |= DBCR_IAC12X;
+ else
+ dbcr_iac_range(child) |= DBCR_IAC12I;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ } else if ((!slot3_in_use) && (!slot4_in_use)) {
+ slot = 3;
+ child->thread.debug.iac3 = bp_info->addr;
+ child->thread.debug.iac4 = bp_info->addr2;
+ child->thread.debug.dbcr0 |= DBCR0_IAC3;
+ if (bp_info->addr_mode ==
+ PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ dbcr_iac_range(child) |= DBCR_IAC34X;
+ else
+ dbcr_iac_range(child) |= DBCR_IAC34I;
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ } else {
+ /* We only need one. If possible leave a pair free in
+ * case a range is needed later
+ */
+ if (!slot1_in_use) {
+ /*
+ * Don't use iac1 if iac1-iac2 are free and either
+ * iac3 or iac4 (but not both) are free
+ */
+ if (slot2_in_use || slot3_in_use == slot4_in_use) {
+ slot = 1;
+ child->thread.debug.iac1 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC1;
+ goto out;
+ }
+ }
+ if (!slot2_in_use) {
+ slot = 2;
+ child->thread.debug.iac2 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC2;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ } else if (!slot3_in_use) {
+ slot = 3;
+ child->thread.debug.iac3 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC3;
+ } else if (!slot4_in_use) {
+ slot = 4;
+ child->thread.debug.iac4 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC4;
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ }
+out:
+ child->thread.debug.dbcr0 |= DBCR0_IDM;
+ child->thread.regs->msr |= MSR_DE;
+
+ return slot;
+}
+
+static int del_instruction_bp(struct task_struct *child, int slot)
+{
+ switch (slot) {
+ case 1:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
+ /* address range - clear slots 1 & 2 */
+ child->thread.debug.iac2 = 0;
+ dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
+ }
+ child->thread.debug.iac1 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
+ break;
+ case 2:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+ /* used in a range */
+ return -EINVAL;
+ child->thread.debug.iac2 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case 3:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
+ /* address range - clear slots 3 & 4 */
+ child->thread.debug.iac4 = 0;
+ dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
+ }
+ child->thread.debug.iac3 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
+ break;
+ case 4:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+ /* Used in a range */
+ return -EINVAL;
+ child->thread.debug.iac4 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+ int byte_enable =
+ (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT)
+ & 0xf;
+ int condition_mode =
+ bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE;
+ int slot;
+
+ if (byte_enable && condition_mode == 0)
+ return -EINVAL;
+
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) {
+ slot = 1;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dbcr_dac(child) |= DBCR_DAC1R;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dbcr_dac(child) |= DBCR_DAC1W;
+ child->thread.debug.dac1 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ if (byte_enable) {
+ child->thread.debug.dvc1 =
+ (unsigned long)bp_info->condition_value;
+ child->thread.debug.dbcr2 |=
+ ((byte_enable << DBCR2_DVC1BE_SHIFT) |
+ (condition_mode << DBCR2_DVC1M_SHIFT));
+ }
+#endif
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+ /* Both dac1 and dac2 are part of a range */
+ return -ENOSPC;
+#endif
+ } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) {
+ slot = 2;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dbcr_dac(child) |= DBCR_DAC2R;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dbcr_dac(child) |= DBCR_DAC2W;
+ child->thread.debug.dac2 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ if (byte_enable) {
+ child->thread.debug.dvc2 =
+ (unsigned long)bp_info->condition_value;
+ child->thread.debug.dbcr2 |=
+ ((byte_enable << DBCR2_DVC2BE_SHIFT) |
+ (condition_mode << DBCR2_DVC2M_SHIFT));
+ }
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ child->thread.debug.dbcr0 |= DBCR0_IDM;
+ child->thread.regs->msr |= MSR_DE;
+
+ return slot + 4;
+}
+
+static int del_dac(struct task_struct *child, int slot)
+{
+ if (slot == 1) {
+ if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
+ return -ENOENT;
+
+ child->thread.debug.dac1 = 0;
+ dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+ child->thread.debug.dac2 = 0;
+ child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
+ }
+ child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ child->thread.debug.dvc1 = 0;
+#endif
+ } else if (slot == 2) {
+ if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
+ return -ENOENT;
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
+ /* Part of a range */
+ return -EINVAL;
+ child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ child->thread.debug.dvc2 = 0;
+#endif
+ child->thread.debug.dac2 = 0;
+ dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+static int set_dac_range(struct task_struct *child,
+ struct ppc_hw_breakpoint *bp_info)
+{
+ int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK;
+
+ /* We don't allow range watchpoints to be used with DVC */
+ if (bp_info->condition_mode)
+ return -EINVAL;
+
+ /*
+ * Best effort to verify the address range. The user/supervisor bits
+ * prevent trapping in kernel space, but let's fail on an obvious bad
+ * range. The simple test on the mask is not fool-proof, and any
+ * exclusive range will spill over into kernel space.
+ */
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+ if (mode == PPC_BREAKPOINT_MODE_MASK) {
+ /*
+ * dac2 is a bitmask. Don't allow a mask that makes a
+ * kernel space address from a valid dac1 value
+ */
+ if (~((unsigned long)bp_info->addr2) >= TASK_SIZE)
+ return -EIO;
+ } else {
+ /*
+ * For range breakpoints, addr2 must also be a valid address
+ */
+ if (bp_info->addr2 >= TASK_SIZE)
+ return -EIO;
+ }
+
+ if (child->thread.debug.dbcr0 &
+ (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
+ return -ENOSPC;
+
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
+ child->thread.debug.dac1 = bp_info->addr;
+ child->thread.debug.dac2 = bp_info->addr2;
+ if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+ child->thread.debug.dbcr2 |= DBCR2_DAC12M;
+ else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ child->thread.debug.dbcr2 |= DBCR2_DAC12MX;
+ else /* PPC_BREAKPOINT_MODE_MASK */
+ child->thread.debug.dbcr2 |= DBCR2_DAC12MM;
+ child->thread.regs->msr |= MSR_DE;
+
+ return 5;
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
+
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+ if (bp_info->version != 1)
+ return -ENOTSUPP;
+ /*
+ * Check for invalid flags and combinations
+ */
+ if (bp_info->trigger_type == 0 ||
+ (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE |
+ PPC_BREAKPOINT_TRIGGER_RW)) ||
+ (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) ||
+ (bp_info->condition_mode &
+ ~(PPC_BREAKPOINT_CONDITION_MODE |
+ PPC_BREAKPOINT_CONDITION_BE_ALL)))
+ return -EINVAL;
+#if CONFIG_PPC_ADV_DEBUG_DVCS == 0
+ if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+#endif
+
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) {
+ if (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE ||
+ bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+ return set_instruction_bp(child, bp_info);
+ }
+ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+ return set_dac(child, bp_info);
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ return set_dac_range(child, bp_info);
+#else
+ return -EINVAL;
+#endif
+}
+
+long ppc_del_hwdebug(struct task_struct *child, long data)
+{
+ int rc;
+
+ if (data <= 4)
+ rc = del_instruction_bp(child, (int)data);
+ else
+ rc = del_dac(child, (int)data - 4);
+
+ if (!rc) {
+ if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
+ child->thread.debug.dbcr1)) {
+ child->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ child->thread.regs->msr &= ~MSR_DE;
+ }
+ }
+ return rc;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-altivec.c b/arch/powerpc/kernel/ptrace/ptrace-altivec.c
new file mode 100644
index 000000000000..dd8b75dfbd06
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-altivec.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/elf.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+ * The transfer totals 34 quadword. Quadwords 0-31 contain the
+ * corresponding vector registers. Quadword 32 contains the vscr as the
+ * last word (offset 12) within that quadword. Quadword 33 contains the
+ * vrsave as the first word (offset 0) within the quadword.
+ *
+ * This definition of the VMX state is compatible with the current PPC32
+ * ptrace interface. This allows signal handling and ptrace to use the
+ * same structures. This also simplifies the implementation of a bi-arch
+ * (combined (32- and 64-bit) gdb.
+ */
+
+int vr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ flush_altivec_to_thread(target);
+ return target->thread.used_vr ? regset->n : 0;
+}
+
+/*
+ * Regardless of transactions, 'vr_state' holds the current running
+ * value of all the VMX registers and 'ckvr_state' holds the last
+ * checkpointed value of all the VMX registers for the current
+ * transaction to fall back on in case it aborts.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ * };
+ */
+int vr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ flush_altivec_to_thread(target);
+
+ BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+ offsetof(struct thread_vr_state, vr[32]));
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.vr_state, 0,
+ 33 * sizeof(vector128));
+ if (!ret) {
+ /*
+ * Copy out only the low-order word of vrsave.
+ */
+ int start, end;
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+
+ vrsave.word = target->thread.vrsave;
+
+ start = 33 * sizeof(vector128);
+ end = start + sizeof(vrsave);
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
+ start, end);
+ }
+
+ return ret;
+}
+
+/*
+ * Regardless of transactions, 'vr_state' holds the current running
+ * value of all the VMX registers and 'ckvr_state' holds the last
+ * checkpointed value of all the VMX registers for the current
+ * transaction to fall back on in case it aborts.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ * };
+ */
+int vr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ flush_altivec_to_thread(target);
+
+ BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+ offsetof(struct thread_vr_state, vr[32]));
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.vr_state, 0,
+ 33 * sizeof(vector128));
+ if (!ret && count > 0) {
+ /*
+ * We use only the first word of vrsave.
+ */
+ int start, end;
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+
+ vrsave.word = target->thread.vrsave;
+
+ start = 33 * sizeof(vector128);
+ end = start + sizeof(vrsave);
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
+ start, end);
+ if (!ret)
+ target->thread.vrsave = vrsave.word;
+ }
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h
new file mode 100644
index 000000000000..3c8a81999292
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/*
+ * Set of msr bits that gdb can change on behalf of a process.
+ */
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+#define MSR_DEBUGCHANGE 0
+#else
+#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE)
+#endif
+
+/*
+ * Max register writeable via put_reg
+ */
+#ifdef CONFIG_PPC32
+#define PT_MAX_PUT_REG PT_MQ
+#else
+#define PT_MAX_PUT_REG PT_CCR
+#endif
+
+#define TVSO(f) (offsetof(struct thread_vr_state, f))
+#define TFSO(f) (offsetof(struct thread_fp_state, f))
+#define TSO(f) (offsetof(struct thread_struct, f))
+
+/*
+ * These are our native regset flavors.
+ */
+enum powerpc_regset {
+ REGSET_GPR,
+ REGSET_FPR,
+#ifdef CONFIG_ALTIVEC
+ REGSET_VMX,
+#endif
+#ifdef CONFIG_VSX
+ REGSET_VSX,
+#endif
+#ifdef CONFIG_SPE
+ REGSET_SPE,
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ REGSET_TM_CGPR, /* TM checkpointed GPR registers */
+ REGSET_TM_CFPR, /* TM checkpointed FPR registers */
+ REGSET_TM_CVMX, /* TM checkpointed VMX registers */
+ REGSET_TM_CVSX, /* TM checkpointed VSX registers */
+ REGSET_TM_SPR, /* TM specific SPR registers */
+ REGSET_TM_CTAR, /* TM checkpointed TAR register */
+ REGSET_TM_CPPR, /* TM checkpointed PPR register */
+ REGSET_TM_CDSCR, /* TM checkpointed DSCR register */
+#endif
+#ifdef CONFIG_PPC64
+ REGSET_PPR, /* PPR register */
+ REGSET_DSCR, /* DSCR register */
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ REGSET_TAR, /* TAR register */
+ REGSET_EBB, /* EBB registers */
+ REGSET_PMR, /* Performance Monitor Registers */
+#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ REGSET_PKEY, /* AMR register */
+#endif
+};
+
+/* ptrace-(no)vsx */
+
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-vsx */
+
+int vsr_active(struct task_struct *target, const struct user_regset *regset);
+int vsr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int vsr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-altivec */
+
+int vr_active(struct task_struct *target, const struct user_regset *regset);
+int vr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int vr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-spe */
+
+int evr_active(struct task_struct *target, const struct user_regset *regset);
+int evr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int evr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace */
+
+int gpr32_get_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf,
+ unsigned long *regs);
+int gpr32_set_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ unsigned long *regs);
+
+/* ptrace-tm */
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void flush_tmregs_to_thread(struct task_struct *tsk);
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
+int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset);
+int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset);
+int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset);
+int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset);
+int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_spr_active(struct task_struct *target, const struct user_regset *regset);
+int tm_spr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_spr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_tar_active(struct task_struct *target, const struct user_regset *regset);
+int tm_tar_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_tar_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_ppr_active(struct task_struct *target, const struct user_regset *regset);
+int tm_ppr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_ppr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_dscr_active(struct task_struct *target, const struct user_regset *regset);
+int tm_dscr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_dscr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf);
+int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-view */
+
+extern const struct user_regset_view user_ppc_native_view;
+
+/* ptrace-(no)adv */
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo);
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+ unsigned long __user *datalp);
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data);
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info);
+long ppc_del_hwdebug(struct task_struct *child, long data);
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
new file mode 100644
index 000000000000..f87e7c5c3bf3
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/hw_breakpoint.h>
+
+#include <asm/debug.h>
+
+#include "ptrace-decl.h"
+
+void user_enable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ regs->msr &= ~MSR_BE;
+ regs->msr |= MSR_SE;
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ regs->msr &= ~MSR_SE;
+ regs->msr |= MSR_BE;
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL)
+ regs->msr &= ~(MSR_SE | MSR_BE);
+
+ clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
+{
+ dbginfo->version = 1;
+ dbginfo->num_instruction_bps = 0;
+ if (ppc_breakpoint_available())
+ dbginfo->num_data_bps = 1;
+ else
+ dbginfo->num_data_bps = 0;
+ dbginfo->num_condition_regs = 0;
+ dbginfo->data_bp_alignment = sizeof(long);
+ dbginfo->sizeof_condition = 0;
+ if (IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT)) {
+ dbginfo->features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
+ if (dawr_enabled())
+ dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
+ } else {
+ dbginfo->features = 0;
+ }
+}
+
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+ unsigned long __user *datalp)
+{
+ unsigned long dabr_fake;
+
+ /* We only support one DABR and no IABRS at the moment */
+ if (addr > 0)
+ return -EINVAL;
+ dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
+ (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
+ return put_user(dabr_fake, datalp);
+}
+
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret;
+ struct thread_struct *thread = &task->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ bool set_bp = true;
+ struct arch_hw_breakpoint hw_brk;
+
+ /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
+ * For embedded processors we support one DAC and no IAC's at the
+ * moment.
+ */
+ if (addr > 0)
+ return -EINVAL;
+
+ /* The bottom 3 bits in dabr are flags */
+ if ((data & ~0x7UL) >= TASK_SIZE)
+ return -EIO;
+
+ /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
+ * It was assumed, on previous implementations, that 3 bits were
+ * passed together with the data address, fitting the design of the
+ * DABR register, as follows:
+ *
+ * bit 0: Read flag
+ * bit 1: Write flag
+ * bit 2: Breakpoint translation
+ *
+ * Thus, we use them here as so.
+ */
+
+ /* Ensure breakpoint translation bit is set */
+ if (data && !(data & HW_BRK_TYPE_TRANSLATE))
+ return -EIO;
+ hw_brk.address = data & (~HW_BRK_TYPE_DABR);
+ hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
+ hw_brk.len = DABR_MAX_LEN;
+ hw_brk.hw_len = DABR_MAX_LEN;
+ set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ bp = thread->ptrace_bps[0];
+ if (!set_bp) {
+ if (bp) {
+ unregister_hw_breakpoint(bp);
+ thread->ptrace_bps[0] = NULL;
+ }
+ return 0;
+ }
+ if (bp) {
+ attr = bp->attr;
+ attr.bp_addr = hw_brk.address;
+ attr.bp_len = DABR_MAX_LEN;
+ arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
+
+ /* Enable breakpoint */
+ attr.disabled = false;
+
+ ret = modify_user_hw_breakpoint(bp, &attr);
+ if (ret)
+ return ret;
+
+ thread->ptrace_bps[0] = bp;
+ thread->hw_brk = hw_brk;
+ return 0;
+ }
+
+ /* Create a new breakpoint request if one doesn't exist already */
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = hw_brk.address;
+ attr.bp_len = DABR_MAX_LEN;
+ arch_bp_generic_fields(hw_brk.type,
+ &attr.bp_type);
+
+ thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+ ptrace_triggered, NULL, task);
+ if (IS_ERR(bp)) {
+ thread->ptrace_bps[0] = NULL;
+ return PTR_ERR(bp);
+ }
+
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+ if (set_bp && (!ppc_breakpoint_available()))
+ return -ENODEV;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ task->thread.hw_brk = hw_brk;
+ return 0;
+}
+
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int len = 0;
+ struct thread_struct *thread = &child->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ struct arch_hw_breakpoint brk;
+
+ if (bp_info->version != 1)
+ return -ENOTSUPP;
+ /*
+ * We only support one data breakpoint
+ */
+ if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
+ (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
+ bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+
+ if ((unsigned long)bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN;
+ brk.type = HW_BRK_TYPE_TRANSLATE;
+ brk.len = DABR_MAX_LEN;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ brk.type |= HW_BRK_TYPE_READ;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ brk.type |= HW_BRK_TYPE_WRITE;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+ len = bp_info->addr2 - bp_info->addr;
+ else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+ len = 1;
+ else
+ return -EINVAL;
+ bp = thread->ptrace_bps[0];
+ if (bp)
+ return -ENOSPC;
+
+ /* Create a new breakpoint request if one doesn't exist already */
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = (unsigned long)bp_info->addr;
+ attr.bp_len = len;
+ arch_bp_generic_fields(brk.type, &attr.bp_type);
+
+ bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child);
+ thread->ptrace_bps[0] = bp;
+ if (IS_ERR(bp)) {
+ thread->ptrace_bps[0] = NULL;
+ return PTR_ERR(bp);
+ }
+
+ return 1;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
+ return -EINVAL;
+
+ if (child->thread.hw_brk.address)
+ return -ENOSPC;
+
+ if (!ppc_breakpoint_available())
+ return -ENODEV;
+
+ child->thread.hw_brk = brk;
+
+ return 1;
+}
+
+long ppc_del_hwdebug(struct task_struct *child, long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret = 0;
+ struct thread_struct *thread = &child->thread;
+ struct perf_event *bp;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ if (data != 1)
+ return -EINVAL;
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ bp = thread->ptrace_bps[0];
+ if (bp) {
+ unregister_hw_breakpoint(bp);
+ thread->ptrace_bps[0] = NULL;
+ } else {
+ ret = -ENOENT;
+ }
+ return ret;
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
+ if (child->thread.hw_brk.address == 0)
+ return -ENOENT;
+
+ child->thread.hw_brk.address = 0;
+ child->thread.hw_brk.type = 0;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
new file mode 100644
index 000000000000..b2dc4e92d11a
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ */
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+ offsetof(struct thread_fp_state, fpr[32]));
+
+ flush_fp_to_thread(target);
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fp_state, 0, -1);
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ *
+ */
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+ offsetof(struct thread_fp_state, fpr[32]));
+
+ flush_fp_to_thread(target);
+
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fp_state, 0, -1);
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-spe.c b/arch/powerpc/kernel/ptrace/ptrace-spe.c
new file mode 100644
index 000000000000..68b86b4a4be4
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-spe.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * For get_evrregs/set_evrregs functions 'data' has the following layout:
+ *
+ * struct {
+ * u32 evr[32];
+ * u64 acc;
+ * u32 spefscr;
+ * }
+ */
+
+int evr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ flush_spe_to_thread(target);
+ return target->thread.used_spe ? regset->n : 0;
+}
+
+int evr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ flush_spe_to_thread(target);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.evr,
+ 0, sizeof(target->thread.evr));
+
+ BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+ offsetof(struct thread_struct, spefscr));
+
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.acc,
+ sizeof(target->thread.evr), -1);
+
+ return ret;
+}
+
+int evr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ flush_spe_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.evr,
+ 0, sizeof(target->thread.evr));
+
+ BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+ offsetof(struct thread_struct, spefscr));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.acc,
+ sizeof(target->thread.evr), -1);
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c
new file mode 100644
index 000000000000..d75aff31f637
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c
@@ -0,0 +1,851 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
+
+#include "ptrace-decl.h"
+
+void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+ /*
+ * If task is not current, it will have been flushed already to
+ * it's thread_struct during __switch_to().
+ *
+ * A reclaim flushes ALL the state or if not in TM save TM SPRs
+ * in the appropriate thread structures from live.
+ */
+
+ if (!cpu_has_feature(CPU_FTR_TM) || tsk != current)
+ return;
+
+ if (MSR_TM_SUSPENDED(mfmsr())) {
+ tm_reclaim_current(TM_CAUSE_SIGNAL);
+ } else {
+ tm_enable();
+ tm_save_sprs(&tsk->thread);
+ }
+}
+
+static unsigned long get_user_ckpt_msr(struct task_struct *task)
+{
+ return task->thread.ckpt_regs.msr | task->thread.fpexc_mode;
+}
+
+static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr)
+{
+ task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE;
+ task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE;
+ return 0;
+}
+
+static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap)
+{
+ task->thread.ckpt_regs.trap = trap & 0xfff0;
+ return 0;
+}
+
+/**
+ * tm_cgpr_active - get active number of registers in CGPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed GPR category.
+ */
+int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ return regset->n;
+}
+
+/**
+ * tm_cgpr_get - get CGPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds all the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function gets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * struct pt_regs ckpt_regs;
+ * };
+ */
+int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs,
+ 0, offsetof(struct pt_regs, msr));
+ if (!ret) {
+ unsigned long msr = get_user_ckpt_msr(target);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
+ offsetof(struct pt_regs, msr),
+ offsetof(struct pt_regs, msr) +
+ sizeof(msr));
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs.orig_gpr3,
+ offsetof(struct pt_regs, orig_gpr3),
+ sizeof(struct user_pt_regs));
+ if (!ret)
+ ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ sizeof(struct user_pt_regs), -1);
+
+ return ret;
+}
+
+/*
+ * tm_cgpr_set - set the CGPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function sets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * struct pt_regs ckpt_regs;
+ * };
+ */
+int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ unsigned long reg;
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs,
+ 0, PT_MSR * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_MSR * sizeof(reg),
+ (PT_MSR + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_ckpt_msr(target, reg);
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs.orig_gpr3,
+ PT_ORIG_R3 * sizeof(reg),
+ (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+ if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+ ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_MAX_PUT_REG + 1) * sizeof(reg),
+ PT_TRAP * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_TRAP * sizeof(reg),
+ (PT_TRAP + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_ckpt_trap(target, reg);
+ }
+
+ if (!ret)
+ ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+
+ return ret;
+}
+
+/**
+ * tm_cfpr_active - get active number of registers in CFPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed FPR category.
+ */
+int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ return regset->n;
+}
+
+/**
+ * tm_cfpr_get - get CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed FPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ *};
+ */
+int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ /* copy to local buffer then write that out */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_CKFPR(i);
+ buf[32] = target->thread.ckfp_state.fpscr;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+}
+
+/**
+ * tm_cfpr_set - set CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * FPR register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * FPR registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ *};
+ */
+int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ for (i = 0; i < 32; i++)
+ buf[i] = target->thread.TS_CKFPR(i);
+ buf[32] = target->thread.ckfp_state.fpscr;
+
+ /* copy to local buffer then write that out */
+ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+ if (i)
+ return i;
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_CKFPR(i) = buf[i];
+ target->thread.ckfp_state.fpscr = buf[32];
+ return 0;
+}
+
+/**
+ * tm_cvmx_active - get active number of registers in CVMX
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in checkpointed VMX category.
+ */
+int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ return regset->n;
+}
+
+/**
+ * tm_cvmx_get - get CMVX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ *};
+ */
+int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state,
+ 0, 33 * sizeof(vector128));
+ if (!ret) {
+ /*
+ * Copy out only the low-order word of vrsave.
+ */
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+ vrsave.word = target->thread.ckvrsave;
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
+ 33 * sizeof(vector128), -1);
+ }
+
+ return ret;
+}
+
+/**
+ * tm_cvmx_set - set CMVX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ *};
+ */
+int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state,
+ 0, 33 * sizeof(vector128));
+ if (!ret && count > 0) {
+ /*
+ * We use only the low-order word of vrsave.
+ */
+ union {
+ elf_vrreg_t reg;
+ u32 word;
+ } vrsave;
+ memset(&vrsave, 0, sizeof(vrsave));
+ vrsave.word = target->thread.ckvrsave;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
+ 33 * sizeof(vector128), -1);
+ if (!ret)
+ target->thread.ckvrsave = vrsave.word;
+ }
+
+ return ret;
+}
+
+/**
+ * tm_cvsx_active - get active number of registers in CVSX
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * regisers in transaction checkpointed VSX category.
+ */
+int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return 0;
+
+ flush_vsx_to_thread(target);
+ return target->thread.used_vsr ? regset->n : 0;
+}
+
+/**
+ * tm_cvsx_get - get CVSX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed VSX registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 vsx[32];
+ *};
+ */
+int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+
+ return ret;
+}
+
+/**
+ * tm_cvsx_set - set CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * VSX register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * FPR registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 vsx[32];
+ *};
+ */
+int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ /* Flush the state */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+ if (!ret)
+ for (i = 0; i < 32 ; i++)
+ target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+
+ return ret;
+}
+
+/**
+ * tm_spr_active - get active number of registers in TM SPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks the active number of available
+ * regisers in the transactional memory SPR category.
+ */
+int tm_spr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ return regset->n;
+}
+
+/**
+ * tm_spr_get - get the TM related SPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy from.
+ * @ubuf: User buffer to copy into.
+ *
+ * This function gets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ * u64 tm_tfhar;
+ * u64 tm_texasr;
+ * u64 tm_tfiar;
+ * };
+ */
+int tm_spr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+ BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+ BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ /* Flush the states */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ /* TFHAR register */
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfhar, 0, sizeof(u64));
+
+ /* TEXASR register */
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_texasr, sizeof(u64),
+ 2 * sizeof(u64));
+
+ /* TFIAR register */
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfiar,
+ 2 * sizeof(u64), 3 * sizeof(u64));
+ return ret;
+}
+
+/**
+ * tm_spr_set - set the TM related SPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ * u64 tm_tfhar;
+ * u64 tm_texasr;
+ * u64 tm_tfiar;
+ * };
+ */
+int tm_spr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+ BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+ BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ /* Flush the states */
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ /* TFHAR register */
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfhar, 0, sizeof(u64));
+
+ /* TEXASR register */
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_texasr, sizeof(u64),
+ 2 * sizeof(u64));
+
+ /* TFIAR register */
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tfiar,
+ 2 * sizeof(u64), 3 * sizeof(u64));
+ return ret;
+}
+
+int tm_tar_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+int tm_tar_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tar, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_tar_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_tar, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_ppr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+
+int tm_ppr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_ppr, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_ppr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_ppr, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_dscr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (MSR_TM_ACTIVE(target->thread.regs->msr))
+ return regset->n;
+
+ return 0;
+}
+
+int tm_dscr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_dscr, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_dscr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tm_dscr, 0, sizeof(u64));
+ return ret;
+}
+
+int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.ckpt_regs.gpr[0]);
+}
+
+int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.ckpt_regs.gpr[0]);
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
new file mode 100644
index 000000000000..15e3b79b6395
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -0,0 +1,904 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/nospec.h>
+#include <linux/pkeys.h>
+
+#include "ptrace-decl.h"
+
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define STR(s) #s /* convert to string */
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define GPR_OFFSET_NAME(num) \
+ {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \
+ {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+ GPR_OFFSET_NAME(0),
+ GPR_OFFSET_NAME(1),
+ GPR_OFFSET_NAME(2),
+ GPR_OFFSET_NAME(3),
+ GPR_OFFSET_NAME(4),
+ GPR_OFFSET_NAME(5),
+ GPR_OFFSET_NAME(6),
+ GPR_OFFSET_NAME(7),
+ GPR_OFFSET_NAME(8),
+ GPR_OFFSET_NAME(9),
+ GPR_OFFSET_NAME(10),
+ GPR_OFFSET_NAME(11),
+ GPR_OFFSET_NAME(12),
+ GPR_OFFSET_NAME(13),
+ GPR_OFFSET_NAME(14),
+ GPR_OFFSET_NAME(15),
+ GPR_OFFSET_NAME(16),
+ GPR_OFFSET_NAME(17),
+ GPR_OFFSET_NAME(18),
+ GPR_OFFSET_NAME(19),
+ GPR_OFFSET_NAME(20),
+ GPR_OFFSET_NAME(21),
+ GPR_OFFSET_NAME(22),
+ GPR_OFFSET_NAME(23),
+ GPR_OFFSET_NAME(24),
+ GPR_OFFSET_NAME(25),
+ GPR_OFFSET_NAME(26),
+ GPR_OFFSET_NAME(27),
+ GPR_OFFSET_NAME(28),
+ GPR_OFFSET_NAME(29),
+ GPR_OFFSET_NAME(30),
+ GPR_OFFSET_NAME(31),
+ REG_OFFSET_NAME(nip),
+ REG_OFFSET_NAME(msr),
+ REG_OFFSET_NAME(ctr),
+ REG_OFFSET_NAME(link),
+ REG_OFFSET_NAME(xer),
+ REG_OFFSET_NAME(ccr),
+#ifdef CONFIG_PPC64
+ REG_OFFSET_NAME(softe),
+#else
+ REG_OFFSET_NAME(mq),
+#endif
+ REG_OFFSET_NAME(trap),
+ REG_OFFSET_NAME(dar),
+ REG_OFFSET_NAME(dsisr),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset: the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (roff->offset == offset)
+ return roff->name;
+ return NULL;
+}
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+static unsigned long get_user_msr(struct task_struct *task)
+{
+ return task->thread.regs->msr | task->thread.fpexc_mode;
+}
+
+static int set_user_msr(struct task_struct *task, unsigned long msr)
+{
+ task->thread.regs->msr &= ~MSR_DEBUGCHANGE;
+ task->thread.regs->msr |= msr & MSR_DEBUGCHANGE;
+ return 0;
+}
+
+#ifdef CONFIG_PPC64
+static int get_user_dscr(struct task_struct *task, unsigned long *data)
+{
+ *data = task->thread.dscr;
+ return 0;
+}
+
+static int set_user_dscr(struct task_struct *task, unsigned long dscr)
+{
+ task->thread.dscr = dscr;
+ task->thread.dscr_inherit = 1;
+ return 0;
+}
+#else
+static int get_user_dscr(struct task_struct *task, unsigned long *data)
+{
+ return -EIO;
+}
+
+static int set_user_dscr(struct task_struct *task, unsigned long dscr)
+{
+ return -EIO;
+}
+#endif
+
+/*
+ * We prevent mucking around with the reserved area of trap
+ * which are used internally by the kernel.
+ */
+static int set_user_trap(struct task_struct *task, unsigned long trap)
+{
+ task->thread.regs->trap = trap & 0xfff0;
+ return 0;
+}
+
+/*
+ * Get contents of register REGNO in task TASK.
+ */
+int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
+{
+ unsigned int regs_max;
+
+ if (task->thread.regs == NULL || !data)
+ return -EIO;
+
+ if (regno == PT_MSR) {
+ *data = get_user_msr(task);
+ return 0;
+ }
+
+ if (regno == PT_DSCR)
+ return get_user_dscr(task, data);
+
+ /*
+ * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is
+ * no more used as a flag, lets force usr to alway see the softe value as 1
+ * which means interrupts are not soft disabled.
+ */
+ if (IS_ENABLED(CONFIG_PPC64) && regno == PT_SOFTE) {
+ *data = 1;
+ return 0;
+ }
+
+ regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long);
+ if (regno < regs_max) {
+ regno = array_index_nospec(regno, regs_max);
+ *data = ((unsigned long *)task->thread.regs)[regno];
+ return 0;
+ }
+
+ return -EIO;
+}
+
+/*
+ * Write contents of register REGNO in task TASK.
+ */
+int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
+{
+ if (task->thread.regs == NULL)
+ return -EIO;
+
+ if (regno == PT_MSR)
+ return set_user_msr(task, data);
+ if (regno == PT_TRAP)
+ return set_user_trap(task, data);
+ if (regno == PT_DSCR)
+ return set_user_dscr(task, data);
+
+ if (regno <= PT_MAX_PUT_REG) {
+ regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1);
+ ((unsigned long *)task->thread.regs)[regno] = data;
+ return 0;
+ }
+ return -EIO;
+}
+
+static int gpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ int i, ret;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ if (!FULL_REGS(target->thread.regs)) {
+ /* We have a partial register set. Fill 14-31 with bogus values */
+ for (i = 14; i < 32; i++)
+ target->thread.regs->gpr[i] = NV_REG_POISON;
+ }
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ target->thread.regs,
+ 0, offsetof(struct pt_regs, msr));
+ if (!ret) {
+ unsigned long msr = get_user_msr(target);
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
+ offsetof(struct pt_regs, msr),
+ offsetof(struct pt_regs, msr) +
+ sizeof(msr));
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->orig_gpr3,
+ offsetof(struct pt_regs, orig_gpr3),
+ sizeof(struct user_pt_regs));
+ if (!ret)
+ ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ sizeof(struct user_pt_regs), -1);
+
+ return ret;
+}
+
+static int gpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ unsigned long reg;
+ int ret;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ CHECK_FULL_REGS(target->thread.regs);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.regs,
+ 0, PT_MSR * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_MSR * sizeof(reg),
+ (PT_MSR + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_msr(target, reg);
+ }
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->orig_gpr3,
+ PT_ORIG_R3 * sizeof(reg),
+ (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+ if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+ ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_MAX_PUT_REG + 1) * sizeof(reg),
+ PT_TRAP * sizeof(reg));
+
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_TRAP * sizeof(reg),
+ (PT_TRAP + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_trap(target, reg);
+ }
+
+ if (!ret)
+ ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC64
+static int ppr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->ppr, 0, sizeof(u64));
+}
+
+static int ppr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->ppr, 0, sizeof(u64));
+}
+
+static int dscr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
+}
+static int dscr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
+}
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+static int tar_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
+}
+static int tar_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
+}
+
+static int ebb_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (target->thread.used_ebb)
+ return regset->n;
+
+ return 0;
+}
+
+static int ebb_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ /* Build tests */
+ BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+ BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (!target->thread.used_ebb)
+ return -ENODATA;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr,
+ 0, 3 * sizeof(unsigned long));
+}
+
+static int ebb_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ int ret = 0;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+ BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ if (target->thread.used_ebb)
+ return -ENODATA;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr,
+ 0, sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ebbhr, sizeof(unsigned long),
+ 2 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.bescr, 2 * sizeof(unsigned long),
+ 3 * sizeof(unsigned long));
+
+ return ret;
+}
+static int pmu_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int pmu_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ /* Build tests */
+ BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+ BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+ BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+ BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.siar,
+ 0, 5 * sizeof(unsigned long));
+}
+
+static int pmu_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ int ret = 0;
+
+ /* Build tests */
+ BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+ BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+ BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+ BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return -ENODEV;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.siar,
+ 0, sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.sdar, sizeof(unsigned long),
+ 2 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.sier, 2 * sizeof(unsigned long),
+ 3 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.mmcr2, 3 * sizeof(unsigned long),
+ 4 * sizeof(unsigned long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.mmcr0, 4 * sizeof(unsigned long),
+ 5 * sizeof(unsigned long));
+ return ret;
+}
+#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+static int pkey_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int pkey_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
+ BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.amr,
+ 0, ELF_NPKEY * sizeof(unsigned long));
+}
+
+static int pkey_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ u64 new_amr;
+ int ret;
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ /* Only the AMR can be set from userspace */
+ if (pos != 0 || count != sizeof(new_amr))
+ return -EINVAL;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &new_amr, 0, sizeof(new_amr));
+ if (ret)
+ return ret;
+
+ /* UAMOR determines which bits of the AMR can be set from userspace. */
+ target->thread.amr = (new_amr & target->thread.uamor) |
+ (target->thread.amr & ~target->thread.uamor);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+static const struct user_regset native_regsets[] = {
+ [REGSET_GPR] = {
+ .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .get = gpr_get, .set = gpr_set
+ },
+ [REGSET_FPR] = {
+ .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .get = fpr_get, .set = fpr_set
+ },
+#ifdef CONFIG_ALTIVEC
+ [REGSET_VMX] = {
+ .core_note_type = NT_PPC_VMX, .n = 34,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = vr_active, .get = vr_get, .set = vr_set
+ },
+#endif
+#ifdef CONFIG_VSX
+ [REGSET_VSX] = {
+ .core_note_type = NT_PPC_VSX, .n = 32,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = vsr_active, .get = vsr_get, .set = vsr_set
+ },
+#endif
+#ifdef CONFIG_SPE
+ [REGSET_SPE] = {
+ .core_note_type = NT_PPC_SPE, .n = 35,
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = evr_active, .get = evr_get, .set = evr_set
+ },
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ [REGSET_TM_CGPR] = {
+ .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set
+ },
+ [REGSET_TM_CFPR] = {
+ .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
+ },
+ [REGSET_TM_CVMX] = {
+ .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
+ },
+ [REGSET_TM_CVSX] = {
+ .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
+ },
+ [REGSET_TM_SPR] = {
+ .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
+ },
+ [REGSET_TM_CTAR] = {
+ .core_note_type = NT_PPC_TM_CTAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
+ },
+ [REGSET_TM_CPPR] = {
+ .core_note_type = NT_PPC_TM_CPPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
+ },
+ [REGSET_TM_CDSCR] = {
+ .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC64
+ [REGSET_PPR] = {
+ .core_note_type = NT_PPC_PPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = ppr_get, .set = ppr_set
+ },
+ [REGSET_DSCR] = {
+ .core_note_type = NT_PPC_DSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = dscr_get, .set = dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ [REGSET_TAR] = {
+ .core_note_type = NT_PPC_TAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = tar_get, .set = tar_set
+ },
+ [REGSET_EBB] = {
+ .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = ebb_active, .get = ebb_get, .set = ebb_set
+ },
+ [REGSET_PMR] = {
+ .core_note_type = NT_PPC_PMU, .n = ELF_NPMU,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pmu_active, .get = pmu_get, .set = pmu_set
+ },
+#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ [REGSET_PKEY] = {
+ .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pkey_active, .get = pkey_get, .set = pkey_set
+ },
+#endif
+};
+
+const struct user_regset_view user_ppc_native_view = {
+ .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
+ .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+#include <linux/compat.h>
+
+int gpr32_get_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf,
+ unsigned long *regs)
+{
+ compat_ulong_t *k = kbuf;
+ compat_ulong_t __user *u = ubuf;
+ compat_ulong_t reg;
+
+ pos /= sizeof(reg);
+ count /= sizeof(reg);
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_MSR; --count)
+ *k++ = regs[pos++];
+ else
+ for (; count > 0 && pos < PT_MSR; --count)
+ if (__put_user((compat_ulong_t)regs[pos++], u++))
+ return -EFAULT;
+
+ if (count > 0 && pos == PT_MSR) {
+ reg = get_user_msr(target);
+ if (kbuf)
+ *k++ = reg;
+ else if (__put_user(reg, u++))
+ return -EFAULT;
+ ++pos;
+ --count;
+ }
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_REGS_COUNT; --count)
+ *k++ = regs[pos++];
+ else
+ for (; count > 0 && pos < PT_REGS_COUNT; --count)
+ if (__put_user((compat_ulong_t)regs[pos++], u++))
+ return -EFAULT;
+
+ kbuf = k;
+ ubuf = u;
+ pos *= sizeof(reg);
+ count *= sizeof(reg);
+ return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ PT_REGS_COUNT * sizeof(reg), -1);
+}
+
+int gpr32_set_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ unsigned long *regs)
+{
+ const compat_ulong_t *k = kbuf;
+ const compat_ulong_t __user *u = ubuf;
+ compat_ulong_t reg;
+
+ pos /= sizeof(reg);
+ count /= sizeof(reg);
+
+ if (kbuf)
+ for (; count > 0 && pos < PT_MSR; --count)
+ regs[pos++] = *k++;
+ else
+ for (; count > 0 && pos < PT_MSR; --count) {
+ if (__get_user(reg, u++))
+ return -EFAULT;
+ regs[pos++] = reg;
+ }
+
+
+ if (count > 0 && pos == PT_MSR) {
+ if (kbuf)
+ reg = *k++;
+ else if (__get_user(reg, u++))
+ return -EFAULT;
+ set_user_msr(target, reg);
+ ++pos;
+ --count;
+ }
+
+ if (kbuf) {
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
+ regs[pos++] = *k++;
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ ++k;
+ } else {
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
+ if (__get_user(reg, u++))
+ return -EFAULT;
+ regs[pos++] = reg;
+ }
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ if (__get_user(reg, u++))
+ return -EFAULT;
+ }
+
+ if (count > 0 && pos == PT_TRAP) {
+ if (kbuf)
+ reg = *k++;
+ else if (__get_user(reg, u++))
+ return -EFAULT;
+ set_user_trap(target, reg);
+ ++pos;
+ --count;
+ }
+
+ kbuf = k;
+ ubuf = u;
+ pos *= sizeof(reg);
+ count *= sizeof(reg);
+ return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+}
+
+static int gpr32_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int i;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ if (!FULL_REGS(target->thread.regs)) {
+ /*
+ * We have a partial register set.
+ * Fill 14-31 with bogus values.
+ */
+ for (i = 14; i < 32; i++)
+ target->thread.regs->gpr[i] = NV_REG_POISON;
+ }
+ return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.regs->gpr[0]);
+}
+
+static int gpr32_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ CHECK_FULL_REGS(target->thread.regs);
+ return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.regs->gpr[0]);
+}
+
+/*
+ * These are the regset flavors matching the CONFIG_PPC32 native set.
+ */
+static const struct user_regset compat_regsets[] = {
+ [REGSET_GPR] = {
+ .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
+ .size = sizeof(compat_long_t), .align = sizeof(compat_long_t),
+ .get = gpr32_get, .set = gpr32_set
+ },
+ [REGSET_FPR] = {
+ .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .get = fpr_get, .set = fpr_set
+ },
+#ifdef CONFIG_ALTIVEC
+ [REGSET_VMX] = {
+ .core_note_type = NT_PPC_VMX, .n = 34,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = vr_active, .get = vr_get, .set = vr_set
+ },
+#endif
+#ifdef CONFIG_SPE
+ [REGSET_SPE] = {
+ .core_note_type = NT_PPC_SPE, .n = 35,
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = evr_active, .get = evr_get, .set = evr_set
+ },
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ [REGSET_TM_CGPR] = {
+ .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .active = tm_cgpr_active,
+ .get = tm_cgpr32_get, .set = tm_cgpr32_set
+ },
+ [REGSET_TM_CFPR] = {
+ .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set
+ },
+ [REGSET_TM_CVMX] = {
+ .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set
+ },
+ [REGSET_TM_CVSX] = {
+ .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set
+ },
+ [REGSET_TM_SPR] = {
+ .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set
+ },
+ [REGSET_TM_CTAR] = {
+ .core_note_type = NT_PPC_TM_CTAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set
+ },
+ [REGSET_TM_CPPR] = {
+ .core_note_type = NT_PPC_TM_CPPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set
+ },
+ [REGSET_TM_CDSCR] = {
+ .core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC64
+ [REGSET_PPR] = {
+ .core_note_type = NT_PPC_PPR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = ppr_get, .set = ppr_set
+ },
+ [REGSET_DSCR] = {
+ .core_note_type = NT_PPC_DSCR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = dscr_get, .set = dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ [REGSET_TAR] = {
+ .core_note_type = NT_PPC_TAR, .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .get = tar_get, .set = tar_set
+ },
+ [REGSET_EBB] = {
+ .core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = ebb_active, .get = ebb_get, .set = ebb_set
+ },
+#endif
+};
+
+static const struct user_regset_view user_ppc_compat_view = {
+ .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI,
+ .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+ if (IS_ENABLED(CONFIG_PPC64) && test_tsk_thread_flag(task, TIF_32BIT))
+ return &user_ppc_compat_view;
+ return &user_ppc_native_view;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c
new file mode 100644
index 000000000000..d53466d49cc0
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ */
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ flush_fp_to_thread(target);
+
+ /* copy to local buffer then write that out */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ buf[32] = target->thread.fp_state.fpscr;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ *
+ */
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ flush_fp_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ buf[32] = target->thread.fp_state.fpscr;
+
+ /* copy to local buffer then write that out */
+ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+ if (i)
+ return i;
+
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_FPR(i) = buf[i];
+ target->thread.fp_state.fpscr = buf[32];
+ return 0;
+}
+
+/*
+ * Currently to set and and get all the vsx state, you need to call
+ * the fp and VMX calls as well. This only get/sets the lower 32
+ * 128bit VSX registers.
+ */
+
+int vsr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ flush_vsx_to_thread(target);
+ return target->thread.used_vsr ? regset->n : 0;
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 vsx[32];
+ * };
+ */
+int vsr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+
+ return ret;
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 vsx[32];
+ * };
+ */
+int vsr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+ if (!ret)
+ for (i = 0; i < 32 ; i++)
+ target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
new file mode 100644
index 000000000000..f6e51be47c6e
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -0,0 +1,481 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Derived from "arch/m68k/kernel/ptrace.c"
+ * Copyright (C) 1994 by Hamish Macdonald
+ * Taken from linux/kernel/ptrace.c and modified for M680x0.
+ * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
+ *
+ * Modified by Cort Dougan (cort@hq.fsmlabs.com)
+ * and Paul Mackerras (paulus@samba.org).
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file README.legal in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/audit.h>
+#include <linux/context_tracking.h>
+#include <linux/syscalls.h>
+
+#include <asm/switch_to.h>
+#include <asm/asm-prototypes.h>
+#include <asm/debug.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+ /* make sure the single step bit is not set. */
+ user_disable_single_step(child);
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
+ int ret = -EPERM;
+ void __user *datavp = (void __user *) data;
+ unsigned long __user *datalp = datavp;
+
+ switch (request) {
+ /* read the word at location addr in the USER area. */
+ case PTRACE_PEEKUSR: {
+ unsigned long index, tmp;
+
+ ret = -EIO;
+ /* convert to index and check */
+#ifdef CONFIG_PPC32
+ index = addr >> 2;
+ if ((addr & 3) || (index > PT_FPSCR)
+ || (child->thread.regs == NULL))
+#else
+ index = addr >> 3;
+ if ((addr & 7) || (index > PT_FPSCR))
+#endif
+ break;
+
+ CHECK_FULL_REGS(child->thread.regs);
+ if (index < PT_FPR0) {
+ ret = ptrace_get_reg(child, (int) index, &tmp);
+ if (ret)
+ break;
+ } else {
+ unsigned int fpidx = index - PT_FPR0;
+
+ flush_fp_to_thread(child);
+ if (fpidx < (PT_FPSCR - PT_FPR0))
+ memcpy(&tmp, &child->thread.TS_FPR(fpidx),
+ sizeof(long));
+ else
+ tmp = child->thread.fp_state.fpscr;
+ }
+ ret = put_user(tmp, datalp);
+ break;
+ }
+
+ /* write the word at location addr in the USER area */
+ case PTRACE_POKEUSR: {
+ unsigned long index;
+
+ ret = -EIO;
+ /* convert to index and check */
+#ifdef CONFIG_PPC32
+ index = addr >> 2;
+ if ((addr & 3) || (index > PT_FPSCR)
+ || (child->thread.regs == NULL))
+#else
+ index = addr >> 3;
+ if ((addr & 7) || (index > PT_FPSCR))
+#endif
+ break;
+
+ CHECK_FULL_REGS(child->thread.regs);
+ if (index < PT_FPR0) {
+ ret = ptrace_put_reg(child, index, data);
+ } else {
+ unsigned int fpidx = index - PT_FPR0;
+
+ flush_fp_to_thread(child);
+ if (fpidx < (PT_FPSCR - PT_FPR0))
+ memcpy(&child->thread.TS_FPR(fpidx), &data,
+ sizeof(long));
+ else
+ child->thread.fp_state.fpscr = data;
+ ret = 0;
+ }
+ break;
+ }
+
+ case PPC_PTRACE_GETHWDBGINFO: {
+ struct ppc_debug_info dbginfo;
+
+ ppc_gethwdinfo(&dbginfo);
+
+ if (copy_to_user(datavp, &dbginfo,
+ sizeof(struct ppc_debug_info)))
+ return -EFAULT;
+ return 0;
+ }
+
+ case PPC_PTRACE_SETHWDEBUG: {
+ struct ppc_hw_breakpoint bp_info;
+
+ if (copy_from_user(&bp_info, datavp,
+ sizeof(struct ppc_hw_breakpoint)))
+ return -EFAULT;
+ return ppc_set_hwdebug(child, &bp_info);
+ }
+
+ case PPC_PTRACE_DELHWDEBUG: {
+ ret = ppc_del_hwdebug(child, data);
+ break;
+ }
+
+ case PTRACE_GET_DEBUGREG:
+ ret = ptrace_get_debugreg(child, addr, datalp);
+ break;
+
+ case PTRACE_SET_DEBUGREG:
+ ret = ptrace_set_debugreg(child, addr, data);
+ break;
+
+#ifdef CONFIG_PPC64
+ case PTRACE_GETREGS64:
+#endif
+ case PTRACE_GETREGS: /* Get all pt_regs from the child. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_GPR,
+ 0, sizeof(struct user_pt_regs),
+ datavp);
+
+#ifdef CONFIG_PPC64
+ case PTRACE_SETREGS64:
+#endif
+ case PTRACE_SETREGS: /* Set all gp regs in the child. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_GPR,
+ 0, sizeof(struct user_pt_regs),
+ datavp);
+
+ case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_FPR,
+ 0, sizeof(elf_fpregset_t),
+ datavp);
+
+ case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_FPR,
+ 0, sizeof(elf_fpregset_t),
+ datavp);
+
+#ifdef CONFIG_ALTIVEC
+ case PTRACE_GETVRREGS:
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_VMX,
+ 0, (33 * sizeof(vector128) +
+ sizeof(u32)),
+ datavp);
+
+ case PTRACE_SETVRREGS:
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_VMX,
+ 0, (33 * sizeof(vector128) +
+ sizeof(u32)),
+ datavp);
+#endif
+#ifdef CONFIG_VSX
+ case PTRACE_GETVSRREGS:
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, 32 * sizeof(double),
+ datavp);
+
+ case PTRACE_SETVSRREGS:
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, 32 * sizeof(double),
+ datavp);
+#endif
+#ifdef CONFIG_SPE
+ case PTRACE_GETEVRREGS:
+ /* Get the child spe register state. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_SPE, 0, 35 * sizeof(u32),
+ datavp);
+
+ case PTRACE_SETEVRREGS:
+ /* Set the child spe register state. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_SPE, 0, 35 * sizeof(u32),
+ datavp);
+#endif
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ break;
+ }
+ return ret;
+}
+
+#ifdef CONFIG_SECCOMP
+static int do_seccomp(struct pt_regs *regs)
+{
+ if (!test_thread_flag(TIF_SECCOMP))
+ return 0;
+
+ /*
+ * The ABI we present to seccomp tracers is that r3 contains
+ * the syscall return value and orig_gpr3 contains the first
+ * syscall parameter. This is different to the ptrace ABI where
+ * both r3 and orig_gpr3 contain the first syscall parameter.
+ */
+ regs->gpr[3] = -ENOSYS;
+
+ /*
+ * We use the __ version here because we have already checked
+ * TIF_SECCOMP. If this fails, there is nothing left to do, we
+ * have already loaded -ENOSYS into r3, or seccomp has put
+ * something else in r3 (via SECCOMP_RET_ERRNO/TRACE).
+ */
+ if (__secure_computing(NULL))
+ return -1;
+
+ /*
+ * The syscall was allowed by seccomp, restore the register
+ * state to what audit expects.
+ * Note that we use orig_gpr3, which means a seccomp tracer can
+ * modify the first syscall parameter (in orig_gpr3) and also
+ * allow the syscall to proceed.
+ */
+ regs->gpr[3] = regs->orig_gpr3;
+
+ return 0;
+}
+#else
+static inline int do_seccomp(struct pt_regs *regs) { return 0; }
+#endif /* CONFIG_SECCOMP */
+
+/**
+ * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
+ * @regs: the pt_regs of the task to trace (current)
+ *
+ * Performs various types of tracing on syscall entry. This includes seccomp,
+ * ptrace, syscall tracepoints and audit.
+ *
+ * The pt_regs are potentially visible to userspace via ptrace, so their
+ * contents is ABI.
+ *
+ * One or more of the tracers may modify the contents of pt_regs, in particular
+ * to modify arguments or even the syscall number itself.
+ *
+ * It's also possible that a tracer can choose to reject the system call. In
+ * that case this function will return an illegal syscall number, and will put
+ * an appropriate return value in regs->r3.
+ *
+ * Return: the (possibly changed) syscall number.
+ */
+long do_syscall_trace_enter(struct pt_regs *regs)
+{
+ u32 flags;
+
+ user_exit();
+
+ flags = READ_ONCE(current_thread_info()->flags) &
+ (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
+
+ if (flags) {
+ int rc = tracehook_report_syscall_entry(regs);
+
+ if (unlikely(flags & _TIF_SYSCALL_EMU)) {
+ /*
+ * A nonzero return code from
+ * tracehook_report_syscall_entry() tells us to prevent
+ * the syscall execution, but we are not going to
+ * execute it anyway.
+ *
+ * Returning -1 will skip the syscall execution. We want
+ * to avoid clobbering any registers, so we don't goto
+ * the skip label below.
+ */
+ return -1;
+ }
+
+ if (rc) {
+ /*
+ * The tracer decided to abort the syscall. Note that
+ * the tracer may also just change regs->gpr[0] to an
+ * invalid syscall number, that is handled below on the
+ * exit path.
+ */
+ goto skip;
+ }
+ }
+
+ /* Run seccomp after ptrace; allow it to set gpr[3]. */
+ if (do_seccomp(regs))
+ return -1;
+
+ /* Avoid trace and audit when syscall is invalid. */
+ if (regs->gpr[0] >= NR_syscalls)
+ goto skip;
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_enter(regs, regs->gpr[0]);
+
+ if (!is_32bit_task())
+ audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
+ regs->gpr[5], regs->gpr[6]);
+ else
+ audit_syscall_entry(regs->gpr[0],
+ regs->gpr[3] & 0xffffffff,
+ regs->gpr[4] & 0xffffffff,
+ regs->gpr[5] & 0xffffffff,
+ regs->gpr[6] & 0xffffffff);
+
+ /* Return the possibly modified but valid syscall number */
+ return regs->gpr[0];
+
+skip:
+ /*
+ * If we are aborting explicitly, or if the syscall number is
+ * now invalid, set the return value to -ENOSYS.
+ */
+ regs->gpr[3] = -ENOSYS;
+ return -1;
+}
+
+void do_syscall_trace_leave(struct pt_regs *regs)
+{
+ int step;
+
+ audit_syscall_exit(regs);
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_exit(regs, regs->result);
+
+ step = test_thread_flag(TIF_SINGLESTEP);
+ if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, step);
+
+ user_enter();
+}
+
+void __init pt_regs_check(void);
+
+/*
+ * Dummy function, its purpose is to break the build if struct pt_regs and
+ * struct user_pt_regs don't match.
+ */
+void __init pt_regs_check(void)
+{
+ BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
+ offsetof(struct user_pt_regs, gpr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
+ offsetof(struct user_pt_regs, nip));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct user_pt_regs, orig_gpr3));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
+ offsetof(struct user_pt_regs, ctr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
+ offsetof(struct user_pt_regs, link));
+ BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
+ offsetof(struct user_pt_regs, xer));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
+ offsetof(struct user_pt_regs, ccr));
+#ifdef __powerpc64__
+ BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
+ offsetof(struct user_pt_regs, softe));
+#else
+ BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
+ offsetof(struct user_pt_regs, mq));
+#endif
+ BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
+ offsetof(struct user_pt_regs, trap));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
+ offsetof(struct user_pt_regs, dar));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
+ offsetof(struct user_pt_regs, dsisr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
+ offsetof(struct user_pt_regs, result));
+
+ BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+
+ // Now check that the pt_regs offsets match the uapi #defines
+ #define CHECK_REG(_pt, _reg) \
+ BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
+ sizeof(unsigned long)));
+
+ CHECK_REG(PT_R0, gpr[0]);
+ CHECK_REG(PT_R1, gpr[1]);
+ CHECK_REG(PT_R2, gpr[2]);
+ CHECK_REG(PT_R3, gpr[3]);
+ CHECK_REG(PT_R4, gpr[4]);
+ CHECK_REG(PT_R5, gpr[5]);
+ CHECK_REG(PT_R6, gpr[6]);
+ CHECK_REG(PT_R7, gpr[7]);
+ CHECK_REG(PT_R8, gpr[8]);
+ CHECK_REG(PT_R9, gpr[9]);
+ CHECK_REG(PT_R10, gpr[10]);
+ CHECK_REG(PT_R11, gpr[11]);
+ CHECK_REG(PT_R12, gpr[12]);
+ CHECK_REG(PT_R13, gpr[13]);
+ CHECK_REG(PT_R14, gpr[14]);
+ CHECK_REG(PT_R15, gpr[15]);
+ CHECK_REG(PT_R16, gpr[16]);
+ CHECK_REG(PT_R17, gpr[17]);
+ CHECK_REG(PT_R18, gpr[18]);
+ CHECK_REG(PT_R19, gpr[19]);
+ CHECK_REG(PT_R20, gpr[20]);
+ CHECK_REG(PT_R21, gpr[21]);
+ CHECK_REG(PT_R22, gpr[22]);
+ CHECK_REG(PT_R23, gpr[23]);
+ CHECK_REG(PT_R24, gpr[24]);
+ CHECK_REG(PT_R25, gpr[25]);
+ CHECK_REG(PT_R26, gpr[26]);
+ CHECK_REG(PT_R27, gpr[27]);
+ CHECK_REG(PT_R28, gpr[28]);
+ CHECK_REG(PT_R29, gpr[29]);
+ CHECK_REG(PT_R30, gpr[30]);
+ CHECK_REG(PT_R31, gpr[31]);
+ CHECK_REG(PT_NIP, nip);
+ CHECK_REG(PT_MSR, msr);
+ CHECK_REG(PT_ORIG_R3, orig_gpr3);
+ CHECK_REG(PT_CTR, ctr);
+ CHECK_REG(PT_LNK, link);
+ CHECK_REG(PT_XER, xer);
+ CHECK_REG(PT_CCR, ccr);
+#ifdef CONFIG_PPC64
+ CHECK_REG(PT_SOFTE, softe);
+#else
+ CHECK_REG(PT_MQ, mq);
+#endif
+ CHECK_REG(PT_TRAP, trap);
+ CHECK_REG(PT_DAR, dar);
+ CHECK_REG(PT_DSISR, dsisr);
+ CHECK_REG(PT_RESULT, result);
+ #undef CHECK_REG
+
+ BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+ /*
+ * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
+ * real registers.
+ */
+ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
+}
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c
index f37eb53de1a1..7976ddf29c0e 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace/ptrace32.c
@@ -17,21 +17,10 @@
* this archive for more details.
*/
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/regset.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/signal.h>
#include <linux/compat.h>
-#include <linux/uaccess.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/switch_to.h>
/*
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 7f8c890360fe..f9c0d888ce8a 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -787,8 +787,7 @@ EXPORT_SYMBOL(powerpc_debugfs_root);
static int powerpc_debugfs_init(void)
{
powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL);
-
- return powerpc_debugfs_root == NULL;
+ return 0;
}
arch_initcall(powerpc_debugfs_init);
#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 2dd0d9cb5a20..2ec835574cc9 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -8,6 +8,12 @@
#ifndef __ARCH_POWERPC_KERNEL_SETUP_H
#define __ARCH_POWERPC_KERNEL_SETUP_H
+#ifdef CONFIG_CC_IS_CLANG
+#define __nostackprotector
+#else
+#define __nostackprotector __attribute__((__optimize__("no-stack-protector")))
+#endif
+
void initialize_cache_info(void);
void irqstack_early_init(void);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 5b49b26eb154..305ca89d856f 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -58,7 +58,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid_phys);
int smp_hw_index[NR_CPUS];
EXPORT_SYMBOL(smp_hw_index);
-unsigned long ISA_DMA_THRESHOLD;
unsigned int DMA_MODE_READ;
unsigned int DMA_MODE_WRITE;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e05e6dd67ae6..438a9befce41 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -279,24 +279,42 @@ void __init record_spr_defaults(void)
* device-tree is not accessible via normal means at this point.
*/
-void __init early_setup(unsigned long dt_ptr)
+void __init __nostackprotector early_setup(unsigned long dt_ptr)
{
static __initdata struct paca_struct boot_paca;
/* -------- printk is _NOT_ safe to use here ! ------- */
- /* Try new device tree based feature discovery ... */
- if (!dt_cpu_ftrs_init(__va(dt_ptr)))
- /* Otherwise use the old style CPU table */
- identify_cpu(0, mfspr(SPRN_PVR));
-
- /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
+ /*
+ * Assume we're on cpu 0 for now.
+ *
+ * We need to load a PACA very early for a few reasons.
+ *
+ * The stack protector canary is stored in the paca, so as soon as we
+ * call any stack protected code we need r13 pointing somewhere valid.
+ *
+ * If we are using kcov it will call in_task() in its instrumentation,
+ * which relies on the current task from the PACA.
+ *
+ * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as
+ * printk(), which can trigger both stack protector and kcov.
+ *
+ * percpu variables and spin locks also use the paca.
+ *
+ * So set up a temporary paca. It will be replaced below once we know
+ * what CPU we are on.
+ */
initialise_paca(&boot_paca, 0);
setup_paca(&boot_paca);
fixup_boot_paca();
/* -------- printk is now safe to use ------- */
+ /* Try new device tree based feature discovery ... */
+ if (!dt_cpu_ftrs_init(__va(dt_ptr)))
+ /* Otherwise use the old style CPU table */
+ identify_cpu(0, mfspr(SPRN_PVR));
+
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index d215f9554553..a264989626fd 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -18,12 +18,153 @@
#include <linux/syscalls.h>
#include <asm/hw_breakpoint.h>
#include <linux/uaccess.h>
+#include <asm/switch_to.h>
#include <asm/unistd.h>
#include <asm/debug.h>
#include <asm/tm.h>
#include "signal.h"
+#ifdef CONFIG_VSX
+unsigned long copy_fpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ /* save FPR copy to local buffer then write to the thread_struct */
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ buf[i] = task->thread.TS_FPR(i);
+ buf[i] = task->thread.fp_state.fpscr;
+ return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+}
+
+unsigned long copy_fpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ task->thread.TS_FPR(i) = buf[i];
+ task->thread.fp_state.fpscr = buf[i];
+
+ return 0;
+}
+
+unsigned long copy_vsx_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ /* save FPR copy to local buffer then write to the thread_struct */
+ for (i = 0; i < ELF_NVSRHALFREG; i++)
+ buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+ return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
+}
+
+unsigned long copy_vsx_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < ELF_NVSRHALFREG ; i++)
+ task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ return 0;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+unsigned long copy_ckfpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ /* save FPR copy to local buffer then write to the thread_struct */
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ buf[i] = task->thread.TS_CKFPR(i);
+ buf[i] = task->thread.ckfp_state.fpscr;
+ return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+}
+
+unsigned long copy_ckfpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ task->thread.TS_CKFPR(i) = buf[i];
+ task->thread.ckfp_state.fpscr = buf[i];
+
+ return 0;
+}
+
+unsigned long copy_ckvsx_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ /* save FPR copy to local buffer then write to the thread_struct */
+ for (i = 0; i < ELF_NVSRHALFREG; i++)
+ buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
+ return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
+}
+
+unsigned long copy_ckvsx_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < ELF_NVSRHALFREG ; i++)
+ task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ return 0;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#else
+inline unsigned long copy_fpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ return __copy_to_user(to, task->thread.fp_state.fpr,
+ ELF_NFPREG * sizeof(double));
+}
+
+inline unsigned long copy_fpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ return __copy_from_user(task->thread.fp_state.fpr, from,
+ ELF_NFPREG * sizeof(double));
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+inline unsigned long copy_ckfpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ return __copy_to_user(to, task->thread.ckfp_state.fpr,
+ ELF_NFPREG * sizeof(double));
+}
+
+inline unsigned long copy_ckfpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ return __copy_from_user(task->thread.ckfp_state.fpr, from,
+ ELF_NFPREG * sizeof(double));
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#endif
+
/* Log an error when sending an unhandled signal to a process. Controlled
* through debug.exception-trace sysctl.
*/
@@ -106,7 +247,6 @@ static void do_signal(struct task_struct *tsk)
sigset_t *oldset = sigmask_to_save();
struct ksignal ksig = { .sig = 0 };
int ret;
- int is32 = is_32bit_task();
BUG_ON(tsk != current);
@@ -136,7 +276,7 @@ static void do_signal(struct task_struct *tsk)
rseq_signal_deliver(&ksig, tsk->thread.regs);
- if (is32) {
+ if (is_32bit_task()) {
if (ksig.ka.sa.sa_flags & SA_SIGINFO)
ret = handle_rt_signal32(&ksig, oldset, tsk);
else
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 800433685888..d396efca4068 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -10,8 +10,6 @@
#ifndef _POWERPC_ARCH_SIGNAL_H
#define _POWERPC_ARCH_SIGNAL_H
-extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
-
extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp,
size_t frame_size, int is_32);
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 1b090a76b444..4f96d29a22bf 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -235,146 +235,6 @@ struct rt_sigframe {
int abigap[56];
};
-#ifdef CONFIG_VSX
-unsigned long copy_fpr_to_user(void __user *to,
- struct task_struct *task)
-{
- u64 buf[ELF_NFPREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- buf[i] = task->thread.TS_FPR(i);
- buf[i] = task->thread.fp_state.fpscr;
- return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
-}
-
-unsigned long copy_fpr_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NFPREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
- return 1;
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- task->thread.TS_FPR(i) = buf[i];
- task->thread.fp_state.fpscr = buf[i];
-
- return 0;
-}
-
-unsigned long copy_vsx_to_user(void __user *to,
- struct task_struct *task)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < ELF_NVSRHALFREG; i++)
- buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
- return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
-}
-
-unsigned long copy_vsx_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
- return 1;
- for (i = 0; i < ELF_NVSRHALFREG ; i++)
- task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
- return 0;
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-unsigned long copy_ckfpr_to_user(void __user *to,
- struct task_struct *task)
-{
- u64 buf[ELF_NFPREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- buf[i] = task->thread.TS_CKFPR(i);
- buf[i] = task->thread.ckfp_state.fpscr;
- return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
-}
-
-unsigned long copy_ckfpr_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NFPREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
- return 1;
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- task->thread.TS_CKFPR(i) = buf[i];
- task->thread.ckfp_state.fpscr = buf[i];
-
- return 0;
-}
-
-unsigned long copy_ckvsx_to_user(void __user *to,
- struct task_struct *task)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < ELF_NVSRHALFREG; i++)
- buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
- return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
-}
-
-unsigned long copy_ckvsx_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
- return 1;
- for (i = 0; i < ELF_NVSRHALFREG ; i++)
- task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
- return 0;
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-#else
-inline unsigned long copy_fpr_to_user(void __user *to,
- struct task_struct *task)
-{
- return __copy_to_user(to, task->thread.fp_state.fpr,
- ELF_NFPREG * sizeof(double));
-}
-
-inline unsigned long copy_fpr_from_user(struct task_struct *task,
- void __user *from)
-{
- return __copy_from_user(task->thread.fp_state.fpr, from,
- ELF_NFPREG * sizeof(double));
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-inline unsigned long copy_ckfpr_to_user(void __user *to,
- struct task_struct *task)
-{
- return __copy_to_user(to, task->thread.ckfp_state.fpr,
- ELF_NFPREG * sizeof(double));
-}
-
-inline unsigned long copy_ckfpr_from_user(struct task_struct *task,
- void __user *from)
-{
- return __copy_from_user(task->thread.ckfp_state.fpr, from,
- ELF_NFPREG * sizeof(double));
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-#endif
-
/*
* Save the current user registers on the user stack.
* We only save the altivec/spe registers if the process has used
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 84ed2e77ef9c..adfde59cf4ba 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -473,8 +473,10 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
err |= __get_user(tsk->thread.ckpt_regs.ccr,
&sc->gp_regs[PT_CCR]);
+ /* Don't allow userspace to set the trap value */
+ regs->trap = 0;
+
/* These regs are not checkpointed; they can go in 'regs'. */
- err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]);
err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ea6adbf6a221..6d2a3a3666f0 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1185,10 +1185,30 @@ static inline void add_cpu_to_smallcore_masks(int cpu)
}
}
+int get_physical_package_id(int cpu)
+{
+ int pkg_id = cpu_to_chip_id(cpu);
+
+ /*
+ * If the platform is PowerNV or Guest on KVM, ibm,chip-id is
+ * defined. Hence we would return the chip-id as the result of
+ * get_physical_package_id.
+ */
+ if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR) &&
+ IS_ENABLED(CONFIG_PPC_SPLPAR)) {
+ struct device_node *np = of_get_cpu_node(cpu, NULL);
+ pkg_id = of_node_to_nid(np);
+ of_node_put(np);
+ }
+
+ return pkg_id;
+}
+EXPORT_SYMBOL_GPL(get_physical_package_id);
+
static void add_cpu_to_masks(int cpu)
{
int first_thread = cpu_first_thread_sibling(cpu);
- int chipid = cpu_to_chip_id(cpu);
+ int pkg_id = get_physical_package_id(cpu);
int i;
/*
@@ -1217,11 +1237,11 @@ static void add_cpu_to_masks(int cpu)
for_each_cpu(i, cpu_l2_cache_mask(cpu))
set_cpus_related(cpu, i, cpu_core_mask);
- if (chipid == -1)
+ if (pkg_id == -1)
return;
for_each_cpu(i, cpu_online_mask)
- if (cpu_to_chip_id(i) == chipid)
+ if (get_physical_package_id(i) == pkg_id)
set_cpus_related(cpu, i, cpu_core_mask);
}
@@ -1359,11 +1379,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
if (smp_ops && smp_ops->bringup_done)
smp_ops->bringup_done();
- /*
- * On a shared LPAR, associativity needs to be requested.
- * Hence, get numa topology before dumping cpu topology
- */
- shared_proc_topology_init();
dump_numa_cpu_topology();
#ifdef CONFIG_SCHED_SMT
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index e2a46cfed5fd..c477b8585a29 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -57,7 +57,7 @@ void save_stack_trace(struct stack_trace *trace)
{
unsigned long sp;
- sp = current_stack_pointer();
+ sp = current_stack_frame();
save_context_stack(trace, sp, current, 1);
}
@@ -71,7 +71,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
return;
if (tsk == current)
- sp = current_stack_pointer();
+ sp = current_stack_frame();
else
sp = tsk->thread.ksp;
@@ -131,7 +131,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
}
if (tsk == current)
- sp = current_stack_pointer();
+ sp = current_stack_frame();
else
sp = tsk->thread.ksp;
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
new file mode 100644
index 000000000000..c74295a7765b
--- /dev/null
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/err.h>
+#include <asm/asm-prototypes.h>
+#include <asm/book3s/64/kup-radix.h>
+#include <asm/cputime.h>
+#include <asm/hw_irq.h>
+#include <asm/kprobes.h>
+#include <asm/paca.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/signal.h>
+#include <asm/switch_to.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+#include <asm/unistd.h>
+
+typedef long (*syscall_fn)(long, long, long, long, long, long);
+
+/* Has to run notrace because it is entered not completely "reconciled" */
+notrace long system_call_exception(long r3, long r4, long r5,
+ long r6, long r7, long r8,
+ unsigned long r0, struct pt_regs *regs)
+{
+ syscall_fn f;
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+
+ trace_hardirqs_off(); /* finish reconciling */
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+ BUG_ON(!(regs->msr & MSR_RI));
+ BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(!FULL_REGS(regs));
+ BUG_ON(regs->softe != IRQS_ENABLED);
+
+ account_cpu_user_entry();
+
+#ifdef CONFIG_PPC_SPLPAR
+ if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) &&
+ firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct lppaca *lp = local_paca->lppaca_ptr;
+
+ if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
+ accumulate_stolen_time();
+ }
+#endif
+
+ kuap_check_amr();
+
+ /*
+ * This is not required for the syscall exit path, but makes the
+ * stack frame look nicer. If this was initialised in the first stack
+ * frame, or if the unwinder was taught the first stack frame always
+ * returns to user with IRQS_ENABLED, this store could be avoided!
+ */
+ regs->softe = IRQS_ENABLED;
+
+ local_irq_enable();
+
+ if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
+ /*
+ * We use the return value of do_syscall_trace_enter() as the
+ * syscall number. If the syscall was rejected for any reason
+ * do_syscall_trace_enter() returns an invalid syscall number
+ * and the test against NR_syscalls will fail and the return
+ * value to be used is in regs->gpr[3].
+ */
+ r0 = do_syscall_trace_enter(regs);
+ if (unlikely(r0 >= NR_syscalls))
+ return regs->gpr[3];
+ r3 = regs->gpr[3];
+ r4 = regs->gpr[4];
+ r5 = regs->gpr[5];
+ r6 = regs->gpr[6];
+ r7 = regs->gpr[7];
+ r8 = regs->gpr[8];
+
+ } else if (unlikely(r0 >= NR_syscalls)) {
+ return -ENOSYS;
+ }
+
+ /* May be faster to do array_index_nospec? */
+ barrier_nospec();
+
+ if (unlikely(is_32bit_task())) {
+ f = (void *)compat_sys_call_table[r0];
+
+ r3 &= 0x00000000ffffffffULL;
+ r4 &= 0x00000000ffffffffULL;
+ r5 &= 0x00000000ffffffffULL;
+ r6 &= 0x00000000ffffffffULL;
+ r7 &= 0x00000000ffffffffULL;
+ r8 &= 0x00000000ffffffffULL;
+
+ } else {
+ f = (void *)sys_call_table[r0];
+ }
+
+ return f(r3, r4, r5, r6, r7, r8);
+}
+
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+ struct pt_regs *regs)
+{
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long ti_flags;
+ unsigned long ret = 0;
+
+ regs->result = r3;
+
+ /* Check whether the syscall is issued inside a restartable sequence */
+ rseq_syscall(regs);
+
+ ti_flags = *ti_flagsp;
+
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) {
+ if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+ r3 = -r3;
+ regs->ccr |= 0x10000000; /* Set SO bit in CR */
+ }
+ }
+
+ if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+ if (ti_flags & _TIF_RESTOREALL)
+ ret = _TIF_RESTOREALL;
+ else
+ regs->gpr[3] = r3;
+ clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
+ } else {
+ regs->gpr[3] = r3;
+ }
+
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ do_syscall_trace_leave(regs);
+ ret |= _TIF_RESTOREALL;
+ }
+
+again:
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ local_irq_enable();
+ if (ti_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ /*
+ * SIGPENDING must restore signal handler function
+ * argument GPRs, and some non-volatiles (e.g., r1).
+ * Restore all for now. This could be made lighter.
+ */
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
+ }
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely((ti_flags & _TIF_RESTORE_TM))) {
+ restore_tm_state(regs);
+ } else {
+ unsigned long mathflags = MSR_FP;
+
+ if (cpu_has_feature(CPU_FTR_VSX))
+ mathflags |= MSR_VEC | MSR_VSX;
+ else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ mathflags |= MSR_VEC;
+
+ if ((regs->msr & mathflags) != mathflags)
+ restore_math(regs);
+ }
+ }
+
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+ /* This pattern matches prep_irq_for_idle */
+ __hard_EE_RI_disable();
+ if (unlikely(lazy_irq_pending())) {
+ __hard_RI_enable();
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ local_irq_enable();
+ /* Took an interrupt, may have more exit work to do. */
+ goto again;
+ }
+ local_paca->irq_happened = 0;
+ irq_soft_mask_set(IRQS_ENABLED);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ kuap_check_amr();
+
+ account_cpu_user_exit();
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
+{
+#ifdef CONFIG_PPC_BOOK3E
+ struct thread_struct *ts = &current->thread;
+#endif
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long ti_flags;
+ unsigned long flags;
+ unsigned long ret = 0;
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+ BUG_ON(!(regs->msr & MSR_RI));
+ BUG_ON(!(regs->msr & MSR_PR));
+ BUG_ON(!FULL_REGS(regs));
+ BUG_ON(regs->softe != IRQS_ENABLED);
+
+ local_irq_save(flags);
+
+again:
+ ti_flags = READ_ONCE(*ti_flagsp);
+ while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ local_irq_enable(); /* returning to user: may enable */
+ if (ti_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
+ }
+ local_irq_disable();
+ ti_flags = READ_ONCE(*ti_flagsp);
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely((ti_flags & _TIF_RESTORE_TM))) {
+ restore_tm_state(regs);
+ } else {
+ unsigned long mathflags = MSR_FP;
+
+ if (cpu_has_feature(CPU_FTR_VSX))
+ mathflags |= MSR_VEC | MSR_VSX;
+ else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ mathflags |= MSR_VEC;
+
+ if ((regs->msr & mathflags) != mathflags)
+ restore_math(regs);
+ }
+ }
+
+ trace_hardirqs_on();
+ __hard_EE_RI_disable();
+ if (unlikely(lazy_irq_pending())) {
+ __hard_RI_enable();
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ local_irq_enable();
+ local_irq_disable();
+ /* Took an interrupt, may have more exit work to do. */
+ goto again;
+ }
+ local_paca->irq_happened = 0;
+ irq_soft_mask_set(IRQS_ENABLED);
+
+#ifdef CONFIG_PPC_BOOK3E
+ if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
+ /*
+ * Check to see if the dbcr0 register is set up to debug.
+ * Use the internal debug mode bit to do this.
+ */
+ mtmsr(mfmsr() & ~MSR_DE);
+ mtspr(SPRN_DBCR0, ts->debug.dbcr0);
+ mtspr(SPRN_DBSR, -1);
+ }
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ kuap_check_amr();
+
+ account_cpu_user_exit();
+
+ return ret;
+}
+
+void unrecoverable_exception(struct pt_regs *regs);
+void preempt_schedule_irq(void);
+
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
+{
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long flags;
+ unsigned long ret = 0;
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI)))
+ unrecoverable_exception(regs);
+ BUG_ON(regs->msr & MSR_PR);
+ BUG_ON(!FULL_REGS(regs));
+
+ if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) {
+ clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp);
+ ret = 1;
+ }
+
+ local_irq_save(flags);
+
+ if (regs->softe == IRQS_ENABLED) {
+ /* Returning to a kernel context with local irqs enabled. */
+ WARN_ON_ONCE(!(regs->msr & MSR_EE));
+again:
+ if (IS_ENABLED(CONFIG_PREEMPT)) {
+ /* Return to preemptible kernel context */
+ if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) {
+ if (preempt_count() == 0)
+ preempt_schedule_irq();
+ }
+ }
+
+ trace_hardirqs_on();
+ __hard_EE_RI_disable();
+ if (unlikely(lazy_irq_pending())) {
+ __hard_RI_enable();
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ /*
+ * Can't local_irq_restore to replay if we were in
+ * interrupt context. Must replay directly.
+ */
+ if (irqs_disabled_flags(flags)) {
+ replay_soft_interrupts();
+ } else {
+ local_irq_restore(flags);
+ local_irq_save(flags);
+ }
+ /* Took an interrupt, may have more exit work to do. */
+ goto again;
+ }
+ local_paca->irq_happened = 0;
+ irq_soft_mask_set(IRQS_ENABLED);
+ } else {
+ /* Returning to a kernel context with local irqs disabled. */
+ __hard_EE_RI_disable();
+ if (regs->msr & MSR_EE)
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ }
+
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
+ kuap_restore_amr(regs);
+
+ return ret;
+}
+#endif
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 35b61bfc1b1a..220ae11555f2 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -9,7 +9,9 @@
#
0 nospu restart_syscall sys_restart_syscall
1 nospu exit sys_exit
-2 nospu fork ppc_fork
+2 32 fork ppc_fork sys_fork
+2 64 fork sys_fork
+2 spu fork sys_ni_syscall
3 common read sys_read
4 common write sys_write
5 common open sys_open compat_sys_open
@@ -158,7 +160,9 @@
119 32 sigreturn sys_sigreturn compat_sys_sigreturn
119 64 sigreturn sys_ni_syscall
119 spu sigreturn sys_ni_syscall
-120 nospu clone ppc_clone
+120 32 clone ppc_clone sys_clone
+120 64 clone sys_clone
+120 spu clone sys_ni_syscall
121 common setdomainname sys_setdomainname
122 common uname sys_newuname
123 common modify_ldt sys_ni_syscall
@@ -240,7 +244,9 @@
186 spu sendfile sys_sendfile64
187 common getpmsg sys_ni_syscall
188 common putpmsg sys_ni_syscall
-189 nospu vfork ppc_vfork
+189 32 vfork ppc_vfork sys_vfork
+189 64 vfork sys_vfork
+189 spu vfork sys_ni_syscall
190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
191 common readahead sys_readahead compat_sys_readahead
192 32 mmap2 sys_mmap2 compat_sys_mmap2
@@ -316,8 +322,8 @@
248 32 clock_nanosleep sys_clock_nanosleep_time32
248 64 clock_nanosleep sys_clock_nanosleep
248 spu clock_nanosleep sys_clock_nanosleep
-249 32 swapcontext ppc_swapcontext ppc32_swapcontext
-249 64 swapcontext ppc64_swapcontext
+249 32 swapcontext ppc_swapcontext compat_sys_swapcontext
+249 64 swapcontext sys_swapcontext
249 spu swapcontext sys_ni_syscall
250 common tgkill sys_tgkill
251 32 utimes sys_utimes_time32
@@ -456,7 +462,7 @@
361 common bpf sys_bpf
362 nospu execveat sys_execveat compat_sys_execveat
363 32 switch_endian sys_ni_syscall
-363 64 switch_endian ppc_switch_endian
+363 64 switch_endian sys_switch_endian
363 spu switch_endian sys_ni_syscall
364 common userfaultfd sys_userfaultfd
365 common membarrier sys_membarrier
@@ -516,6 +522,8 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
-435 nospu clone3 ppc_clone3
+435 32 clone3 ppc_clone3 sys_clone3
+435 64 clone3 sys_clone3
+435 spu clone3 sys_ni_syscall
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
diff --git a/arch/powerpc/kernel/syscalls/syscallhdr.sh b/arch/powerpc/kernel/syscalls/syscallhdr.sh
index c0a9a32937f1..02d6751f3be3 100644
--- a/arch/powerpc/kernel/syscalls/syscallhdr.sh
+++ b/arch/powerpc/kernel/syscalls/syscallhdr.sh
@@ -32,6 +32,5 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
printf "#define __NR_syscalls\t%s\n" "${nxt}"
printf "#endif\n"
printf "\n"
- printf "#endif /* %s */" "${fileguard}"
- printf "\n"
+ printf "#endif /* %s */\n" "${fileguard}"
) > "$out"
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 80a676da11cb..479c70680b76 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -87,6 +87,155 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay);
#endif /* CONFIG_PPC64 */
+#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
+static void read_##NAME(void *val) \
+{ \
+ *(unsigned long *)val = mfspr(ADDRESS); \
+} \
+static void write_##NAME(void *val) \
+{ \
+ EXTRA; \
+ mtspr(ADDRESS, *(unsigned long *)val); \
+}
+
+#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+static ssize_t show_##NAME(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, dev); \
+ unsigned long val; \
+ smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \
+ return sprintf(buf, "%lx\n", val); \
+} \
+static ssize_t __used \
+ store_##NAME(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t count) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, dev); \
+ unsigned long val; \
+ int ret = sscanf(buf, "%lx", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \
+ return count; \
+}
+
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+#define SYSFS_SPRSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#ifdef CONFIG_PPC64
+
+/*
+ * This is the system wide DSCR register default value. Any
+ * change to this default value through the sysfs interface
+ * will update all per cpu DSCR default values across the
+ * system stored in their respective PACA structures.
+ */
+static unsigned long dscr_default;
+
+/**
+ * read_dscr() - Fetch the cpu specific DSCR default
+ * @val: Returned cpu specific DSCR default value
+ *
+ * This function returns the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
+static void read_dscr(void *val)
+{
+ *(unsigned long *)val = get_paca()->dscr_default;
+}
+
+
+/**
+ * write_dscr() - Update the cpu specific DSCR default
+ * @val: New cpu specific DSCR default value to update
+ *
+ * This function updates the per cpu DSCR default value
+ * for any cpu which is contained in it's PACA structure.
+ */
+static void write_dscr(void *val)
+{
+ get_paca()->dscr_default = *(unsigned long *)val;
+ if (!current->thread.dscr_inherit) {
+ current->thread.dscr = *(unsigned long *)val;
+ mtspr(SPRN_DSCR, *(unsigned long *)val);
+ }
+}
+
+SYSFS_SPRSETUP_SHOW_STORE(dscr);
+static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
+
+static void add_write_permission_dev_attr(struct device_attribute *attr)
+{
+ attr->attr.mode |= 0200;
+}
+
+/**
+ * show_dscr_default() - Fetch the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ *
+ * This function returns the system wide DSCR default value.
+ */
+static ssize_t show_dscr_default(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lx\n", dscr_default);
+}
+
+/**
+ * store_dscr_default() - Update the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ * @count: Size of the update
+ *
+ * This function updates the system wide DSCR default value.
+ */
+static ssize_t __used store_dscr_default(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ int ret = 0;
+
+ ret = sscanf(buf, "%lx", &val);
+ if (ret != 1)
+ return -EINVAL;
+ dscr_default = val;
+
+ on_each_cpu(write_dscr, &val, 1);
+
+ return count;
+}
+
+static DEVICE_ATTR(dscr_default, 0600,
+ show_dscr_default, store_dscr_default);
+
+static void sysfs_create_dscr_default(void)
+{
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ int err = 0;
+ int cpu;
+
+ dscr_default = spr_default_dscr;
+ for_each_possible_cpu(cpu)
+ paca_ptrs[cpu]->dscr_default = dscr_default;
+
+ err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
+ }
+}
+#endif /* CONFIG_PPC64 */
+
#ifdef CONFIG_PPC_FSL_BOOK3E
#define MAX_BIT 63
@@ -407,84 +556,35 @@ void ppc_enable_pmcs(void)
}
EXPORT_SYMBOL(ppc_enable_pmcs);
-#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
-static void read_##NAME(void *val) \
-{ \
- *(unsigned long *)val = mfspr(ADDRESS); \
-} \
-static void write_##NAME(void *val) \
-{ \
- EXTRA; \
- mtspr(ADDRESS, *(unsigned long *)val); \
-}
-#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
-static ssize_t show_##NAME(struct device *dev, \
- struct device_attribute *attr, \
- char *buf) \
-{ \
- struct cpu *cpu = container_of(dev, struct cpu, dev); \
- unsigned long val; \
- smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \
- return sprintf(buf, "%lx\n", val); \
-} \
-static ssize_t __used \
- store_##NAME(struct device *dev, struct device_attribute *attr, \
- const char *buf, size_t count) \
-{ \
- struct cpu *cpu = container_of(dev, struct cpu, dev); \
- unsigned long val; \
- int ret = sscanf(buf, "%lx", &val); \
- if (ret != 1) \
- return -EINVAL; \
- smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \
- return count; \
-}
-
-#define SYSFS_PMCSETUP(NAME, ADDRESS) \
- __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
-#define SYSFS_SPRSETUP(NAME, ADDRESS) \
- __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
-
-#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
/* Let's define all possible registers, we'll only hook up the ones
* that are implemented on the current processor
*/
-#if defined(CONFIG_PPC64)
+#ifdef CONFIG_PMU_SYSFS
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_BOOK3S_32)
#define HAS_PPC_PMC_CLASSIC 1
#define HAS_PPC_PMC_IBM 1
-#define HAS_PPC_PMC_PA6T 1
-#elif defined(CONFIG_PPC_BOOK3S_32)
-#define HAS_PPC_PMC_CLASSIC 1
-#define HAS_PPC_PMC_IBM 1
-#define HAS_PPC_PMC_G4 1
#endif
+#ifdef CONFIG_PPC64
+#define HAS_PPC_PMC_PA6T 1
+#define HAS_PPC_PMC56 1
+#endif
-#ifdef HAS_PPC_PMC_CLASSIC
-SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
-SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
-SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
-SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
-SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
-SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
-SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
-SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
-
-#ifdef HAS_PPC_PMC_G4
-SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2);
+#ifdef CONFIG_PPC_BOOK3S_32
+#define HAS_PPC_PMC_G4 1
#endif
+#endif /* CONFIG_PMU_SYSFS */
+#if defined(CONFIG_PPC64) && defined(CONFIG_DEBUG_MISC)
+#define HAS_PPC_PA6T
+#endif
+/*
+ * SPRs which are not related to PMU.
+ */
#ifdef CONFIG_PPC64
-SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
-SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
-
-SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
SYSFS_SPRSETUP(purr, SPRN_PURR);
SYSFS_SPRSETUP(spurr, SPRN_SPURR);
SYSFS_SPRSETUP(pir, SPRN_PIR);
@@ -495,115 +595,38 @@ SYSFS_SPRSETUP(tscr, SPRN_TSCR);
enable write when needed with a separate function.
Lets be conservative and default to pseries.
*/
-static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
static DEVICE_ATTR(pir, 0400, show_pir, NULL);
static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
+#endif /* CONFIG_PPC64 */
-/*
- * This is the system wide DSCR register default value. Any
- * change to this default value through the sysfs interface
- * will update all per cpu DSCR default values across the
- * system stored in their respective PACA structures.
- */
-static unsigned long dscr_default;
-
-/**
- * read_dscr() - Fetch the cpu specific DSCR default
- * @val: Returned cpu specific DSCR default value
- *
- * This function returns the per cpu DSCR default value
- * for any cpu which is contained in it's PACA structure.
- */
-static void read_dscr(void *val)
-{
- *(unsigned long *)val = get_paca()->dscr_default;
-}
-
-
-/**
- * write_dscr() - Update the cpu specific DSCR default
- * @val: New cpu specific DSCR default value to update
- *
- * This function updates the per cpu DSCR default value
- * for any cpu which is contained in it's PACA structure.
- */
-static void write_dscr(void *val)
-{
- get_paca()->dscr_default = *(unsigned long *)val;
- if (!current->thread.dscr_inherit) {
- current->thread.dscr = *(unsigned long *)val;
- mtspr(SPRN_DSCR, *(unsigned long *)val);
- }
-}
-
-SYSFS_SPRSETUP_SHOW_STORE(dscr);
-static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
-
-static void add_write_permission_dev_attr(struct device_attribute *attr)
-{
- attr->attr.mode |= 0200;
-}
-
-/**
- * show_dscr_default() - Fetch the system wide DSCR default
- * @dev: Device structure
- * @attr: Device attribute structure
- * @buf: Interface buffer
- *
- * This function returns the system wide DSCR default value.
- */
-static ssize_t show_dscr_default(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "%lx\n", dscr_default);
-}
-
-/**
- * store_dscr_default() - Update the system wide DSCR default
- * @dev: Device structure
- * @attr: Device attribute structure
- * @buf: Interface buffer
- * @count: Size of the update
- *
- * This function updates the system wide DSCR default value.
- */
-static ssize_t __used store_dscr_default(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t count)
-{
- unsigned long val;
- int ret = 0;
-
- ret = sscanf(buf, "%lx", &val);
- if (ret != 1)
- return -EINVAL;
- dscr_default = val;
+#ifdef HAS_PPC_PMC_CLASSIC
+SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
+SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
+SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
+SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
+SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
+SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
+SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+#endif
- on_each_cpu(write_dscr, &val, 1);
+#ifdef HAS_PPC_PMC_G4
+SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2);
+#endif
- return count;
-}
+#ifdef HAS_PPC_PMC56
+SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
+SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
-static DEVICE_ATTR(dscr_default, 0600,
- show_dscr_default, store_dscr_default);
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
-static void sysfs_create_dscr_default(void)
-{
- if (cpu_has_feature(CPU_FTR_DSCR)) {
- int err = 0;
- int cpu;
+static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+#endif /* HAS_PPC_PMC56 */
- dscr_default = spr_default_dscr;
- for_each_possible_cpu(cpu)
- paca_ptrs[cpu]->dscr_default = dscr_default;
- err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
- }
-}
-#endif /* CONFIG_PPC64 */
#ifdef HAS_PPC_PMC_PA6T
SYSFS_PMCSETUP(pa6t_pmc0, SPRN_PA6T_PMC0);
@@ -612,7 +635,9 @@ SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2);
SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
-#ifdef CONFIG_DEBUG_MISC
+#endif
+
+#ifdef HAS_PPC_PA6T
SYSFS_SPRSETUP(hid0, SPRN_HID0);
SYSFS_SPRSETUP(hid1, SPRN_HID1);
SYSFS_SPRSETUP(hid4, SPRN_HID4);
@@ -641,15 +666,14 @@ SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0);
SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1);
SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2);
SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);
-#endif /* CONFIG_DEBUG_MISC */
-#endif /* HAS_PPC_PMC_PA6T */
+#endif /* HAS_PPC_PA6T */
#ifdef HAS_PPC_PMC_IBM
static struct device_attribute ibm_common_attrs[] = {
__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
};
-#endif /* HAS_PPC_PMC_G4 */
+#endif /* HAS_PPC_PMC_IBM */
#ifdef HAS_PPC_PMC_G4
static struct device_attribute g4_common_attrs[] = {
@@ -659,6 +683,7 @@ static struct device_attribute g4_common_attrs[] = {
};
#endif /* HAS_PPC_PMC_G4 */
+#ifdef HAS_PPC_PMC_CLASSIC
static struct device_attribute classic_pmc_attrs[] = {
__ATTR(pmc1, 0600, show_pmc1, store_pmc1),
__ATTR(pmc2, 0600, show_pmc2, store_pmc2),
@@ -666,14 +691,16 @@ static struct device_attribute classic_pmc_attrs[] = {
__ATTR(pmc4, 0600, show_pmc4, store_pmc4),
__ATTR(pmc5, 0600, show_pmc5, store_pmc5),
__ATTR(pmc6, 0600, show_pmc6, store_pmc6),
-#ifdef CONFIG_PPC64
+#ifdef HAS_PPC_PMC56
__ATTR(pmc7, 0600, show_pmc7, store_pmc7),
__ATTR(pmc8, 0600, show_pmc8, store_pmc8),
#endif
};
+#endif
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
static struct device_attribute pa6t_attrs[] = {
+#ifdef HAS_PPC_PMC_PA6T
__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
__ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0),
@@ -682,7 +709,8 @@ static struct device_attribute pa6t_attrs[] = {
__ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
__ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
__ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
-#ifdef CONFIG_DEBUG_MISC
+#endif
+#ifdef HAS_PPC_PA6T
__ATTR(hid0, 0600, show_hid0, store_hid0),
__ATTR(hid1, 0600, show_hid1, store_hid1),
__ATTR(hid4, 0600, show_hid4, store_hid4),
@@ -711,10 +739,9 @@ static struct device_attribute pa6t_attrs[] = {
__ATTR(tsr1, 0600, show_tsr1, store_tsr1),
__ATTR(tsr2, 0600, show_tsr2, store_tsr2),
__ATTR(tsr3, 0600, show_tsr3, store_tsr3),
-#endif /* CONFIG_DEBUG_MISC */
+#endif /* HAS_PPC_PA6T */
};
-#endif /* HAS_PPC_PMC_PA6T */
-#endif /* HAS_PPC_PMC_CLASSIC */
+#endif
#ifdef CONFIG_PPC_SVM
static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf)
@@ -765,14 +792,14 @@ static int register_cpu_online(unsigned int cpu)
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_G4 */
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
case PPC_PMC_PA6T:
/* PA Semi starts counting at PMC0 */
attrs = pa6t_attrs;
nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
pmc_attrs = NULL;
break;
-#endif /* HAS_PPC_PMC_PA6T */
+#endif
default:
attrs = NULL;
nattrs = 0;
@@ -787,8 +814,10 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, &pmc_attrs[i]);
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PMU_SYSFS
if (cpu_has_feature(CPU_FTR_MMCRA))
device_create_file(s, &dev_attr_mmcra);
+#endif /* CONFIG_PMU_SYSFS */
if (cpu_has_feature(CPU_FTR_PURR)) {
if (!firmware_has_feature(FW_FEATURE_LPAR))
@@ -854,14 +883,14 @@ static int unregister_cpu_online(unsigned int cpu)
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_G4 */
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
case PPC_PMC_PA6T:
/* PA Semi starts counting at PMC0 */
attrs = pa6t_attrs;
nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
pmc_attrs = NULL;
break;
-#endif /* HAS_PPC_PMC_PA6T */
+#endif
default:
attrs = NULL;
nattrs = 0;
@@ -876,8 +905,10 @@ static int unregister_cpu_online(unsigned int cpu)
device_remove_file(s, &pmc_attrs[i]);
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PMU_SYSFS
if (cpu_has_feature(CPU_FTR_MMCRA))
device_remove_file(s, &dev_attr_mmcra);
+#endif /* CONFIG_PMU_SYSFS */
if (cpu_has_feature(CPU_FTR_PURR))
device_remove_file(s, &dev_attr_purr);
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 5b905a2f4e4d..d34276f3c495 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -16,25 +16,22 @@
#ifdef CONFIG_PPC64
.p2align 3
+#define __SYSCALL(nr, entry) .8byte entry
+#else
+#define __SYSCALL(nr, entry) .long entry
#endif
.globl sys_call_table
sys_call_table:
#ifdef CONFIG_PPC64
-#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
#include <asm/syscall_table_64.h>
-#undef __SYSCALL
#else
-#define __SYSCALL(nr, entry) .long entry
#include <asm/syscall_table_32.h>
-#undef __SYSCALL
#endif
#ifdef CONFIG_COMPAT
.globl compat_sys_call_table
compat_sys_call_table:
#define compat_sys_sigsuspend sys_sigsuspend
-#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
#include <asm/syscall_table_c32.h>
-#undef __SYSCALL
#endif
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 1168e8b37e30..6fcae436ae51 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -50,7 +50,7 @@
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/irq_work.h>
-#include <linux/clk-provider.h>
+#include <linux/of_clk.h>
#include <linux/suspend.h>
#include <linux/sched/cputime.h>
#include <linux/processor.h>
@@ -522,35 +522,6 @@ static inline void clear_irq_work_pending(void)
"i" (offsetof(struct paca_struct, irq_work_pending)));
}
-void arch_irq_work_raise(void)
-{
- preempt_disable();
- set_irq_work_pending_flag();
- /*
- * Non-nmi code running with interrupts disabled will replay
- * irq_happened before it re-enables interrupts, so setthe
- * decrementer there instead of causing a hardware exception
- * which would immediately hit the masked interrupt handler
- * and have the net effect of setting the decrementer in
- * irq_happened.
- *
- * NMI interrupts can not check this when they return, so the
- * decrementer hardware exception is raised, which will fire
- * when interrupts are next enabled.
- *
- * BookE does not support this yet, it must audit all NMI
- * interrupt handlers to ensure they call nmi_enter() so this
- * check would be correct.
- */
- if (IS_ENABLED(CONFIG_BOOKE) || !irqs_disabled() || in_nmi()) {
- set_dec(1);
- } else {
- hard_irq_disable();
- local_paca->irq_happened |= PACA_IRQ_DEC;
- }
- preempt_enable();
-}
-
#else /* 32-bit */
DEFINE_PER_CPU(u8, irq_work_pending);
@@ -559,16 +530,27 @@ DEFINE_PER_CPU(u8, irq_work_pending);
#define test_irq_work_pending() __this_cpu_read(irq_work_pending)
#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0)
+#endif /* 32 vs 64 bit */
+
void arch_irq_work_raise(void)
{
+ /*
+ * 64-bit code that uses irq soft-mask can just cause an immediate
+ * interrupt here that gets soft masked, if this is called under
+ * local_irq_disable(). It might be possible to prevent that happening
+ * by noticing interrupts are disabled and setting decrementer pending
+ * to be replayed when irqs are enabled. The problem there is that
+ * tracing can call irq_work_raise, including in code that does low
+ * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on)
+ * which could get tangled up if we're messing with the same state
+ * here.
+ */
preempt_disable();
set_irq_work_pending_flag();
set_dec(1);
preempt_enable();
}
-#endif /* 32 vs 64 bit */
-
#else /* CONFIG_IRQ_WORK */
#define test_irq_work_pending() 0
@@ -663,15 +645,6 @@ void timer_broadcast_interrupt(void)
}
#endif
-/*
- * Hypervisor decrementer interrupts shouldn't occur but are sometimes
- * left pending on exit from a KVM guest. We don't need to do anything
- * to clear them, as they are edge-triggered.
- */
-void hdec_interrupt(struct pt_regs *regs)
-{
-}
-
#ifdef CONFIG_SUSPEND
static void generic_suspend_disable_irqs(void)
{
@@ -1158,9 +1131,7 @@ void __init time_init(void)
init_decrementer_clockevent();
tick_setup_hrtimer_broadcast();
-#ifdef CONFIG_COMMON_CLK
of_clk_init(NULL);
-#endif
}
/*
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 82a3438300fd..3fca22276bb1 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -2278,35 +2278,20 @@ void ppc_warn_emulated_print(const char *type)
static int __init ppc_warn_emulated_init(void)
{
- struct dentry *dir, *d;
+ struct dentry *dir;
unsigned int i;
struct ppc_emulated_entry *entries = (void *)&ppc_emulated;
- if (!powerpc_debugfs_root)
- return -ENODEV;
-
dir = debugfs_create_dir("emulated_instructions",
powerpc_debugfs_root);
- if (!dir)
- return -ENOMEM;
- d = debugfs_create_u32("do_warn", 0644, dir,
- &ppc_warn_emulated);
- if (!d)
- goto fail;
+ debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated);
- for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
- d = debugfs_create_u32(entries[i].name, 0644, dir,
- (u32 *)&entries[i].val.counter);
- if (!d)
- goto fail;
- }
+ for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++)
+ debugfs_create_u32(entries[i].name, 0644, dir,
+ (u32 *)&entries[i].val.counter);
return 0;
-
-fail:
- debugfs_remove_recursive(dir);
- return -ENOMEM;
}
device_initcall(ppc_warn_emulated_init);
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index b9a108411c0d..f38f26e844b6 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -391,12 +391,7 @@ static unsigned long __init find_function64(struct lib64_elfinfo *lib,
symname);
return 0;
}
-#ifdef VDS64_HAS_DESCRIPTORS
- return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) -
- VDSO64_LBASE;
-#else
return sym->st_value - VDSO64_LBASE;
-#endif
}
static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32,
@@ -656,7 +651,8 @@ static void __init vdso_setup_syscall_map(void)
if (sys_call_table[i] != sys_ni_syscall)
vdso_data->syscall_map_64[i >> 5] |=
0x80000000UL >> (i & 0x1f);
- if (compat_sys_call_table[i] != sys_ni_syscall)
+ if (IS_ENABLED(CONFIG_COMPAT) &&
+ compat_sys_call_table[i] != sys_ni_syscall)
vdso_data->syscall_map_32[i >> 5] |=
0x80000000UL >> (i & 0x1f);
#else /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/vdso32/.gitignore b/arch/powerpc/kernel/vdso32/.gitignore
index fea5809857a5..824b863ec6bd 100644
--- a/arch/powerpc/kernel/vdso32/.gitignore
+++ b/arch/powerpc/kernel/vdso32/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
vdso32.lds
vdso32.so.dbg
diff --git a/arch/powerpc/kernel/vdso64/.gitignore b/arch/powerpc/kernel/vdso64/.gitignore
index 77a0b423642c..84151a7ba31d 100644
--- a/arch/powerpc/kernel/vdso64/.gitignore
+++ b/arch/powerpc/kernel/vdso64/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
vdso64.lds
vdso64.so.dbg
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 25c14a0981bf..d20c5e79e03c 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -134,7 +134,7 @@ _GLOBAL(load_up_vsx)
/* enable use of VSX after return */
oris r12,r12,MSR_VSX@h
std r12,_MSR(r1)
- b fast_exception_return
+ b fast_interrupt_return
#endif /* CONFIG_VSX */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index a32d478a7f41..31a0f201fb6f 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -256,6 +256,7 @@ SECTIONS
*(.dynamic)
}
.hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) }
+ .gnu.hash : AT(ADDR(.gnu.hash) - LOAD_OFFSET) { *(.gnu.hash) }
.interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) }
.rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET)
{
@@ -303,12 +304,6 @@ SECTIONS
*(.branch_lt)
}
-#ifdef CONFIG_DEBUG_INFO_BTF
- .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) {
- *(.BTF)
- }
-#endif
-
.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
__start_opd = .;
KEEP(*(.opd))