aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile15
-rw-r--r--arch/powerpc/kernel/asm-offsets.c15
-rw-r--r--arch/powerpc/kernel/cpu_setup_6xx.S4
-rw-r--r--arch/powerpc/kernel/dma-iommu.c75
-rw-r--r--arch/powerpc/kernel/dma-mask.c12
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c89
-rw-r--r--arch/powerpc/kernel/dma.c362
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c25
-rw-r--r--arch/powerpc/kernel/eeh.c190
-rw-r--r--arch/powerpc/kernel/eeh_cache.c36
-rw-r--r--arch/powerpc/kernel/eeh_driver.c86
-rw-r--r--arch/powerpc/kernel/eeh_event.c16
-rw-r--r--arch/powerpc/kernel/eeh_pe.c68
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c3
-rw-r--r--arch/powerpc/kernel/entry_32.S97
-rw-r--r--arch/powerpc/kernel/entry_64.S53
-rw-r--r--arch/powerpc/kernel/epapr_hcalls.S5
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S14
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S94
-rw-r--r--arch/powerpc/kernel/head_32.S160
-rw-r--r--arch/powerpc/kernel/head_40x.S9
-rw-r--r--arch/powerpc/kernel/head_44x.S8
-rw-r--r--arch/powerpc/kernel/head_64.S20
-rw-r--r--arch/powerpc/kernel/head_8xx.S124
-rw-r--r--arch/powerpc/kernel/head_booke.h12
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S16
-rw-r--r--arch/powerpc/kernel/idle_6xx.S8
-rw-r--r--arch/powerpc/kernel/idle_book3e.S2
-rw-r--r--arch/powerpc/kernel/idle_e500.S8
-rw-r--r--arch/powerpc/kernel/idle_power4.S2
-rw-r--r--arch/powerpc/kernel/irq.c119
-rw-r--r--arch/powerpc/kernel/kgdb.c28
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c6
-rw-r--r--arch/powerpc/kernel/mce.c11
-rw-r--r--arch/powerpc/kernel/misc_32.S17
-rw-r--r--arch/powerpc/kernel/nvram_64.c158
-rw-r--r--arch/powerpc/kernel/paca.c25
-rw-r--r--arch/powerpc/kernel/pci-common.c24
-rw-r--r--arch/powerpc/kernel/pci_32.c3
-rw-r--r--arch/powerpc/kernel/process.c68
-rw-r--r--arch/powerpc/kernel/prom.c5
-rw-r--r--arch/powerpc/kernel/ptrace.c18
-rw-r--r--arch/powerpc/kernel/rtas.c6
-rw-r--r--arch/powerpc/kernel/setup-common.c12
-rw-r--r--arch/powerpc/kernel/setup_32.c62
-rw-r--r--arch/powerpc/kernel/setup_64.c60
-rw-r--r--arch/powerpc/kernel/smp.c109
-rw-r--r--arch/powerpc/kernel/stacktrace.c102
-rw-r--r--arch/powerpc/kernel/syscalls.c2
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl134
-rw-r--r--arch/powerpc/kernel/syscalls/syscalltbl.sh4
-rw-r--r--arch/powerpc/kernel/systbl.S6
-rw-r--r--arch/powerpc/kernel/time.c1
-rw-r--r--arch/powerpc/kernel/trace/Makefile3
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S6
-rw-r--r--arch/powerpc/kernel/traps.c133
-rw-r--r--arch/powerpc/kernel/udbg.c2
-rw-r--r--arch/powerpc/kernel/vdso.c2
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile1
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile1
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S14
61 files changed, 1236 insertions, 1534 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index cb7f0bb9ee71..cddadccf551d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -36,7 +36,7 @@ obj-y := cputable.o ptrace.o syscalls.o \
process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
- udbg.o misc.o io.o dma.o misc_$(BITS).o \
+ udbg.o misc.o io.o misc_$(BITS).o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
@@ -105,6 +105,7 @@ obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
+obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o
pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \
@@ -142,19 +143,29 @@ endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
-# Disable GCOV & sanitizers in odd or sensitive code
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
+KCOV_INSTRUMENT_prom_init.o := n
UBSAN_SANITIZE_prom_init.o := n
GCOV_PROFILE_machine_kexec_64.o := n
+KCOV_INSTRUMENT_machine_kexec_64.o := n
UBSAN_SANITIZE_machine_kexec_64.o := n
GCOV_PROFILE_machine_kexec_32.o := n
+KCOV_INSTRUMENT_machine_kexec_32.o := n
UBSAN_SANITIZE_machine_kexec_32.o := n
GCOV_PROFILE_kprobes.o := n
+KCOV_INSTRUMENT_kprobes.o := n
UBSAN_SANITIZE_kprobes.o := n
GCOV_PROFILE_kprobes-ftrace.o := n
+KCOV_INSTRUMENT_kprobes-ftrace.o := n
UBSAN_SANITIZE_kprobes-ftrace.o := n
UBSAN_SANITIZE_vdso.o := n
+# Necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_cputable.o := n
+KCOV_INSTRUMENT_setup_64.o := n
+KCOV_INSTRUMENT_paca.o := n
+
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 9ffc72ded73a..86a61e5f8285 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -13,6 +13,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define GENERATING_ASM_OFFSETS /* asm/smp.h */
+
#include <linux/compat.h>
#include <linux/signal.h>
#include <linux/sched.h>
@@ -90,10 +92,15 @@ int main(void)
DEFINE(SIGSEGV, SIGSEGV);
DEFINE(NMI_MASK, NMI_MASK);
#else
- OFFSET(THREAD_INFO, task_struct, stack);
- DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
OFFSET(KSP_LIMIT, thread_struct, ksp_limit);
+#ifdef CONFIG_PPC_RTAS
+ OFFSET(RTAS_SP, thread_struct, rtas_sp);
+#endif
#endif /* CONFIG_PPC64 */
+ OFFSET(TASK_STACK, task_struct, stack);
+#ifdef CONFIG_SMP
+ OFFSET(TASK_CPU, task_struct, cpu);
+#endif
#ifdef CONFIG_LIVEPATCH
OFFSET(TI_livepatch_sp, thread_info, livepatch_sp);
@@ -161,8 +168,6 @@ int main(void)
OFFSET(TI_FLAGS, thread_info, flags);
OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
OFFSET(TI_PREEMPT, thread_info, preempt_count);
- OFFSET(TI_TASK, thread_info, task);
- OFFSET(TI_CPU, thread_info, cpu);
#ifdef CONFIG_PPC64
OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size);
@@ -177,6 +182,8 @@ int main(void)
OFFSET(PACAPROCSTART, paca_struct, cpu_start);
OFFSET(PACAKSAVE, paca_struct, kstack);
OFFSET(PACACURRENT, paca_struct, __current);
+ DEFINE(PACA_THREAD_INFO, offsetof(struct paca_struct, __current) +
+ offsetof(struct task_struct, thread_info));
OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
OFFSET(PACAR1, paca_struct, saved_r1);
OFFSET(PACATOC, paca_struct, kernel_toc);
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index 8c069e96c478..6f1c11e0691f 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -24,6 +24,10 @@ BEGIN_MMU_FTR_SECTION
li r10,0
mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */
END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+ lis r10, (swapper_pg_dir - PAGE_OFFSET)@h
+ ori r10, r10, (swapper_pg_dir - PAGE_OFFSET)@l
+ mtspr SPRN_SPRG_PGDIR, r10
+
BEGIN_FTR_SECTION
bl __init_fpu_registers
END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 9c9bcaae2f75..09231ef06d01 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -6,12 +6,31 @@
* busses using the iommu infrastructure
*/
+#include <linux/dma-direct.h>
+#include <linux/pci.h>
#include <asm/iommu.h>
/*
* Generic iommu implementation
*/
+/*
+ * The coherent mask may be smaller than the real mask, check if we can
+ * really use a direct window.
+ */
+static inline bool dma_iommu_alloc_bypass(struct device *dev)
+{
+ return dev->archdata.iommu_bypass && !iommu_fixed_is_weak &&
+ dma_direct_supported(dev, dev->coherent_dma_mask);
+}
+
+static inline bool dma_iommu_map_bypass(struct device *dev,
+ unsigned long attrs)
+{
+ return dev->archdata.iommu_bypass &&
+ (!iommu_fixed_is_weak || (attrs & DMA_ATTR_WEAK_ORDERING));
+}
+
/* Allocates a contiguous real buffer and creates mappings over it.
* Returns the virtual address of the buffer and sets dma_handle
* to the dma address (mapping) of the first page.
@@ -20,6 +39,8 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs)
{
+ if (dma_iommu_alloc_bypass(dev))
+ return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
dma_handle, dev->coherent_dma_mask, flag,
dev_to_node(dev));
@@ -29,7 +50,11 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
- iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
+ if (dma_iommu_alloc_bypass(dev))
+ dma_direct_free(dev, size, vaddr, dma_handle, attrs);
+ else
+ iommu_free_coherent(get_iommu_table_base(dev), size, vaddr,
+ dma_handle);
}
/* Creates TCEs for a user provided buffer. The user buffer must be
@@ -42,6 +67,9 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
enum dma_data_direction direction,
unsigned long attrs)
{
+ if (dma_iommu_map_bypass(dev, attrs))
+ return dma_direct_map_page(dev, page, offset, size, direction,
+ attrs);
return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
size, device_to_mask(dev), direction, attrs);
}
@@ -51,8 +79,9 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction,
unsigned long attrs)
{
- iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
- attrs);
+ if (!dma_iommu_map_bypass(dev, attrs))
+ iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size,
+ direction, attrs);
}
@@ -60,6 +89,8 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
+ if (dma_iommu_map_bypass(dev, attrs))
+ return dma_direct_map_sg(dev, sglist, nelems, direction, attrs);
return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
device_to_mask(dev), direction, attrs);
}
@@ -68,10 +99,20 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
- ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
+ if (!dma_iommu_map_bypass(dev, attrs))
+ ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
direction, attrs);
}
+static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pci_controller *phb = pci_bus_to_host(pdev->bus);
+
+ return phb->controller_ops.iommu_bypass_supported &&
+ phb->controller_ops.iommu_bypass_supported(pdev, mask);
+}
+
/* We support DMA to/from any memory page via the iommu */
int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
@@ -83,32 +124,48 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
return 0;
}
+ if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
+ dev->archdata.iommu_bypass = true;
+ dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+ return 1;
+ }
+
if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
mask, tbl->it_offset << tbl->it_page_shift);
return 0;
- } else
- return 1;
+ }
+
+ dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
+ dev->archdata.iommu_bypass = false;
+ return 1;
}
-static u64 dma_iommu_get_required_mask(struct device *dev)
+u64 dma_iommu_get_required_mask(struct device *dev)
{
struct iommu_table *tbl = get_iommu_table_base(dev);
u64 mask;
+
if (!tbl)
return 0;
+ if (dev_is_pci(dev)) {
+ u64 bypass_mask = dma_direct_get_required_mask(dev);
+
+ if (dma_iommu_bypass_supported(dev, bypass_mask))
+ return bypass_mask;
+ }
+
mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1);
mask += mask - 1;
return mask;
}
-struct dma_map_ops dma_iommu_ops = {
+const struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
- .mmap = dma_nommu_mmap_coherent,
.map_sg = dma_iommu_map_sg,
.unmap_sg = dma_iommu_unmap_sg,
.dma_supported = dma_iommu_dma_supported,
diff --git a/arch/powerpc/kernel/dma-mask.c b/arch/powerpc/kernel/dma-mask.c
new file mode 100644
index 000000000000..ffbbbc432612
--- /dev/null
+++ b/arch/powerpc/kernel/dma-mask.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+#include <asm/machdep.h>
+
+void arch_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+ if (ppc_md.dma_set_mask)
+ ppc_md.dma_set_mask(dev, dma_mask);
+}
+EXPORT_SYMBOL(arch_dma_set_mask);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 7d5fc9751622..132d61c91629 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -10,101 +10,12 @@
* option) any later version.
*
*/
-
-#include <linux/dma-direct.h>
#include <linux/memblock.h>
-#include <linux/pfn.h>
-#include <linux/of_platform.h>
-#include <linux/platform_device.h>
-#include <linux/pci.h>
-
#include <asm/machdep.h>
#include <asm/swiotlb.h>
-#include <asm/dma.h>
unsigned int ppc_swiotlb_enable;
-static u64 swiotlb_powerpc_get_required(struct device *dev)
-{
- u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr;
-
- end = memblock_end_of_DRAM();
- if (max_direct_dma_addr && end > max_direct_dma_addr)
- end = max_direct_dma_addr;
- end += get_dma_offset(dev);
-
- mask = 1ULL << (fls64(end) - 1);
- mask += mask - 1;
-
- return mask;
-}
-
-/*
- * At the moment, all platforms that use this code only require
- * swiotlb to be used if we're operating on HIGHMEM. Since
- * we don't ever call anything other than map_sg, unmap_sg,
- * map_page, and unmap_page on highmem, use normal dma_ops
- * for everything else.
- */
-const struct dma_map_ops powerpc_swiotlb_dma_ops = {
- .alloc = __dma_nommu_alloc_coherent,
- .free = __dma_nommu_free_coherent,
- .mmap = dma_nommu_mmap_coherent,
- .map_sg = dma_direct_map_sg,
- .unmap_sg = dma_direct_unmap_sg,
- .dma_supported = swiotlb_dma_supported,
- .map_page = dma_direct_map_page,
- .unmap_page = dma_direct_unmap_page,
- .sync_single_for_cpu = dma_direct_sync_single_for_cpu,
- .sync_single_for_device = dma_direct_sync_single_for_device,
- .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
- .sync_sg_for_device = dma_direct_sync_sg_for_device,
- .get_required_mask = swiotlb_powerpc_get_required,
-};
-
-void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
-{
- struct pci_controller *hose;
- struct dev_archdata *sd;
-
- hose = pci_bus_to_host(pdev->bus);
- sd = &pdev->dev.archdata;
- sd->max_direct_dma_addr =
- hose->dma_window_base_cur + hose->dma_window_size;
-}
-
-static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
- struct dev_archdata *sd;
-
- /* We are only intereted in device addition */
- if (action != BUS_NOTIFY_ADD_DEVICE)
- return 0;
-
- sd = &dev->archdata;
- sd->max_direct_dma_addr = 0;
-
- /* May need to bounce if the device can't address all of DRAM */
- if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())
- set_dma_ops(dev, &powerpc_swiotlb_dma_ops);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
- .notifier_call = ppc_swiotlb_bus_notify,
- .priority = 0,
-};
-
-int __init swiotlb_setup_bus_notifier(void)
-{
- bus_register_notifier(&platform_bus_type,
- &ppc_swiotlb_plat_bus_notifier);
- return 0;
-}
-
void __init swiotlb_detect_4g(void)
{
if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
deleted file mode 100644
index b1903ebb2e9c..000000000000
--- a/arch/powerpc/kernel/dma.c
+++ /dev/null
@@ -1,362 +0,0 @@
-/*
- * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
- *
- * Provide default implementations of the DMA mapping callbacks for
- * directly mapped busses.
- */
-
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/dma-debug.h>
-#include <linux/gfp.h>
-#include <linux/memblock.h>
-#include <linux/export.h>
-#include <linux/pci.h>
-#include <asm/vio.h>
-#include <asm/bug.h>
-#include <asm/machdep.h>
-#include <asm/swiotlb.h>
-#include <asm/iommu.h>
-
-/*
- * Generic direct DMA implementation
- *
- * This implementation supports a per-device offset that can be applied if
- * the address at which memory is visible to devices is not 0. Platform code
- * can set archdata.dma_data to an unsigned long holding the offset. By
- * default the offset is PCI_DRAM_OFFSET.
- */
-
-static u64 __maybe_unused get_pfn_limit(struct device *dev)
-{
- u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1;
- struct dev_archdata __maybe_unused *sd = &dev->archdata;
-
-#ifdef CONFIG_SWIOTLB
- if (sd->max_direct_dma_addr && dev->dma_ops == &powerpc_swiotlb_dma_ops)
- pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT);
-#endif
-
- return pfn;
-}
-
-static int dma_nommu_dma_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_PPC64
- u64 limit = get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
-
- /* Limit fits in the mask, we are good */
- if (mask >= limit)
- return 1;
-
-#ifdef CONFIG_FSL_SOC
- /*
- * Freescale gets another chance via ZONE_DMA, however
- * that will have to be refined if/when they support iommus
- */
- return 1;
-#endif
- /* Sorry ... */
- return 0;
-#else
- return 1;
-#endif
-}
-
-#ifndef CONFIG_NOT_COHERENT_CACHE
-void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- void *ret;
- struct page *page;
- int node = dev_to_node(dev);
-#ifdef CONFIG_FSL_SOC
- u64 pfn = get_pfn_limit(dev);
- int zone;
-
- /*
- * This code should be OK on other platforms, but we have drivers that
- * don't set coherent_dma_mask. As a workaround we just ifdef it. This
- * whole routine needs some serious cleanup.
- */
-
- zone = dma_pfn_limit_to_zone(pfn);
- if (zone < 0) {
- dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
- __func__, pfn);
- return NULL;
- }
-
- switch (zone) {
-#ifdef CONFIG_ZONE_DMA
- case ZONE_DMA:
- flag |= GFP_DMA;
- break;
-#endif
- };
-#endif /* CONFIG_FSL_SOC */
-
- page = alloc_pages_node(node, flag, get_order(size));
- if (page == NULL)
- return NULL;
- ret = page_address(page);
- memset(ret, 0, size);
- *dma_handle = __pa(ret) + get_dma_offset(dev);
-
- return ret;
-}
-
-void __dma_nommu_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- free_pages((unsigned long)vaddr, get_order(size));
-}
-#endif /* !CONFIG_NOT_COHERENT_CACHE */
-
-static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- struct iommu_table *iommu;
-
- /* The coherent mask may be smaller than the real mask, check if
- * we can really use the direct ops
- */
- if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask))
- return __dma_nommu_alloc_coherent(dev, size, dma_handle,
- flag, attrs);
-
- /* Ok we can't ... do we have an iommu ? If not, fail */
- iommu = get_iommu_table_base(dev);
- if (!iommu)
- return NULL;
-
- /* Try to use the iommu */
- return iommu_alloc_coherent(dev, iommu, size, dma_handle,
- dev->coherent_dma_mask, flag,
- dev_to_node(dev));
-}
-
-static void dma_nommu_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- struct iommu_table *iommu;
-
- /* See comments in dma_nommu_alloc_coherent() */
- if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask))
- return __dma_nommu_free_coherent(dev, size, vaddr, dma_handle,
- attrs);
- /* Maybe we used an iommu ... */
- iommu = get_iommu_table_base(dev);
-
- /* If we hit that we should have never allocated in the first
- * place so how come we are freeing ?
- */
- if (WARN_ON(!iommu))
- return;
- iommu_free_coherent(iommu, size, vaddr, dma_handle);
-}
-
-int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t handle, size_t size,
- unsigned long attrs)
-{
- unsigned long pfn;
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
-#else
- pfn = page_to_pfn(virt_to_page(cpu_addr));
-#endif
- return remap_pfn_range(vma, vma->vm_start,
- pfn + vma->vm_pgoff,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot);
-}
-
-static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nents, i) {
- sg->dma_address = sg_phys(sg) + get_dma_offset(dev);
- sg->dma_length = sg->length;
-
- if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
- continue;
-
- __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
- }
-
- return nents;
-}
-
-static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nents, i)
- __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
-}
-
-static u64 dma_nommu_get_required_mask(struct device *dev)
-{
- u64 end, mask;
-
- end = memblock_end_of_DRAM() + get_dma_offset(dev);
-
- mask = 1ULL << (fls64(end) - 1);
- mask += mask - 1;
-
- return mask;
-}
-
-static inline dma_addr_t dma_nommu_map_page(struct device *dev,
- struct page *page,
- unsigned long offset,
- size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- __dma_sync_page(page, offset, size, dir);
-
- return page_to_phys(page) + offset + get_dma_offset(dev);
-}
-
-static inline void dma_nommu_unmap_page(struct device *dev,
- dma_addr_t dma_address,
- size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- __dma_sync(bus_to_virt(dma_address), size, direction);
-}
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
-static inline void dma_nommu_sync_sg(struct device *dev,
- struct scatterlist *sgl, int nents,
- enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nents, i)
- __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
-}
-
-static inline void dma_nommu_sync_single(struct device *dev,
- dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction)
-{
- __dma_sync(bus_to_virt(dma_handle), size, direction);
-}
-#endif
-
-const struct dma_map_ops dma_nommu_ops = {
- .alloc = dma_nommu_alloc_coherent,
- .free = dma_nommu_free_coherent,
- .mmap = dma_nommu_mmap_coherent,
- .map_sg = dma_nommu_map_sg,
- .unmap_sg = dma_nommu_unmap_sg,
- .dma_supported = dma_nommu_dma_supported,
- .map_page = dma_nommu_map_page,
- .unmap_page = dma_nommu_unmap_page,
- .get_required_mask = dma_nommu_get_required_mask,
-#ifdef CONFIG_NOT_COHERENT_CACHE
- .sync_single_for_cpu = dma_nommu_sync_single,
- .sync_single_for_device = dma_nommu_sync_single,
- .sync_sg_for_cpu = dma_nommu_sync_sg,
- .sync_sg_for_device = dma_nommu_sync_sg,
-#endif
-};
-EXPORT_SYMBOL(dma_nommu_ops);
-
-int dma_set_coherent_mask(struct device *dev, u64 mask)
-{
- if (!dma_supported(dev, mask)) {
- /*
- * We need to special case the direct DMA ops which can
- * support a fallback for coherent allocations. There
- * is no dma_op->set_coherent_mask() so we have to do
- * things the hard way:
- */
- if (get_dma_ops(dev) != &dma_nommu_ops ||
- get_iommu_table_base(dev) == NULL ||
- !dma_iommu_dma_supported(dev, mask))
- return -EIO;
- }
- dev->coherent_dma_mask = mask;
- return 0;
-}
-EXPORT_SYMBOL(dma_set_coherent_mask);
-
-int dma_set_mask(struct device *dev, u64 dma_mask)
-{
- if (ppc_md.dma_set_mask)
- return ppc_md.dma_set_mask(dev, dma_mask);
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
- struct pci_controller *phb = pci_bus_to_host(pdev->bus);
- if (phb->controller_ops.dma_set_mask)
- return phb->controller_ops.dma_set_mask(pdev, dma_mask);
- }
-
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
- *dev->dma_mask = dma_mask;
- return 0;
-}
-EXPORT_SYMBOL(dma_set_mask);
-
-u64 __dma_get_required_mask(struct device *dev)
-{
- const struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if (unlikely(dma_ops == NULL))
- return 0;
-
- if (dma_ops->get_required_mask)
- return dma_ops->get_required_mask(dev);
-
- return DMA_BIT_MASK(8 * sizeof(dma_addr_t));
-}
-
-u64 dma_get_required_mask(struct device *dev)
-{
- if (ppc_md.dma_get_required_mask)
- return ppc_md.dma_get_required_mask(dev);
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
- struct pci_controller *phb = pci_bus_to_host(pdev->bus);
- if (phb->controller_ops.dma_get_required_mask)
- return phb->controller_ops.dma_get_required_mask(pdev);
- }
-
- return __dma_get_required_mask(dev);
-}
-EXPORT_SYMBOL_GPL(dma_get_required_mask);
-
-static int __init dma_init(void)
-{
-#ifdef CONFIG_IBMVIO
- dma_debug_add_bus(&vio_bus_type);
-#endif
-
- return 0;
-}
-fs_initcall(dma_init);
-
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 8be3721d9302..c66fd3ce6478 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -666,8 +666,10 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
m = &dt_cpu_feature_match_table[i];
if (!strcmp(f->name, m->name)) {
known = true;
- if (m->enable(f))
+ if (m->enable(f)) {
+ cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask;
break;
+ }
pr_info("not enabling: %s (disabled or unsupported by kernel)\n",
f->name);
@@ -675,17 +677,12 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
}
}
- if (!known && enable_unknown) {
- if (!feat_try_enable_unknown(f)) {
- pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
- f->name);
- return false;
- }
+ if (!known && (!enable_unknown || !feat_try_enable_unknown(f))) {
+ pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
+ f->name);
+ return false;
}
- if (m->cpu_ftr_bit_mask)
- cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask;
-
if (known)
pr_debug("enabling: %s\n", f->name);
else
@@ -813,7 +810,6 @@ static int __init process_cpufeatures_node(unsigned long node,
int len;
f = &dt_cpu_features[i];
- memset(f, 0, sizeof(struct dt_cpu_feature));
f->node = node;
@@ -1008,7 +1004,12 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
/* Count and allocate space for cpu features */
of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes,
&nr_dt_cpu_features);
- dt_cpu_features = __va(memblock_phys_alloc(sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, PAGE_SIZE));
+ dt_cpu_features = memblock_alloc(sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, PAGE_SIZE);
+ if (!dt_cpu_features)
+ panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
+ __func__,
+ sizeof(struct dt_cpu_feature) * nr_dt_cpu_features,
+ PAGE_SIZE);
cpufeatures_setup_start(isa);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index ae05203eb4de..289c0b37d845 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -109,7 +109,14 @@ EXPORT_SYMBOL(eeh_subsystem_flags);
* frozen count in last hour exceeds this limit, the PE will
* be forced to be offline permanently.
*/
-int eeh_max_freezes = 5;
+u32 eeh_max_freezes = 5;
+
+/*
+ * Controls whether a recovery event should be scheduled when an
+ * isolated device is discovered. This is only really useful for
+ * debugging problems with the EEH core.
+ */
+bool eeh_debugfs_no_recover;
/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;
@@ -823,15 +830,15 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
switch (state) {
case pcie_deassert_reset:
eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
- eeh_unfreeze_pe(pe, false);
+ eeh_unfreeze_pe(pe);
if (!(pe->type & EEH_PE_VF))
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
break;
case pcie_hot_reset:
eeh_pe_mark_isolated(pe);
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
if (!(pe->type & EEH_PE_VF))
@@ -840,7 +847,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
break;
case pcie_warm_reset:
eeh_pe_mark_isolated(pe);
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
if (!(pe->type & EEH_PE_VF))
@@ -848,7 +855,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
break;
default:
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
return -EINVAL;
};
@@ -877,6 +884,24 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
return NULL;
}
+static void eeh_pe_refreeze_passed(struct eeh_pe *root)
+{
+ struct eeh_pe *pe;
+ int state;
+
+ eeh_for_each_pe(root, pe) {
+ if (eeh_pe_passed(pe)) {
+ state = eeh_ops->get_state(pe, NULL);
+ if (state &
+ (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) {
+ pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n",
+ pe->phb->global_number, pe->addr);
+ eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
+ }
+ }
+ }
+}
+
/**
* eeh_pe_reset_full - Complete a full reset process on the indicated PE
* @pe: EEH PE
@@ -889,12 +914,12 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
*
* This function will attempt to reset a PE three times before failing.
*/
-int eeh_pe_reset_full(struct eeh_pe *pe)
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
{
int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
int type = EEH_RESET_HOT;
unsigned int freset = 0;
- int i, state, ret;
+ int i, state = 0, ret;
/*
* Determine the type of reset to perform - hot or fundamental.
@@ -911,32 +936,42 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
/* Make three attempts at resetting the bus */
for (i = 0; i < 3; i++) {
- ret = eeh_pe_reset(pe, type);
- if (ret)
- break;
-
- ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
- if (ret)
- break;
+ ret = eeh_pe_reset(pe, type, include_passed);
+ if (!ret)
+ ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE,
+ include_passed);
+ if (ret) {
+ ret = -EIO;
+ pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n\n",
+ state, pe->phb->global_number, pe->addr, i + 1);
+ continue;
+ }
+ if (i)
+ pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n",
+ pe->phb->global_number, pe->addr, i + 1);
/* Wait until the PE is in a functioning state */
state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if (state < 0) {
- pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x",
- __func__, pe->phb->global_number, pe->addr);
+ pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x",
+ pe->phb->global_number, pe->addr);
ret = -ENOTRECOVERABLE;
break;
}
if (eeh_state_active(state))
break;
-
- /* Set error in case this is our last attempt */
- ret = -EIO;
- pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n",
- __func__, state, pe->phb->global_number, pe->addr, (i + 1));
+ else
+ pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n",
+ pe->phb->global_number, pe->addr, state, i + 1);
}
- eeh_pe_state_clear(pe, reset_state);
+ /* Resetting the PE may have unfrozen child PEs. If those PEs have been
+ * (potentially) passed through to a guest, re-freeze them:
+ */
+ if (!include_passed)
+ eeh_pe_refreeze_passed(pe);
+
+ eeh_pe_state_clear(pe, reset_state, true);
return ret;
}
@@ -1309,7 +1344,7 @@ void eeh_remove_device(struct pci_dev *dev)
edev->mode &= ~EEH_DEV_SYSFS;
}
-int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
+int eeh_unfreeze_pe(struct eeh_pe *pe)
{
int ret;
@@ -1327,10 +1362,6 @@ int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
return ret;
}
- /* Clear software isolated state */
- if (sw_state && (pe->state & EEH_PE_ISOLATED))
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
-
return ret;
}
@@ -1382,7 +1413,10 @@ static int eeh_pe_change_owner(struct eeh_pe *pe)
}
}
- return eeh_unfreeze_pe(pe, true);
+ ret = eeh_unfreeze_pe(pe);
+ if (!ret)
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
+ return ret;
}
/**
@@ -1612,13 +1646,12 @@ int eeh_pe_get_state(struct eeh_pe *pe)
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);
-static int eeh_pe_reenable_devices(struct eeh_pe *pe)
+static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
{
struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
int ret = 0;
- /* Restore config space */
eeh_pe_restore_bars(pe);
/*
@@ -1639,7 +1672,14 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe)
}
/* The PE is still in frozen state */
- return eeh_unfreeze_pe(pe, true);
+ if (include_passed || !eeh_pe_passed(pe)) {
+ ret = eeh_unfreeze_pe(pe);
+ } else
+ pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n",
+ pe->phb->global_number, pe->addr);
+ if (!ret)
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed);
+ return ret;
}
@@ -1652,7 +1692,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe)
* indicated type, either fundamental reset or hot reset.
* PE reset is the most important part for error recovery.
*/
-int eeh_pe_reset(struct eeh_pe *pe, int option)
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed)
{
int ret = 0;
@@ -1666,11 +1706,11 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
switch (option) {
case EEH_RESET_DEACTIVATE:
ret = eeh_ops->reset(pe, option);
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed);
if (ret)
break;
- ret = eeh_pe_reenable_devices(pe);
+ ret = eeh_pe_reenable_devices(pe, include_passed);
break;
case EEH_RESET_HOT:
case EEH_RESET_FUNDAMENTAL:
@@ -1796,22 +1836,64 @@ static int eeh_enable_dbgfs_get(void *data, u64 *val)
return 0;
}
-static int eeh_freeze_dbgfs_set(void *data, u64 val)
-{
- eeh_max_freezes = val;
- return 0;
-}
+DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
+ eeh_enable_dbgfs_set, "0x%llx\n");
-static int eeh_freeze_dbgfs_get(void *data, u64 *val)
+static ssize_t eeh_force_recover_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
{
- *val = eeh_max_freezes;
- return 0;
+ struct pci_controller *hose;
+ uint32_t phbid, pe_no;
+ struct eeh_pe *pe;
+ char buf[20];
+ int ret;
+
+ ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ /*
+ * When PE is NULL the event is a "special" event. Rather than
+ * recovering a specific PE it forces the EEH core to scan for failed
+ * PHBs and recovers each. This needs to be done before any device
+ * recoveries can occur.
+ */
+ if (!strncmp(buf, "hwcheck", 7)) {
+ __eeh_send_failure_event(NULL);
+ return count;
+ }
+
+ ret = sscanf(buf, "%x:%x", &phbid, &pe_no);
+ if (ret != 2)
+ return -EINVAL;
+
+ hose = pci_find_controller_for_domain(phbid);
+ if (!hose)
+ return -ENODEV;
+
+ /* Retrieve PE */
+ pe = eeh_pe_get(hose, pe_no, 0);
+ if (!pe)
+ return -ENODEV;
+
+ /*
+ * We don't do any state checking here since the detection
+ * process is async to the recovery process. The recovery
+ * thread *should* not break even if we schedule a recovery
+ * from an odd state (e.g. PE removed, or recovery of a
+ * non-isolated PE)
+ */
+ __eeh_send_failure_event(pe);
+
+ return ret < 0 ? ret : count;
}
-DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
- eeh_enable_dbgfs_set, "0x%llx\n");
-DEFINE_DEBUGFS_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
- eeh_freeze_dbgfs_set, "0x%llx\n");
+static const struct file_operations eeh_force_recover_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_force_recover_write,
+};
#endif
static int __init eeh_init_proc(void)
@@ -1822,9 +1904,15 @@ static int __init eeh_init_proc(void)
debugfs_create_file_unsafe("eeh_enable", 0600,
powerpc_debugfs_root, NULL,
&eeh_enable_dbgfs_ops);
- debugfs_create_file_unsafe("eeh_max_freezes", 0600,
- powerpc_debugfs_root, NULL,
- &eeh_freeze_dbgfs_ops);
+ debugfs_create_u32("eeh_max_freezes", 0600,
+ powerpc_debugfs_root, &eeh_max_freezes);
+ debugfs_create_bool("eeh_disable_recovery", 0600,
+ powerpc_debugfs_root,
+ &eeh_debugfs_no_recover);
+ debugfs_create_file_unsafe("eeh_force_recover", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_force_recover_fops);
+ eeh_cache_debugfs_init();
#endif
}
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 201943d54a6e..9c68f0837385 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -26,6 +26,7 @@
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <asm/pci-bridge.h>
+#include <asm/debugfs.h>
#include <asm/ppc-pci.h>
@@ -113,7 +114,7 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
while (n) {
struct pci_io_addr_range *piar;
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
- pr_debug("PCI: %s addr range %d [%pap-%pap]: %s\n",
+ pr_info("PCI: %s addr range %d [%pap-%pap]: %s\n",
(piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
&piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
cnt++;
@@ -157,10 +158,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
piar->pcidev = dev;
piar->flags = flags;
-#ifdef DEBUG
pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n",
&alo, &ahi, pci_name(dev));
-#endif
rb_link_node(&piar->rb_node, parent, p);
rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
@@ -240,6 +239,8 @@ restart:
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
if (piar->pcidev == dev) {
+ pr_debug("PIAR: remove range=[%pap:%pap] dev=%s\n",
+ &piar->addr_lo, &piar->addr_hi, pci_name(dev));
rb_erase(n, &pci_io_addr_cache_root.rb_root);
kfree(piar);
goto restart;
@@ -298,9 +299,30 @@ void eeh_addr_cache_build(void)
eeh_addr_cache_insert_dev(dev);
eeh_sysfs_add_device(dev);
}
+}
-#ifdef DEBUG
- /* Verify tree built up above, echo back the list of addrs. */
- eeh_addr_cache_print(&pci_io_addr_cache_root);
-#endif
+static int eeh_addr_cache_show(struct seq_file *s, void *v)
+{
+ struct pci_io_addr_range *piar;
+ struct rb_node *n;
+
+ spin_lock(&pci_io_addr_cache_root.piar_lock);
+ for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) {
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+ seq_printf(s, "%s addr range [%pap-%pap]: %s\n",
+ (piar->flags & IORESOURCE_IO) ? "i/o" : "mem",
+ &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
+ }
+ spin_unlock(&pci_io_addr_cache_root.piar_lock);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache);
+
+void eeh_cache_debugfs_init(void)
+{
+ debugfs_create_file_unsafe("eeh_address_cache", 0400,
+ powerpc_debugfs_root, NULL,
+ &eeh_addr_cache_fops);
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 99eab7bc7edc..89623962c727 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -510,22 +510,11 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
* support EEH. So we just care about PCI devices for
* simplicity here.
*/
- if (!dev || (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
- return NULL;
-
- /*
- * We rely on count-based pcibios_release_device() to
- * detach permanently offlined PEs. Unfortunately, that's
- * not reliable enough. We might have the permanently
- * offlined PEs attached, but we needn't take care of
- * them and their child devices.
- */
- if (eeh_dev_removed(edev))
+ if (!eeh_edev_actionable(edev) ||
+ (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
return NULL;
if (rmv_data) {
- if (eeh_pe_passed(edev->pe))
- return NULL;
driver = eeh_pcid_get(dev);
if (driver) {
if (driver->err_handler &&
@@ -539,8 +528,8 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
}
/* Remove it from PCI subsystem */
- pr_debug("EEH: Removing %s without EEH sensitive driver\n",
- pci_name(dev));
+ pr_info("EEH: Removing %s without EEH sensitive driver\n",
+ pci_name(dev));
edev->mode |= EEH_DEV_DISCONNECTED;
if (rmv_data)
rmv_data->removed_dev_count++;
@@ -591,34 +580,22 @@ static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
* PE reset (for 3 times), we try to clear the frozen state
* for 3 times as well.
*/
-static void *__eeh_clear_pe_frozen_state(struct eeh_pe *pe, void *flag)
+static int eeh_clear_pe_frozen_state(struct eeh_pe *root, bool include_passed)
{
- bool clear_sw_state = *(bool *)flag;
- int i, rc = 1;
-
- for (i = 0; rc && i < 3; i++)
- rc = eeh_unfreeze_pe(pe, clear_sw_state);
+ struct eeh_pe *pe;
+ int i;
- /* Stop immediately on any errors */
- if (rc) {
- pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n",
- __func__, rc, pe->phb->global_number, pe->addr);
- return (void *)pe;
+ eeh_for_each_pe(root, pe) {
+ if (include_passed || !eeh_pe_passed(pe)) {
+ for (i = 0; i < 3; i++)
+ if (!eeh_unfreeze_pe(pe))
+ break;
+ if (i >= 3)
+ return -EIO;
+ }
}
-
- return NULL;
-}
-
-static int eeh_clear_pe_frozen_state(struct eeh_pe *pe,
- bool clear_sw_state)
-{
- void *rc;
-
- rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state);
- if (!rc)
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
-
- return rc ? -EIO : 0;
+ eeh_pe_state_clear(root, EEH_PE_ISOLATED, include_passed);
+ return 0;
}
int eeh_pe_reset_and_recover(struct eeh_pe *pe)
@@ -636,16 +613,16 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
/* Issue reset */
- ret = eeh_pe_reset_full(pe);
+ ret = eeh_pe_reset_full(pe, true);
if (ret) {
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
return ret;
}
/* Unfreeze the PE */
ret = eeh_clear_pe_frozen_state(pe, true);
if (ret) {
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
return ret;
}
@@ -653,7 +630,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
/* Clear recovery mode */
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
return 0;
}
@@ -676,6 +653,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
time64_t tstamp;
int cnt, rc;
struct eeh_dev *edev;
+ struct eeh_pe *tmp_pe;
+ bool any_passed = false;
+
+ eeh_for_each_pe(pe, tmp_pe)
+ any_passed |= eeh_pe_passed(tmp_pe);
/* pcibios will clear the counter; save the value */
cnt = pe->freeze_count;
@@ -688,7 +670,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* into pci_hp_add_devices().
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
- if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
+ if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
} else {
pci_lock_rescan_remove();
@@ -705,7 +687,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* config accesses. So we prefer to block them. However, controlled
* PCI config accesses initiated from EEH itself are allowed.
*/
- rc = eeh_pe_reset_full(pe);
+ rc = eeh_pe_reset_full(pe, false);
if (rc)
return rc;
@@ -744,11 +726,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
eeh_add_virt_device(edev);
} else {
if (!driver_eeh_aware)
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
pci_hp_add_devices(bus);
}
}
- eeh_pe_state_clear(pe, EEH_PE_KEEP);
+ eeh_pe_state_clear(pe, EEH_PE_KEEP, true);
pe->tstamp = tstamp;
pe->freeze_count = cnt;
@@ -900,7 +882,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
* is still in frozen state. Clear it before
* resuming the PE.
*/
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
result = PCI_ERS_RESULT_RECOVERED;
}
}
@@ -977,7 +959,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
} else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
pci_lock_rescan_remove();
@@ -987,7 +969,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
}
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}
/**
@@ -1069,7 +1051,7 @@ void eeh_handle_special_event(void)
continue;
/* Notify all devices to be down */
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_set_channel_state(pe, pci_channel_io_perm_failure);
eeh_pe_report(
"error_detected(permanent failure)", pe,
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 227e57f980df..539aca055d70 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -121,7 +121,7 @@ int eeh_event_init(void)
* the actual event will be delivered in a normal context
* (from a workqueue).
*/
-int eeh_send_failure_event(struct eeh_pe *pe)
+int __eeh_send_failure_event(struct eeh_pe *pe)
{
unsigned long flags;
struct eeh_event *event;
@@ -144,6 +144,20 @@ int eeh_send_failure_event(struct eeh_pe *pe)
return 0;
}
+int eeh_send_failure_event(struct eeh_pe *pe)
+{
+ /*
+ * If we've manually supressed recovery events via debugfs
+ * then just drop it on the floor.
+ */
+ if (eeh_debugfs_no_recover) {
+ pr_err("EEH: Event dropped due to no_recover setting\n");
+ return 0;
+ }
+
+ return __eeh_send_failure_event(pe);
+}
+
/**
* eeh_remove_event - Remove EEH event from the queue
* @pe: Event binding to the PE
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 6fa2032e0594..8b578891f27c 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -657,62 +657,52 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
}
/**
- * __eeh_pe_state_clear - Clear state for the PE
+ * eeh_pe_state_clear - Clear state for the PE
* @data: EEH PE
- * @flag: state
+ * @state: state
+ * @include_passed: include passed-through devices?
*
* The function is used to clear the indicated state from the
* given PE. Besides, we also clear the check count of the PE
* as well.
*/
-static void *__eeh_pe_state_clear(struct eeh_pe *pe, void *flag)
+void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed)
{
- int state = *((int *)flag);
+ struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
- /* Keep the state of permanently removed PE intact */
- if (pe->state & EEH_PE_REMOVED)
- return NULL;
+ eeh_for_each_pe(root, pe) {
+ /* Keep the state of permanently removed PE intact */
+ if (pe->state & EEH_PE_REMOVED)
+ continue;
- pe->state &= ~state;
+ if (!include_passed && eeh_pe_passed(pe))
+ continue;
- /*
- * Special treatment on clearing isolated state. Clear
- * check count since last isolation and put all affected
- * devices to normal state.
- */
- if (!(state & EEH_PE_ISOLATED))
- return NULL;
+ pe->state &= ~state;
- pe->check_count = 0;
- eeh_pe_for_each_dev(pe, edev, tmp) {
- pdev = eeh_dev_to_pci_dev(edev);
- if (!pdev)
+ /*
+ * Special treatment on clearing isolated state. Clear
+ * check count since last isolation and put all affected
+ * devices to normal state.
+ */
+ if (!(state & EEH_PE_ISOLATED))
continue;
- pdev->error_state = pci_channel_io_normal;
- }
-
- /* Unblock PCI config access if required */
- if (pe->state & EEH_PE_CFG_RESTRICTED)
- pe->state &= ~EEH_PE_CFG_BLOCKED;
+ pe->check_count = 0;
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (!pdev)
+ continue;
- return NULL;
-}
+ pdev->error_state = pci_channel_io_normal;
+ }
-/**
- * eeh_pe_state_clear - Clear state for the PE and its children
- * @pe: PE
- * @state: state to be cleared
- *
- * When the PE and its children has been recovered from error,
- * we need clear the error state for that. The function is used
- * for the purpose.
- */
-void eeh_pe_state_clear(struct eeh_pe *pe, int state)
-{
- eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
+ /* Unblock PCI config access if required */
+ if (pe->state & EEH_PE_CFG_RESTRICTED)
+ pe->state &= ~EEH_PE_CFG_BLOCKED;
+ }
}
/*
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index deed906dd8f1..3fa04dda1737 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -82,8 +82,9 @@ static ssize_t eeh_pe_state_store(struct device *dev,
if (!(edev->pe->state & EEH_PE_ISOLATED))
return count;
- if (eeh_unfreeze_pe(edev->pe, true))
+ if (eeh_unfreeze_pe(edev->pe))
return -EIO;
+ eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED, true);
return count;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0768dfd8a64e..b61cfd29c76f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -97,14 +97,11 @@ crit_transfer_to_handler:
mfspr r0,SPRN_SRR1
stw r0,_SRR1(r11)
- /* set the stack limit to the current stack
- * and set the limit to protect the thread_info
- * struct
- */
+ /* set the stack limit to the current stack */
mfspr r8,SPRN_SPRG_THREAD
lwz r0,KSP_LIMIT(r8)
stw r0,SAVED_KSP_LIMIT(r11)
- rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ rlwinm r0,r1,0,0,(31 - THREAD_SHIFT)
stw r0,KSP_LIMIT(r8)
/* fall through */
#endif
@@ -121,14 +118,11 @@ crit_transfer_to_handler:
mfspr r0,SPRN_SRR1
stw r0,crit_srr1@l(0)
- /* set the stack limit to the current stack
- * and set the limit to protect the thread_info
- * struct
- */
+ /* set the stack limit to the current stack */
mfspr r8,SPRN_SPRG_THREAD
lwz r0,KSP_LIMIT(r8)
stw r0,saved_ksp_limit@l(0)
- rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ rlwinm r0,r1,0,0,(31 - THREAD_SHIFT)
stw r0,KSP_LIMIT(r8)
/* fall through */
#endif
@@ -157,7 +151,6 @@ transfer_to_handler:
stw r2,_XER(r11)
mfspr r12,SPRN_SPRG_THREAD
addi r2,r12,-THREAD
- tovirt(r2,r2) /* set r2 to current */
beq 2f /* if from user, fix up THREAD.regs */
addi r11,r1,STACK_FRAME_OVERHEAD
stw r11,PT_REGS(r12)
@@ -166,6 +159,9 @@ transfer_to_handler:
internal debug mode bit to do this. */
lwz r12,THREAD_DBCR0(r12)
andis. r12,r12,DBCR0_IDM@h
+#endif
+ ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
beq+ 3f
/* From user and task is ptraced - load up global dbcr0 */
li r12,-1 /* clear all pending debug events */
@@ -174,8 +170,7 @@ transfer_to_handler:
tophys(r11,r11)
addi r11,r11,global_dbcr0@l
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_CPU(r9)
+ lwz r9,TASK_CPU(r2)
slwi r9,r9,3
add r11,r11,r9
#endif
@@ -185,11 +180,6 @@ transfer_to_handler:
addi r12,r12,-1
stw r12,4(r11)
#endif
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- CURRENT_THREAD_INFO(r9, r1)
- tophys(r9, r9)
- ACCOUNT_CPU_USER_ENTRY(r9, r11, r12)
-#endif
b 3f
@@ -201,9 +191,7 @@ transfer_to_handler:
ble- stack_ovf /* then the kernel stack overflowed */
5:
#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
- CURRENT_THREAD_INFO(r9, r1)
- tophys(r9,r9) /* check local flags */
- lwz r12,TI_LOCAL_FLAGS(r9)
+ lwz r12,TI_LOCAL_FLAGS(r2)
mtcrf 0x01,r12
bt- 31-TLF_NAPPING,4f
bt- 31-TLF_SLEEPING,7f
@@ -212,6 +200,7 @@ transfer_to_handler:
transfer_to_handler_cont:
3:
mflr r9
+ tovirt(r2, r2) /* set r2 to current */
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
@@ -275,11 +264,11 @@ reenable_mmu: /* re-enable mmu so we can */
#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
4: rlwinm r12,r12,0,~_TLF_NAPPING
- stw r12,TI_LOCAL_FLAGS(r9)
+ stw r12,TI_LOCAL_FLAGS(r2)
b power_save_ppc32_restore
7: rlwinm r12,r12,0,~_TLF_SLEEPING
- stw r12,TI_LOCAL_FLAGS(r9)
+ stw r12,TI_LOCAL_FLAGS(r2)
lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
@@ -351,8 +340,7 @@ _GLOBAL(DoSyscall)
mtmsr r11
1:
#endif /* CONFIG_TRACE_IRQFLAGS */
- CURRENT_THREAD_INFO(r10, r1)
- lwz r11,TI_FLAGS(r10)
+ lwz r11,TI_FLAGS(r2)
andi. r11,r11,_TIF_SYSCALL_DOTRACE
bne- syscall_dotrace
syscall_dotrace_cont:
@@ -385,13 +373,12 @@ ret_from_syscall:
lwz r3,GPR3(r1)
#endif
mr r6,r3
- CURRENT_THREAD_INFO(r12, r1)
/* disable interrupts so current_thread_info()->flags can't change */
LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
- lwz r9,TI_FLAGS(r12)
+ lwz r9,TI_FLAGS(r2)
li r8,-MAX_ERRNO
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne- syscall_exit_work
@@ -438,8 +425,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
andi. r4,r8,MSR_PR
beq 3f
- CURRENT_THREAD_INFO(r4, r1)
- ACCOUNT_CPU_USER_EXIT(r4, r5, r7)
+ ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
3:
#endif
lwz r4,_LINK(r1)
@@ -532,7 +518,7 @@ syscall_exit_work:
/* Clear per-syscall TIF flags if any are set. */
li r11,_TIF_PERSYSCALL_MASK
- addi r12,r12,TI_FLAGS
+ addi r12,r2,TI_FLAGS
3: lwarx r8,0,r12
andc r8,r8,r11
#ifdef CONFIG_IBM405_ERR77
@@ -540,7 +526,6 @@ syscall_exit_work:
#endif
stwcx. r8,0,r12
bne- 3b
- subi r12,r12,TI_FLAGS
4: /* Anything which requires enabling interrupts? */
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
@@ -745,6 +730,9 @@ fast_exception_return:
mtcr r10
lwz r10,_LINK(r11)
mtlr r10
+ /* Clear the exception_marker on the stack to avoid confusing stacktrace */
+ li r10, 0
+ stw r10, 8(r11)
REST_GPR(10, r11)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
@@ -819,8 +807,7 @@ ret_from_except:
user_exc_return: /* r10 contains MSR_KERNEL here */
/* Check current_thread_info()->flags */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_FLAGS(r9)
+ lwz r9,TI_FLAGS(r2)
andi. r0,r9,_TIF_USER_WORK_MASK
bne do_work
@@ -832,18 +819,14 @@ restore_user:
andis. r10,r0,DBCR0_IDM@h
bnel- load_dbcr0
#endif
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- CURRENT_THREAD_INFO(r9, r1)
- ACCOUNT_CPU_USER_EXIT(r9, r10, r11)
-#endif
+ ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
b restore
/* N.B. the only way to get here is from the beq following ret_from_except. */
resume_kernel:
/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r8,TI_FLAGS(r9)
+ lwz r8,TI_FLAGS(r2)
andis. r0,r8,_TIF_EMULATE_STACK_STORE@h
beq+ 1f
@@ -869,7 +852,7 @@ resume_kernel:
/* Clear _TIF_EMULATE_STACK_STORE flag */
lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r9,TI_FLAGS
+ addi r5,r2,TI_FLAGS
0: lwarx r8,0,r5
andc r8,r8,r11
#ifdef CONFIG_IBM405_ERR77
@@ -881,7 +864,7 @@ resume_kernel:
#ifdef CONFIG_PREEMPT
/* check current_thread_info->preempt_count */
- lwz r0,TI_PREEMPT(r9)
+ lwz r0,TI_PREEMPT(r2)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
bne restore
andi. r8,r8,_TIF_NEED_RESCHED
@@ -897,8 +880,7 @@ resume_kernel:
bl trace_hardirqs_off
#endif
1: bl preempt_schedule_irq
- CURRENT_THREAD_INFO(r9, r1)
- lwz r3,TI_FLAGS(r9)
+ lwz r3,TI_FLAGS(r2)
andi. r0,r3,_TIF_NEED_RESCHED
bne- 1b
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -982,6 +964,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
mtcrf 0xFF,r10
mtlr r11
+ /* Clear the exception_marker on the stack to avoid confusing stacktrace */
+ li r10, 0
+ stw r10, 8(r1)
/*
* Once we put values in SRR0 and SRR1, we are in a state
* where exceptions are not recoverable, since taking an
@@ -997,9 +982,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
.globl exc_exit_restart
exc_exit_restart:
lwz r12,_NIP(r1)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r9
REST_4GPRS(9, r1)
@@ -1021,6 +1003,9 @@ exc_exit_restart_end:
mtlr r11
lwz r10,_CCR(r1)
mtcrf 0xff,r10
+ /* Clear the exception_marker on the stack to avoid confusing stacktrace */
+ li r10, 0
+ stw r10, 8(r1)
REST_2GPRS(9, r1)
.globl exc_exit_restart
exc_exit_restart:
@@ -1166,10 +1151,6 @@ ret_from_debug_exc:
mfspr r9,SPRN_SPRG_THREAD
lwz r10,SAVED_KSP_LIMIT(r1)
stw r10,KSP_LIMIT(r9)
- lwz r9,THREAD_INFO-THREAD(r9)
- CURRENT_THREAD_INFO(r10, r1)
- lwz r10,TI_PREEMPT(r10)
- stw r10,TI_PREEMPT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_xSRR(CSRR0,CSRR1);
RESTORE_MMU_REGS;
@@ -1201,8 +1182,7 @@ load_dbcr0:
lis r11,global_dbcr0@ha
addi r11,r11,global_dbcr0@l
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_CPU(r9)
+ lwz r9,TASK_CPU(r2)
slwi r9,r9,3
add r11,r11,r9
#endif
@@ -1242,8 +1222,7 @@ recheck:
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_FLAGS(r9)
+ lwz r9,TI_FLAGS(r2)
andi. r0,r9,_TIF_NEED_RESCHED
bne- do_resched
andi. r0,r9,_TIF_USER_WORK_MASK
@@ -1292,10 +1271,13 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_601)
lwz r3,_TRAP(r1)
andi. r0,r3,1
- beq 4f
+ beq 5f
SAVE_NVGPRS(r1)
rlwinm r3,r3,0,0,30
stw r3,_TRAP(r1)
+5: mfspr r2,SPRN_SPRG_THREAD
+ addi r2,r2,-THREAD
+ tovirt(r2,r2) /* set back r2 to current */
4: addi r3,r1,STACK_FRAME_OVERHEAD
bl unrecoverable_exception
/* shouldn't return */
@@ -1335,7 +1317,7 @@ _GLOBAL(enter_rtas)
MTMSRD(r0) /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
mtlr r6
- mtspr SPRN_SPRG_RTAS,r7
+ stw r7, THREAD + RTAS_SP(r2)
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
RFI
@@ -1344,7 +1326,8 @@ _GLOBAL(enter_rtas)
lwz r9,8(r9) /* original msr value */
addi r1,r1,INT_FRAME_SIZE
li r0,0
- mtspr SPRN_SPRG_RTAS,r0
+ tophys(r7, r2)
+ stw r0, THREAD + RTAS_SP(r7)
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
RFI /* return to caller */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 435927f549c4..15c67d2c0534 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -166,7 +166,7 @@ system_call: /* label this so stack traces look sane */
li r10,IRQS_ENABLED
std r10,SOFTE(r1)
- CURRENT_THREAD_INFO(r11, r1)
+ ld r11, PACA_THREAD_INFO(r13)
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_DOTRACE
bne .Lsyscall_dotrace /* does not return */
@@ -213,7 +213,7 @@ system_call: /* label this so stack traces look sane */
ld r3,RESULT(r1)
#endif
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
ld r8,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S
@@ -236,18 +236,14 @@ system_call_exit:
/*
* Disable interrupts so current_thread_info()->flags can't change,
* and so that we don't get interrupted after loading SRR0/1.
+ *
+ * Leave MSR_RI enabled for now, because with THREAD_INFO_IN_TASK we
+ * could fault on the load of the TI_FLAGS below.
*/
#ifdef CONFIG_PPC_BOOK3E
wrteei 0
#else
- /*
- * For performance reasons we clear RI the same time that we
- * clear EE. We only need to clear RI just before we restore r13
- * below, but batching it with EE saves us one expensive mtmsrd call.
- * We have to be careful to restore RI if we branch anywhere from
- * here (eg syscall_exit_work).
- */
- li r11,0
+ li r11,MSR_RI
mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
@@ -263,15 +259,7 @@ system_call_exit:
bne 3f
#endif
2: addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
bl restore_math
-#ifdef CONFIG_PPC_BOOK3S
- li r11,0
- mtmsrd r11,1
-#endif
ld r8,_MSR(r1)
ld r3,RESULT(r1)
li r11,-MAX_ERRNO
@@ -287,6 +275,16 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
+#ifdef CONFIG_PPC_BOOK3S
+ /*
+ * Clear MSR_RI, MSR_EE is already and remains disabled. We could do
+ * this later, but testing shows that doing it here causes less slow
+ * down than doing it closer to the rfid.
+ */
+ li r11,0
+ mtmsrd r11,1
+#endif
+
beq- 1f
ACCOUNT_CPU_USER_EXIT(r13, r11, r12)
@@ -348,7 +346,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Repopulate r9 and r10 for the syscall path */
addi r9,r1,STACK_FRAME_OVERHEAD
- CURRENT_THREAD_INFO(r10, r1)
+ ld r10, PACA_THREAD_INFO(r13)
ld r10,TI_FLAGS(r10)
cmpldi r0,NR_syscalls
@@ -363,10 +361,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
b .Lsyscall_exit
.Lsyscall_exit_work:
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
If TIF_NOERROR is set, just save r3 as it is. */
@@ -695,7 +689,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
2:
#endif /* CONFIG_PPC_BOOK3S_64 */
- CURRENT_THREAD_INFO(r7, r8) /* base of new stack */
+ clrrdi r7, r8, THREAD_SHIFT /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
because we don't need to leave the 288-byte ABI gap at the
top of the kernel stack. */
@@ -746,7 +740,7 @@ _GLOBAL(ret_from_except_lite)
mtmsrd r10,1 /* Update machine state */
#endif /* CONFIG_PPC_BOOK3E */
- CURRENT_THREAD_INFO(r9, r1)
+ ld r9, PACA_THREAD_INFO(r13)
ld r3,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3E
ld r10,PACACURRENT(r13)
@@ -860,7 +854,7 @@ resume_kernel:
1: bl preempt_schedule_irq
/* Re-test flags and eventually loop */
- CURRENT_THREAD_INFO(r9, r1)
+ ld r9, PACA_THREAD_INFO(r13)
ld r4,TI_FLAGS(r9)
andi. r0,r4,_TIF_NEED_RESCHED
bne 1b
@@ -1002,6 +996,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r2,_NIP(r1)
mtspr SPRN_SRR0,r2
+ /*
+ * Leaving a stale exception_marker on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
+ */
+ li r2,0
+ std r2,STACK_FRAME_OVERHEAD-16(r1)
+
ld r0,GPR0(r1)
ld r2,GPR2(r1)
ld r3,GPR3(r1)
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
index 52ca2471ee1a..d252f4663a23 100644
--- a/arch/powerpc/kernel/epapr_hcalls.S
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -21,10 +21,9 @@
#ifndef CONFIG_PPC64
/* epapr_ev_idle() was derived from e500_idle() */
_GLOBAL(epapr_ev_idle)
- CURRENT_THREAD_INFO(r3, r1)
- PPC_LL r4, TI_LOCAL_FLAGS(r3) /* set napping bit */
+ PPC_LL r4, TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r4, r4,_TLF_NAPPING /* so when we take an exception */
- PPC_STL r4, TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+ PPC_STL r4, TI_LOCAL_FLAGS(r2) /* it will return to our caller */
wrteei 1
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index afb638778f44..49381f32b374 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -77,17 +77,6 @@ special_reg_save:
andi. r3,r3,MSR_PR
bnelr
- /* Copy info into temporary exception thread info */
- ld r11,PACAKSAVE(r13)
- CURRENT_THREAD_INFO(r11, r11)
- CURRENT_THREAD_INFO(r12, r1)
- ld r10,TI_FLAGS(r11)
- std r10,TI_FLAGS(r12)
- ld r10,TI_PREEMPT(r11)
- std r10,TI_PREEMPT(r12)
- ld r10,TI_TASK(r11)
- std r10,TI_TASK(r12)
-
/*
* Advance to the next TLB exception frame for handler
* types that don't do it automatically.
@@ -349,6 +338,7 @@ ret_from_mc_except:
#define GEN_BTB_FLUSH
#define CRIT_BTB_FLUSH
#define DBG_BTB_FLUSH
+#define MC_BTB_FLUSH
#define GDBELL_BTB_FLUSH
#endif
@@ -504,7 +494,7 @@ exc_##n##_bad_stack: \
* interrupts happen before the wait instruction.
*/
#define CHECK_NAPPING() \
- CURRENT_THREAD_INFO(r11, r1); \
+ ld r11, PACA_THREAD_INFO(r13); \
ld r10,TI_LOCAL_FLAGS(r11); \
andi. r9,r10,_TLF_NAPPING; \
beq+ 1f; \
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9e253ce27e08..a5b8fbae56a0 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -68,6 +68,14 @@ OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900)
OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000)
OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900)
OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000)
+
+#ifdef CONFIG_PPC_POWERNV
+ .globl start_real_trampolines
+ .globl end_real_trampolines
+ .globl start_virt_trampolines
+ .globl end_virt_trampolines
+#endif
+
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/*
* Data area reserved for FWNMI option.
@@ -566,8 +574,36 @@ EXC_COMMON_BEGIN(mce_return)
RFI_TO_KERNEL
b .
-EXC_REAL(data_access, 0x300, 0x80)
-EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
+EXC_REAL_BEGIN(data_access, 0x300, 0x80)
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b tramp_real_data_access
+EXC_REAL_END(data_access, 0x300, 0x80)
+
+TRAMP_REAL_BEGIN(tramp_real_data_access)
+EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, 0x300)
+ /*
+ * DAR/DSISR must be read before setting MSR[RI], because
+ * a d-side MCE will clobber those registers so is not
+ * recoverable if they are live.
+ */
+ mfspr r10,SPRN_DAR
+ mfspr r11,SPRN_DSISR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ stw r11,PACA_EXGEN+EX_DSISR(r13)
+EXCEPTION_PROLOG_2(data_access_common, EXC_STD)
+
+EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXGEN)
+EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x300)
+ mfspr r10,SPRN_DAR
+ mfspr r11,SPRN_DSISR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ stw r11,PACA_EXGEN+EX_DSISR(r13)
+EXCEPTION_PROLOG_2_RELON(data_access_common, EXC_STD)
+EXC_VIRT_END(data_access, 0x4300, 0x80)
+
TRAMP_KVM_SKIP(PACA_EXGEN, 0x300)
EXC_COMMON_BEGIN(data_access_common)
@@ -575,11 +611,8 @@ EXC_COMMON_BEGIN(data_access_common)
* Here r13 points to the paca, r9 contains the saved CR,
* SRR0 and SRR1 are saved in r11 and r12,
* r9 - r13 are saved in paca->exgen.
+ * EX_DAR and EX_DSISR have saved DAR/DSISR
*/
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
RECONCILE_IRQ_STATE(r10, r11)
ld r12,_MSR(r1)
@@ -596,18 +629,29 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
-EXCEPTION_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, KVMTEST_PR, 0x380);
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXSLB)
+ b tramp_real_data_access_slb
EXC_REAL_END(data_access_slb, 0x380, 0x80)
+TRAMP_REAL_BEGIN(tramp_real_data_access_slb)
+EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXSLB+EX_DAR(r13)
+EXCEPTION_PROLOG_2(data_access_slb_common, EXC_STD)
+
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
-EXCEPTION_RELON_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, NOTEST, 0x380);
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXSLB)
+EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXSLB+EX_DAR(r13)
+EXCEPTION_PROLOG_2_RELON(data_access_slb_common, EXC_STD)
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
EXC_COMMON_BEGIN(data_access_slb_common)
- mfspr r10,SPRN_DAR
- std r10,PACA_EXSLB+EX_DAR(r13)
EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
ld r4,PACA_EXSLB+EX_DAR(r13)
std r4,_DAR(r1)
@@ -703,14 +747,30 @@ TRAMP_KVM_HV(PACA_EXGEN, 0x500)
EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
-EXC_REAL(alignment, 0x600, 0x100)
-EXC_VIRT(alignment, 0x4600, 0x100, 0x600)
-TRAMP_KVM(PACA_EXGEN, 0x600)
-EXC_COMMON_BEGIN(alignment_common)
+EXC_REAL_BEGIN(alignment, 0x600, 0x100)
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXGEN)
+EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, 0x600)
mfspr r10,SPRN_DAR
+ mfspr r11,SPRN_DSISR
std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
+ stw r11,PACA_EXGEN+EX_DSISR(r13)
+EXCEPTION_PROLOG_2(alignment_common, EXC_STD)
+EXC_REAL_END(alignment, 0x600, 0x100)
+
+EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXGEN)
+EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x600)
+ mfspr r10,SPRN_DAR
+ mfspr r11,SPRN_DSISR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ stw r11,PACA_EXGEN+EX_DSISR(r13)
+EXCEPTION_PROLOG_2_RELON(alignment_common, EXC_STD)
+EXC_VIRT_END(alignment, 0x4600, 0x100)
+
+TRAMP_KVM(PACA_EXGEN, 0x600)
+EXC_COMMON_BEGIN(alignment_common)
EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
ld r3,PACA_EXGEN+EX_DAR(r13)
lwz r4,PACA_EXGEN+EX_DSISR(r13)
@@ -1629,7 +1689,7 @@ do_hash_page:
ori r0,r0,DSISR_BAD_FAULT_64S@l
and. r0,r4,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
- CURRENT_THREAD_INFO(r11, r1)
+ ld r11, PACA_THREAD_INFO(r13)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
bne 77f /* then don't call hash_page now */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 05b08db3901d..ce6a972f2584 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -261,7 +261,7 @@ __secondary_hold_acknowledge:
tophys(r11,r1); /* use tophys(r1) if kernel */ \
beq 1f; \
mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,THREAD_INFO-THREAD(r11); \
+ lwz r11,TASK_STACK-THREAD(r11); \
addi r11,r11,THREAD_SIZE; \
tophys(r11,r11); \
1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
@@ -352,9 +352,8 @@ i##n: \
* registers that might have bad values includes all the GPRs
* and all the BATs. We indicate that we are in RTAS by putting
* a non-zero value, the address of the exception frame to use,
- * in SPRG2. The machine check handler checks SPRG2 and uses its
- * value if it is non-zero. If we ever needed to free up SPRG2,
- * we could use a field in the thread_info or thread_struct instead.
+ * in thread.rtas_sp. The machine check handler checks thread.rtas_sp
+ * and uses its value if it is non-zero.
* (Other exception handlers assume that r1 is a valid kernel stack
* pointer when we take an exception from supervisor mode.)
* -- paulus.
@@ -365,16 +364,15 @@ i##n: \
mtspr SPRN_SPRG_SCRATCH1,r11
mfcr r10
#ifdef CONFIG_PPC_CHRP
- mfspr r11,SPRN_SPRG_RTAS
- cmpwi 0,r11,0
- bne 7f
+ mfspr r11, SPRN_SPRG_THREAD
+ lwz r11, RTAS_SP(r11)
+ cmpwi cr1, r11, 0
+ bne cr1, 7f
#endif /* CONFIG_PPC_CHRP */
EXCEPTION_PROLOG_1
7: EXCEPTION_PROLOG_2
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_CHRP
- mfspr r4,SPRN_SPRG_RTAS
- cmpwi cr1,r4,0
bne cr1,1f
#endif
EXC_XFER_STD(0x200, machine_check_exception)
@@ -500,18 +498,22 @@ InstructionTLBMiss:
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_IMISS
+#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
lis r1,PAGE_OFFSET@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_USER|_PAGE_PRESENT|_PAGE_EXEC /* low addresses tested as user */
- lwz r2,PGDIR(r2)
+#endif
+ mfspr r2, SPRN_SPRG_PGDIR
+#ifdef CONFIG_SWAP
+ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#else
+ li r1,_PAGE_PRESENT | _PAGE_EXEC
+#endif
+#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+#endif
+112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- InstructionAddressInvalid /* return if no mapping */
@@ -519,20 +521,10 @@ InstructionTLBMiss:
lwz r0,0(r2) /* get linux-style pte */
andc. r1,r1,r0 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
- /*
- * NOTE! We are assuming this is not an SMP system, otherwise
- * we would need to update the pte atomically with lwarx/stwcx.
- */
- stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
- and r1,r1,r2 /* writable if _RW and _DIRTY */
rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
- ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+ ori r1, r1, 0xe05 /* clear out reserved bits */
+ andc r1, r0, r1 /* PP = user? 2 : 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -576,16 +568,16 @@ DataLoadTLBMiss:
mfspr r3,SPRN_DMISS
lis r1,PAGE_OFFSET@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
- lwz r2,PGDIR(r2)
+ mfspr r2, SPRN_SPRG_PGDIR
+#ifdef CONFIG_SWAP
+ li r1, _PAGE_PRESENT | _PAGE_ACCESSED
+#else
+ li r1, _PAGE_PRESENT
+#endif
bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
@@ -593,20 +585,16 @@ DataLoadTLBMiss:
lwz r0,0(r2) /* get linux-style pte */
andc. r1,r1,r0 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
- and r1,r1,r2 /* writable if _RW and _DIRTY */
rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+ andc r1,r0,r1 /* PP = user? rw? 2: 3: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -660,16 +648,16 @@ DataStoreTLBMiss:
mfspr r3,SPRN_DMISS
lis r1,PAGE_OFFSET@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */
- lwz r2,PGDIR(r2)
+ mfspr r2, SPRN_SPRG_PGDIR
+#ifdef CONFIG_SWAP
+ li r1, _PAGE_RW | _PAGE_PRESENT | _PAGE_ACCESSED
+#else
+ li r1, _PAGE_RW | _PAGE_PRESENT
+#endif
bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
@@ -677,12 +665,10 @@ DataStoreTLBMiss:
lwz r0,0(r2) /* get linux-style pte */
andc. r1,r1,r0 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED|_PAGE_DIRTY
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r0,0(r2) /* update PTE (accessed/dirty bits) */
/* Convert linux-style PTE to low word of PPC-style PTE */
rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
li r1,0xe05 /* clear out reserved bits & PP lsb */
@@ -845,12 +831,12 @@ __secondary_start:
bl init_idle_6xx
#endif /* CONFIG_PPC_BOOK3S_32 */
- /* get current_thread_info and current */
- lis r1,secondary_ti@ha
- tophys(r1,r1)
- lwz r1,secondary_ti@l(r1)
- tophys(r2,r1)
- lwz r2,TI_TASK(r2)
+ /* get current's stack and current */
+ lis r2,secondary_current@ha
+ tophys(r2,r2)
+ lwz r2,secondary_current@l(r2)
+ tophys(r1,r2)
+ lwz r1,TASK_STACK(r1)
/* stack */
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
@@ -865,8 +851,10 @@ __secondary_start:
tophys(r4,r2)
addi r4,r4,THREAD /* phys address of our thread_struct */
mtspr SPRN_SPRG_THREAD,r4
+#ifdef CONFIG_PPC_RTAS
li r3,0
- mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
+ stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
+#endif
/* enable MMU and jump to start_secondary */
li r4,MSR_KERNEL
@@ -950,8 +938,10 @@ start_here:
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
mtspr SPRN_SPRG_THREAD,r4
+#ifdef CONFIG_PPC_RTAS
li r3,0
- mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
+ stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
+#endif
/* stack */
lis r1,init_thread_union@ha
@@ -1022,15 +1012,16 @@ _ENTRY(switch_mmu_context)
li r0,NUM_USER_SEGMENTS
mtctr r0
+ lwz r4, MM_PGD(r4)
#ifdef CONFIG_BDI_SWITCH
/* Context switch the PTE pointer for the Abatron BDI2000.
* The PGDIR is passed as second argument.
*/
- lwz r4,MM_PGD(r4)
- lis r5, KERNELBASE@h
- lwz r5, 0xf0(r5)
- stw r4, 0x4(r5)
+ lis r5, abatron_pteptrs@ha
+ stw r4, abatron_pteptrs@l + 0x4(r5)
#endif
+ tophys(r4, r4)
+ mtspr SPRN_SPRG_PGDIR, r4
li r4,0
isync
3:
@@ -1105,6 +1096,41 @@ BEGIN_MMU_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
+_ENTRY(update_bats)
+ lis r4, 1f@h
+ ori r4, r4, 1f@l
+ tophys(r4, r4)
+ mfmsr r6
+ mflr r7
+ li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)
+ rlwinm r0, r6, 0, ~MSR_RI
+ rlwinm r0, r0, 0, ~MSR_EE
+ mtmsr r0
+ mtspr SPRN_SRR0, r4
+ mtspr SPRN_SRR1, r3
+ SYNC
+ RFI
+1: bl clear_bats
+ lis r3, BATS@ha
+ addi r3, r3, BATS@l
+ tophys(r3, r3)
+ LOAD_BAT(0, r3, r4, r5)
+ LOAD_BAT(1, r3, r4, r5)
+ LOAD_BAT(2, r3, r4, r5)
+ LOAD_BAT(3, r3, r4, r5)
+BEGIN_MMU_FTR_SECTION
+ LOAD_BAT(4, r3, r4, r5)
+ LOAD_BAT(5, r3, r4, r5)
+ LOAD_BAT(6, r3, r4, r5)
+ LOAD_BAT(7, r3, r4, r5)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+ li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI)
+ mtmsr r3
+ mtspr SPRN_SRR0, r7
+ mtspr SPRN_SRR1, r6
+ SYNC
+ RFI
+
flush_tlbs:
lis r10, 0x40
1: addic. r10, r10, -0x1000
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index b19d78410511..a9c934f2319b 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -115,7 +115,7 @@ _ENTRY(saved_ksp_limit)
andi. r11,r11,MSR_PR; \
beq 1f; \
mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\
+ lwz r1,TASK_STACK-THREAD(r1); /* this thread's kernel stack */\
addi r1,r1,THREAD_SIZE; \
1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
tophys(r11,r1); \
@@ -158,7 +158,7 @@ _ENTRY(saved_ksp_limit)
beq 1f; \
/* COMING FROM USER MODE */ \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+ lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\
tophys(r11,r11); \
stw r10,_CCR(r11); /* save various registers */\
@@ -953,9 +953,8 @@ _GLOBAL(set_context)
/* Context switch the PTE pointer for the Abatron BDI2000.
* The PGDIR is the second parameter.
*/
- lis r5, KERNELBASE@h
- lwz r5, 0xf0(r5)
- stw r4, 0x4(r5)
+ lis r5, abatron_pteptrs@ha
+ stw r4, abatron_pteptrs@l + 0x4(r5)
#endif
sync
mtspr SPRN_PID,r3
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index bf23c19c92d6..37117ab11584 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -1019,10 +1019,10 @@ _GLOBAL(start_secondary_47x)
/* Now we can get our task struct and real stack pointer */
- /* Get current_thread_info and current */
- lis r1,secondary_ti@ha
- lwz r1,secondary_ti@l(r1)
- lwz r2,TI_TASK(r1)
+ /* Get current's stack and current */
+ lis r2,secondary_current@ha
+ lwz r2,secondary_current@l(r2)
+ lwz r1,TASK_STACK(r2)
/* Current stack pointer */
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 4898e9491a1c..3fad8d499767 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -801,21 +801,19 @@ __secondary_start:
/* Set thread priority to MEDIUM */
HMT_MEDIUM
- /* Initialize the kernel stack */
- LOAD_REG_ADDR(r3, current_set)
- sldi r28,r24,3 /* get current_set[cpu#] */
- ldx r14,r3,r28
- addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD
- std r14,PACAKSAVE(r13)
-
- /* Do early setup for that CPU (SLB and hash table pointer) */
+ /*
+ * Do early setup for this CPU, in particular initialising the MMU so we
+ * can turn it on below. This is a call to C, which is OK, we're still
+ * running on the emergency stack.
+ */
bl early_setup_secondary
/*
- * setup the new stack pointer, but *don't* use this until
- * translation is on.
+ * The primary has initialized our kernel stack for us in the paca, grab
+ * it and put it in r1. We must *not* use it until we turn on the MMU
+ * below, because it may not be inside the RMO.
*/
- mr r1, r14
+ ld r1, PACAKSAVE(r13)
/* Clear backchain so we get nice backtraces */
li r7,0
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 20cc816b3508..03c73b4c6435 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -142,7 +142,7 @@ instruction_counter:
tophys(r11,r1); /* use tophys(r1) if kernel */ \
beq 1f; \
mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,THREAD_INFO-THREAD(r11); \
+ lwz r11,TASK_STACK-THREAD(r11); \
addi r11,r11,THREAD_SIZE; \
tophys(r11,r11); \
1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
@@ -292,6 +292,17 @@ SystemCall:
*/
EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
+/* Called from DataStoreTLBMiss when perf TLB misses events are activated */
+#ifdef CONFIG_PERF_EVENTS
+ patch_site 0f, patch__dtlbmiss_perf
+0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, 1
+ stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ rfi
+#endif
+
. = 0x1100
/*
* For the MPC8xx, this is a software tablewalk to load the instruction
@@ -337,8 +348,8 @@ InstructionTLBMiss:
rlwinm r10, r10, 16, 0xfff8
cmpli cr0, r10, PAGE_OFFSET@h
#ifndef CONFIG_PIN_TLB_TEXT
- /* It is assumed that kernel code fits into the first 8M page */
-0: cmpli cr7, r10, (PAGE_OFFSET + 0x0800000)@h
+ /* It is assumed that kernel code fits into the first 32M */
+0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
patch_site 0b, patch__itlbmiss_linmem_top
#endif
#endif
@@ -405,10 +416,20 @@ InstructionTLBMiss:
#ifndef CONFIG_PIN_TLB_TEXT
ITLBMissLinear:
mtcr r11
+#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23
+ patch_site 0f, patch__itlbmiss_linmem_top8
+
+ mfspr r10, SPRN_SRR0
+0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
+ rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
+ ori r11, r11, MI_PS512K | MI_SVALID
+ rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
+#else
/* Set 8M byte page and mark it valid */
li r11, MI_PS8MEG | MI_SVALID
- mtspr SPRN_MI_TWC, r11
rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
+#endif
+ mtspr SPRN_MI_TWC, r11
ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
_PAGE_PRESENT
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
@@ -434,7 +455,7 @@ DataStoreTLBMiss:
#ifndef CONFIG_PIN_TLB_IMMR
cmpli cr6, r10, VIRT_IMMR_BASE@h
#endif
-0: cmpli cr7, r10, (PAGE_OFFSET + 0x1800000)@h
+0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
patch_site 0b, patch__dtlbmiss_linmem_top
mfspr r10, SPRN_M_TWB /* Get level 1 table */
@@ -494,16 +515,6 @@ DataStoreTLBMiss:
rfi
patch_site 0b, patch__dtlbmiss_exit_1
-#ifdef CONFIG_PERF_EVENTS
- patch_site 0f, patch__dtlbmiss_perf
-0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- addi r10, r10, 1
- stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- rfi
-#endif
-
DTLBMissIMMR:
mtcr r11
/* Set 512k byte guarded page and mark it valid */
@@ -525,10 +536,29 @@ DTLBMissIMMR:
DTLBMissLinear:
mtcr r11
+ rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
+#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_DATA_SHIFT < 23
+ patch_site 0f, patch__dtlbmiss_romem_top8
+
+0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
+ rlwinm r11, r11, 0, 0xff800000
+ neg r10, r11
+ or r11, r11, r10
+ rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
+ ori r11, r11, MI_PS512K | MI_SVALID
+ mfspr r10, SPRN_MD_EPN
+ rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
+#else
/* Set 8M byte page and mark it valid */
li r11, MD_PS8MEG | MD_SVALID
+#endif
mtspr SPRN_MD_TWC, r11
- rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ patch_site 0f, patch__dtlbmiss_romem_top
+
+0: subis r11, r10, 0
+ rlwimi r10, r11, 11, _PAGE_RO
+#endif
ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
_PAGE_PRESENT
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
@@ -551,11 +581,11 @@ InstructionTLBError:
mr r4,r12
andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
andis. r10,r9,SRR1_ISI_NOPT@h
- beq+ 1f
+ beq+ .Litlbie
tlbie r4
-itlbie:
/* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
-1: EXC_XFER_LITE(0x400, handle_page_fault)
+.Litlbie:
+ EXC_XFER_LITE(0x400, handle_page_fault)
/* This is the data TLB error on the MPC8xx. This could be due to
* many reasons, including a dirty update to a pte. We bail out to
@@ -577,10 +607,10 @@ DARFixed:/* Return from dcbx instruction bug workaround */
stw r5,_DSISR(r11)
mfspr r4,SPRN_DAR
andis. r10,r5,DSISR_NOHPTE@h
- beq+ 1f
+ beq+ .Ldtlbie
tlbie r4
-dtlbie:
-1: li r10,RPN_PATTERN
+.Ldtlbie:
+ li r10,RPN_PATTERN
mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
@@ -603,8 +633,8 @@ DataBreakpoint:
mtspr SPRN_SPRG_SCRATCH1, r11
mfcr r10
mfspr r11, SPRN_SRR0
- cmplwi cr0, r11, (dtlbie - PAGE_OFFSET)@l
- cmplwi cr7, r11, (itlbie - PAGE_OFFSET)@l
+ cmplwi cr0, r11, (.Ldtlbie - PAGE_OFFSET)@l
+ cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l
beq- cr0, 11f
beq- cr7, 11f
EXCEPTION_PROLOG_1
@@ -886,28 +916,11 @@ initial_mmu:
mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */
tlbia /* Invalidate all TLB entries */
-#ifdef CONFIG_PIN_TLB_TEXT
- lis r8, MI_RSV4I@h
- ori r8, r8, 0x1c00
-
- mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
-#endif
-
#ifdef CONFIG_PIN_TLB_DATA
oris r10, r10, MD_RSV4I@h
mtspr SPRN_MD_CTR, r10 /* Set data TLB control */
#endif
- /* Now map the lower 8 Meg into the ITLB. */
- lis r8, KERNELBASE@h /* Create vaddr for TLB */
- ori r8, r8, MI_EVALID /* Mark it valid */
- mtspr SPRN_MI_EPN, r8
- li r8, MI_PS8MEG /* Set 8M byte page */
- ori r8, r8, MI_SVALID /* Make it valid */
- mtspr SPRN_MI_TWC, r8
- li r8, MI_BOOTINIT /* Create RPN for address 0 */
- mtspr SPRN_MI_RPN, r8 /* Store TLB entry */
-
lis r8, MI_APG_INIT@h /* Set protection modes */
ori r8, r8, MI_APG_INIT@l
mtspr SPRN_MI_AP, r8
@@ -937,6 +950,34 @@ initial_mmu:
mtspr SPRN_MD_RPN, r8
#endif
+ /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */
+#ifdef CONFIG_PIN_TLB_TEXT
+ lis r8, MI_RSV4I@h
+ ori r8, r8, 0x1c00
+#endif
+ li r9, 4 /* up to 4 pages of 8M */
+ mtctr r9
+ lis r9, KERNELBASE@h /* Create vaddr for TLB */
+ li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */
+ li r11, MI_BOOTINIT /* Create RPN for address 0 */
+ lis r12, _einittext@h
+ ori r12, r12, _einittext@l
+1:
+#ifdef CONFIG_PIN_TLB_TEXT
+ mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
+ addi r8, r8, 0x100
+#endif
+
+ ori r0, r9, MI_EVALID /* Mark it valid */
+ mtspr SPRN_MI_EPN, r0
+ mtspr SPRN_MI_TWC, r10
+ mtspr SPRN_MI_RPN, r11 /* Store TLB entry */
+ addis r9, r9, 0x80
+ addis r11, r11, 0x80
+
+ cmpl cr0, r9, r12
+ bdnzf gt, 1b
+
/* Since the cache is enabled according to the information we
* just loaded into the TLB, invalidate and enable the caches here.
* We should probably check/set other modes....later.
@@ -989,5 +1030,6 @@ swapper_pg_dir:
/* Room for two PTE table poiners, usually the kernel and current user
* pointer to their respective root page table (pgdir).
*/
+ .globl abatron_pteptrs
abatron_pteptrs:
.space 8
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 306e26c073a0..1b22a8dea399 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -55,7 +55,7 @@ END_BTB_FLUSH_SECTION
beq 1f; \
BOOKE_CLEAR_BTB(r11) \
/* if from user, start at top of this thread's kernel stack */ \
- lwz r11, THREAD_INFO-THREAD(r10); \
+ lwz r11, TASK_STACK - THREAD(r10); \
ALLOC_STACK_FRAME(r11, THREAD_SIZE); \
1 : subi r11, r11, INT_FRAME_SIZE; /* Allocate exception frame */ \
stw r13, _CCR(r11); /* save various registers */ \
@@ -142,7 +142,7 @@ END_BTB_FLUSH_SECTION
BOOKE_CLEAR_BTB(r10) \
andi. r11,r11,MSR_PR; \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+ lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\
addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
beq 1f; \
/* COMING FROM USER MODE */ \
@@ -155,13 +155,7 @@ END_BTB_FLUSH_SECTION
stw r10,GPR11(r11); \
b 2f; \
/* COMING FROM PRIV MODE */ \
-1: lwz r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r11); \
- lwz r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r11); \
- stw r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r8); \
- stw r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r8); \
- lwz r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r11); \
- stw r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r8); \
- mr r11,r8; \
+1: mr r11, r8; \
2: mfspr r8,SPRN_SPRG_RSCRATCH_##exc_level; \
stw r12,GPR12(r11); /* save various registers */\
mflr r10; \
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 2386ce2a9c6e..1881127682e9 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -243,8 +243,9 @@ set_ivor:
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
- CURRENT_THREAD_INFO(r22, r1)
- stw r24, TI_CPU(r22)
+#ifdef CONFIG_SMP
+ stw r24, TASK_CPU(r2)
+#endif
bl early_init
@@ -717,8 +718,7 @@ finish_tlb_load:
/* Get the next_tlbcam_idx percpu var */
#ifdef CONFIG_SMP
- lwz r12, THREAD_INFO-THREAD(r12)
- lwz r15, TI_CPU(r12)
+ lwz r15, TASK_CPU-THREAD(r12)
lis r14, __per_cpu_offset@h
ori r14, r14, __per_cpu_offset@l
rlwinm r15, r15, 2, 0, 29
@@ -1089,10 +1089,10 @@ __secondary_start:
mr r4,r24 /* Why? */
bl call_setup_cpu
- /* get current_thread_info and current */
- lis r1,secondary_ti@ha
- lwz r1,secondary_ti@l(r1)
- lwz r2,TI_TASK(r1)
+ /* get current's stack and current */
+ lis r2,secondary_current@ha
+ lwz r2,secondary_current@l(r2)
+ lwz r1,TASK_STACK(r2)
/* stack */
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index ff026c9d3cab..c5e7f5bb2e66 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -136,10 +136,9 @@ BEGIN_FTR_SECTION
DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- CURRENT_THREAD_INFO(r9, r1)
- lwz r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
+ lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r8,r8,_TLF_NAPPING /* so when we take an exception */
- stw r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
+ stw r8,TI_LOCAL_FLAGS(r2) /* it will return to our caller */
mfmsr r7
ori r7,r7,MSR_EE
oris r7,r7,MSR_POW@h
@@ -159,8 +158,7 @@ _GLOBAL(power_save_ppc32_restore)
stw r9,_NIP(r11) /* make it do a blr */
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r12, r11)
- lwz r11,TI_CPU(r12) /* get cpu number * 4 */
+ lwz r11,TASK_CPU(r2) /* get cpu number * 4 */
slwi r11,r11,2
#else
li r11,0
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index 4e0d94d02030..31e732c378ad 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -63,7 +63,7 @@ _GLOBAL(\name)
1: /* Let's set the _TLF_NAPPING flag so interrupts make us return
* to the right spot
*/
- CURRENT_THREAD_INFO(r11, r1)
+ ld r11, PACACURRENT(r13)
ld r10,TI_LOCAL_FLAGS(r11)
ori r10,r10,_TLF_NAPPING
std r10,TI_LOCAL_FLAGS(r11)
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
index 583e55ac7d26..69dfcd2ca011 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -22,10 +22,9 @@
.text
_GLOBAL(e500_idle)
- CURRENT_THREAD_INFO(r3, r1)
- lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */
+ lwz r4,TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r4,r4,_TLF_NAPPING /* so when we take an exception */
- stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+ stw r4,TI_LOCAL_FLAGS(r2) /* it will return to our caller */
#ifdef CONFIG_PPC_E500MC
wrteei 1
@@ -88,8 +87,7 @@ _GLOBAL(power_save_ppc32_restore)
stw r9,_NIP(r11) /* make it do a blr */
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r12, r1)
- lwz r11,TI_CPU(r12) /* get cpu number * 4 */
+ lwz r11,TASK_CPU(r2) /* get cpu number * 4 */
slwi r11,r11,2
#else
li r11,0
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index a09b3c7ca176..a2fdb0a34b75 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -68,7 +68,7 @@ BEGIN_FTR_SECTION
DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- CURRENT_THREAD_INFO(r9, r1)
+ ld r9, PACA_THREAD_INFO(r13)
ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
ori r8,r8,_TLF_NAPPING /* so when we take an exception */
std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 916ddc4aac44..8a936723c791 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -618,9 +618,8 @@ static inline void check_stack_overflow(void)
sp = current_stack_pointer() & (THREAD_SIZE-1);
/* check for stack overflow: is there less than 2KB free? */
- if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
- pr_err("do_IRQ: stack overflow: %ld\n",
- sp - sizeof(struct thread_info));
+ if (unlikely(sp < 2048)) {
+ pr_err("do_IRQ: stack overflow: %ld\n", sp);
dump_stack();
}
#endif
@@ -660,36 +659,21 @@ void __do_irq(struct pt_regs *regs)
void do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- struct thread_info *curtp, *irqtp, *sirqtp;
+ void *cursp, *irqsp, *sirqsp;
/* Switch to the irq stack to handle this */
- curtp = current_thread_info();
- irqtp = hardirq_ctx[raw_smp_processor_id()];
- sirqtp = softirq_ctx[raw_smp_processor_id()];
+ cursp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+ irqsp = hardirq_ctx[raw_smp_processor_id()];
+ sirqsp = softirq_ctx[raw_smp_processor_id()];
/* Already there ? */
- if (unlikely(curtp == irqtp || curtp == sirqtp)) {
+ if (unlikely(cursp == irqsp || cursp == sirqsp)) {
__do_irq(regs);
set_irq_regs(old_regs);
return;
}
-
- /* Prepare the thread_info in the irq stack */
- irqtp->task = curtp->task;
- irqtp->flags = 0;
-
- /* Copy the preempt_count so that the [soft]irq checks work. */
- irqtp->preempt_count = curtp->preempt_count;
-
/* Switch stack and call */
- call_do_irq(regs, irqtp);
-
- /* Restore stack limit */
- irqtp->task = NULL;
-
- /* Copy back updates to the thread_info */
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
+ call_do_irq(regs, irqsp);
set_irq_regs(old_regs);
}
@@ -698,90 +682,20 @@ void __init init_IRQ(void)
{
if (ppc_md.init_IRQ)
ppc_md.init_IRQ();
-
- exc_lvl_ctx_init();
-
- irq_ctx_init();
}
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
-struct thread_info *critirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly;
-
-void exc_lvl_ctx_init(void)
-{
- struct thread_info *tp;
- int i, cpu_nr;
-
- for_each_possible_cpu(i) {
-#ifdef CONFIG_PPC64
- cpu_nr = i;
-#else
-#ifdef CONFIG_SMP
- cpu_nr = get_hard_smp_processor_id(i);
-#else
- cpu_nr = 0;
-#endif
+void *critirq_ctx[NR_CPUS] __read_mostly;
+void *dbgirq_ctx[NR_CPUS] __read_mostly;
+void *mcheckirq_ctx[NR_CPUS] __read_mostly;
#endif
- memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = critirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = 0;
-
-#ifdef CONFIG_BOOKE
- memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = dbgirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = 0;
-
- memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = mcheckirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = HARDIRQ_OFFSET;
-#endif
- }
-}
-#endif
-
-struct thread_info *softirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly;
-
-void irq_ctx_init(void)
-{
- struct thread_info *tp;
- int i;
-
- for_each_possible_cpu(i) {
- memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
- tp = softirq_ctx[i];
- tp->cpu = i;
- klp_init_thread_info(tp);
-
- memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
- tp = hardirq_ctx[i];
- tp->cpu = i;
- klp_init_thread_info(tp);
- }
-}
+void *softirq_ctx[NR_CPUS] __read_mostly;
+void *hardirq_ctx[NR_CPUS] __read_mostly;
void do_softirq_own_stack(void)
{
- struct thread_info *curtp, *irqtp;
-
- curtp = current_thread_info();
- irqtp = softirq_ctx[smp_processor_id()];
- irqtp->task = curtp->task;
- irqtp->flags = 0;
- call_do_softirq(irqtp);
- irqtp->task = NULL;
-
- /* Set any flag that may have been set on the
- * alternate stack
- */
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
+ call_do_softirq(softirq_ctx[smp_processor_id()]);
}
irq_hw_number_t virq_to_hw(unsigned int virq)
@@ -827,11 +741,6 @@ int irq_choose_cpu(const struct cpumask *mask)
}
#endif
-int arch_early_irq_init(void)
-{
- return 0;
-}
-
#ifdef CONFIG_PPC64
static int __init setup_noirqdistrib(char *str)
{
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index e1865565f0ae..7dd55eb1259d 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -151,41 +151,13 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
return 1;
}
-static DEFINE_PER_CPU(struct thread_info, kgdb_thread_info);
static int kgdb_singlestep(struct pt_regs *regs)
{
- struct thread_info *thread_info, *exception_thread_info;
- struct thread_info *backup_current_thread_info =
- this_cpu_ptr(&kgdb_thread_info);
-
if (user_mode(regs))
return 0;
- /*
- * On Book E and perhaps other processors, singlestep is handled on
- * the critical exception stack. This causes current_thread_info()
- * to fail, since it it locates the thread_info by masking off
- * the low bits of the current stack pointer. We work around
- * this issue by copying the thread_info from the kernel stack
- * before calling kgdb_handle_exception, and copying it back
- * afterwards. On most processors the copy is avoided since
- * exception_thread_info == thread_info.
- */
- thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1));
- exception_thread_info = current_thread_info();
-
- if (thread_info != exception_thread_info) {
- /* Save the original current_thread_info. */
- memcpy(backup_current_thread_info, exception_thread_info, sizeof *thread_info);
- memcpy(exception_thread_info, thread_info, sizeof *thread_info);
- }
-
kgdb_handle_exception(0, SIGTRAP, 0, regs);
- if (thread_info != exception_thread_info)
- /* Restore current_thread_info lastly. */
- memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info);
-
return 1;
}
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index a0f6f45005bd..75692c327ba0 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -317,10 +317,8 @@ void default_machine_kexec(struct kimage *image)
* We setup preempt_count to avoid using VMX in memcpy.
* XXX: the task struct will likely be invalid once we do the copy!
*/
- kexec_stack.thread_info.task = current_thread_info()->task;
- kexec_stack.thread_info.flags = 0;
- kexec_stack.thread_info.preempt_count = HARDIRQ_OFFSET;
- kexec_stack.thread_info.cpu = current_thread_info()->cpu;
+ current_thread_info()->flags = 0;
+ current_thread_info()->preempt_count = HARDIRQ_OFFSET;
/* We need a static PACA, too; copy this CPU's PACA over and switch to
* it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index bd933a75f0bc..b5fec1f9751a 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -31,6 +31,7 @@
#include <asm/machdep.h>
#include <asm/mce.h>
+#include <asm/nmi.h>
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
@@ -301,13 +302,13 @@ static void machine_check_process_queued_event(struct irq_work *work)
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
evt = this_cpu_ptr(&mce_event_queue[index]);
- machine_check_print_event_info(evt, false);
+ machine_check_print_event_info(evt, false, false);
__this_cpu_dec(mce_queue_count);
}
}
void machine_check_print_event_info(struct machine_check_event *evt,
- bool user_mode)
+ bool user_mode, bool in_guest)
{
const char *level, *sevstr, *subtype;
static const char *mc_ue_types[] = {
@@ -387,7 +388,9 @@ void machine_check_print_event_info(struct machine_check_event *evt,
evt->disposition == MCE_DISPOSITION_RECOVERED ?
"Recovered" : "Not recovered");
- if (user_mode) {
+ if (in_guest) {
+ printk("%s Guest NIP: %016llx\n", level, evt->srr0);
+ } else if (user_mode) {
printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
evt->srr0, current->pid, current->comm);
} else {
@@ -488,6 +491,8 @@ long machine_check_early(struct pt_regs *regs)
{
long handled = 0;
+ hv_nmi_check_nonrecoverable(regs);
+
/*
* See if platform is capable of handling machine check.
*/
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 57d2ffb2d45c..0dda4f8e3d7a 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -46,11 +46,10 @@ _GLOBAL(call_do_softirq)
mflr r0
stw r0,4(r1)
lwz r10,THREAD+KSP_LIMIT(r2)
- addi r11,r3,THREAD_INFO_GAP
+ stw r3, THREAD+KSP_LIMIT(r2)
stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
mr r1,r3
stw r10,8(r1)
- stw r11,THREAD+KSP_LIMIT(r2)
bl __do_softirq
lwz r10,8(r1)
lwz r1,0(r1)
@@ -60,17 +59,16 @@ _GLOBAL(call_do_softirq)
blr
/*
- * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
+ * void call_do_irq(struct pt_regs *regs, void *sp);
*/
_GLOBAL(call_do_irq)
mflr r0
stw r0,4(r1)
lwz r10,THREAD+KSP_LIMIT(r2)
- addi r11,r4,THREAD_INFO_GAP
+ stw r4, THREAD+KSP_LIMIT(r2)
stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
mr r1,r4
stw r10,8(r1)
- stw r11,THREAD+KSP_LIMIT(r2)
bl __do_irq
lwz r10,8(r1)
lwz r1,0(r1)
@@ -183,10 +181,13 @@ _GLOBAL(low_choose_750fx_pll)
or r4,r4,r5
mtspr SPRN_HID1,r4
+#ifdef CONFIG_SMP
/* Store new HID1 image */
- CURRENT_THREAD_INFO(r6, r1)
- lwz r6,TI_CPU(r6)
+ lwz r6,TASK_CPU(r2)
slwi r6,r6,2
+#else
+ li r6, 0
+#endif
addis r6,r6,nap_save_hid1@ha
stw r4,nap_save_hid1@l(r6)
@@ -599,7 +600,7 @@ EXPORT_SYMBOL(__bswapdi2)
#ifdef CONFIG_SMP
_GLOBAL(start_secondary_resume)
/* Reset stack */
- CURRENT_THREAD_INFO(r1, r1)
+ rlwinm r1, r1, 0, 0, 31 - THREAD_SHIFT
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
li r3,0
stw r3,0(r1) /* Zero the stack frame pointer */
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 38b03a330cd2..244d2462e781 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -7,12 +7,6 @@
* 2 of the License, or (at your option) any later version.
*
* /dev/nvram driver for PPC64
- *
- * This perhaps should live in drivers/char
- *
- * TODO: Split the /dev/nvram part (that one can use
- * drivers/char/generic_nvram.c) from the arch & partition
- * parsing code.
*/
#include <linux/types.h>
@@ -714,137 +708,6 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
spin_unlock_irqrestore(&lock, flags);
}
-static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
-{
- if (ppc_md.nvram_size == NULL)
- return -ENODEV;
- return generic_file_llseek_size(file, offset, origin, MAX_LFS_FILESIZE,
- ppc_md.nvram_size());
-}
-
-
-static ssize_t dev_nvram_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- ssize_t ret;
- char *tmp = NULL;
- ssize_t size;
-
- if (!ppc_md.nvram_size) {
- ret = -ENODEV;
- goto out;
- }
-
- size = ppc_md.nvram_size();
- if (size < 0) {
- ret = size;
- goto out;
- }
-
- if (*ppos >= size) {
- ret = 0;
- goto out;
- }
-
- count = min_t(size_t, count, size - *ppos);
- count = min(count, PAGE_SIZE);
-
- tmp = kmalloc(count, GFP_KERNEL);
- if (!tmp) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = ppc_md.nvram_read(tmp, count, ppos);
- if (ret <= 0)
- goto out;
-
- if (copy_to_user(buf, tmp, ret))
- ret = -EFAULT;
-
-out:
- kfree(tmp);
- return ret;
-
-}
-
-static ssize_t dev_nvram_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- ssize_t ret;
- char *tmp = NULL;
- ssize_t size;
-
- ret = -ENODEV;
- if (!ppc_md.nvram_size)
- goto out;
-
- ret = 0;
- size = ppc_md.nvram_size();
- if (*ppos >= size || size < 0)
- goto out;
-
- count = min_t(size_t, count, size - *ppos);
- count = min(count, PAGE_SIZE);
-
- tmp = memdup_user(buf, count);
- if (IS_ERR(tmp)) {
- ret = PTR_ERR(tmp);
- goto out;
- }
-
- ret = ppc_md.nvram_write(tmp, count, ppos);
-
- kfree(tmp);
-out:
- return ret;
-}
-
-static long dev_nvram_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- switch(cmd) {
-#ifdef CONFIG_PPC_PMAC
- case OBSOLETE_PMAC_NVRAM_GET_OFFSET:
- printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n");
- /* fall through */
- case IOC_NVRAM_GET_OFFSET: {
- int part, offset;
-
- if (!machine_is(powermac))
- return -EINVAL;
- if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
- return -EFAULT;
- if (part < pmac_nvram_OF || part > pmac_nvram_NR)
- return -EINVAL;
- offset = pmac_get_partition(part);
- if (offset < 0)
- return offset;
- if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0)
- return -EFAULT;
- return 0;
- }
-#endif /* CONFIG_PPC_PMAC */
- default:
- return -EINVAL;
- }
-}
-
-static const struct file_operations nvram_fops = {
- .owner = THIS_MODULE,
- .llseek = dev_nvram_llseek,
- .read = dev_nvram_read,
- .write = dev_nvram_write,
- .unlocked_ioctl = dev_nvram_ioctl,
-};
-
-static struct miscdevice nvram_dev = {
- NVRAM_MINOR,
- "nvram",
- &nvram_fops
-};
-
-
#ifdef DEBUG_NVRAM
static void __init nvram_print_partitions(char * label)
{
@@ -992,6 +855,8 @@ loff_t __init nvram_create_partition(const char *name, int sig,
long size = 0;
int rc;
+ BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
+
/* Convert sizes from bytes to blocks */
req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
@@ -1192,22 +1057,3 @@ int __init nvram_scan_partitions(void)
kfree(header);
return err;
}
-
-static int __init nvram_init(void)
-{
- int rc;
-
- BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
-
- if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
- return -ENODEV;
-
- rc = misc_register(&nvram_dev);
- if (rc != 0) {
- printk(KERN_ERR "nvram_init: failed to register device\n");
- return rc;
- }
-
- return rc;
-}
-device_initcall(nvram_init);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 913bfca09c4f..e7382abee868 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -11,6 +11,7 @@
#include <linux/export.h>
#include <linux/memblock.h>
#include <linux/sched/task.h>
+#include <linux/numa.h>
#include <asm/lppaca.h>
#include <asm/paca.h>
@@ -27,7 +28,7 @@
static void *__init alloc_paca_data(unsigned long size, unsigned long align,
unsigned long limit, int cpu)
{
- unsigned long pa;
+ void *ptr;
int nid;
/*
@@ -36,23 +37,21 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align,
* which will put its paca in the right place.
*/
if (cpu == boot_cpuid) {
- nid = -1;
+ nid = NUMA_NO_NODE;
memblock_set_bottom_up(true);
} else {
nid = early_cpu_to_node(cpu);
}
- pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE);
- if (!pa) {
- pa = memblock_alloc_base(size, align, limit);
- if (!pa)
- panic("cannot allocate paca data");
- }
+ ptr = memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
+ limit, nid);
+ if (!ptr)
+ panic("cannot allocate paca data");
if (cpu == boot_cpuid)
memblock_set_bottom_up(false);
- return __va(pa);
+ return ptr;
}
#ifdef CONFIG_PPC_PSERIES
@@ -118,7 +117,6 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
}
s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
- memset(s, 0, sizeof(*s));
s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
s->buffer_length = cpu_to_be32(sizeof(*s));
@@ -198,7 +196,11 @@ void __init allocate_paca_ptrs(void)
paca_nr_cpu_ids = nr_cpu_ids;
paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
- paca_ptrs = __va(memblock_phys_alloc(paca_ptrs_size, SMP_CACHE_BYTES));
+ paca_ptrs = memblock_alloc_raw(paca_ptrs_size, SMP_CACHE_BYTES);
+ if (!paca_ptrs)
+ panic("Failed to allocate %d bytes for paca pointers\n",
+ paca_ptrs_size);
+
memset(paca_ptrs, 0x88, paca_ptrs_size);
}
@@ -222,7 +224,6 @@ void __init allocate_paca(int cpu)
paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
limit, cpu);
paca_ptrs[cpu] = paca;
- memset(paca, 0, sizeof(struct paca_struct));
initialise_paca(paca, cpu);
#ifdef CONFIG_PPC_PSERIES
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 88e4f69a09e5..ff4b7539cbdf 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -32,6 +32,7 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/vgaarb.h>
+#include <linux/numa.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -62,19 +63,13 @@ resource_size_t isa_mem_base;
EXPORT_SYMBOL(isa_mem_base);
-static const struct dma_map_ops *pci_dma_ops = &dma_nommu_ops;
+static const struct dma_map_ops *pci_dma_ops;
void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
{
pci_dma_ops = dma_ops;
}
-const struct dma_map_ops *get_pci_dma_ops(void)
-{
- return pci_dma_ops;
-}
-EXPORT_SYMBOL(get_pci_dma_ops);
-
/*
* This function should run under locking protection, specifically
* hose_spinlock.
@@ -132,7 +127,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
int nid = of_node_to_nid(dev);
if (nid < 0 || !node_online(nid))
- nid = -1;
+ nid = NUMA_NO_NODE;
PHB_SET_NODE(phb, nid);
}
@@ -357,6 +352,17 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
return NULL;
}
+struct pci_controller *pci_find_controller_for_domain(int domain_nr)
+{
+ struct pci_controller *hose;
+
+ list_for_each_entry(hose, &hose_list, list_node)
+ if (hose->global_number == domain_nr)
+ return hose;
+
+ return NULL;
+}
+
/*
* Reads the interrupt pin to determine if interrupt is use by card.
* If the interrupt is used, then gets the interrupt line from the
@@ -972,7 +978,7 @@ static void pcibios_setup_device(struct pci_dev *dev)
/* Hook up default DMA ops */
set_dma_ops(&dev->dev, pci_dma_ops);
- set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
+ dev->dev.archdata.dma_offset = PCI_DRAM_OFFSET;
/* Additional platform DMA/iommu setup */
phb = pci_bus_to_host(dev->bus);
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index d3f04f2d8249..0417fda13636 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -205,6 +205,9 @@ pci_create_OF_bus_map(void)
of_prop = memblock_alloc(sizeof(struct property) + 256,
SMP_CACHE_BYTES);
+ if (!of_prop)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(struct property) + 256);
dn = of_find_node_by_path("/");
if (dn) {
memset(of_prop, -1, sizeof(struct property) + 256);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ce393df243aa..dd9e0d5386ee 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -176,7 +176,7 @@ static void __giveup_fpu(struct task_struct *tsk)
save_fpu(tsk);
msr = tsk->thread.regs->msr;
- msr &= ~MSR_FP;
+ msr &= ~(MSR_FP|MSR_FE0|MSR_FE1);
#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX))
msr &= ~MSR_VSX;
@@ -1231,8 +1231,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
batch->active = 1;
}
- if (current_thread_info()->task->thread.regs) {
- restore_math(current_thread_info()->task->thread.regs);
+ if (current->thread.regs) {
+ restore_math(current->thread.regs);
/*
* The copy-paste buffer can only store into foreign real
@@ -1242,7 +1242,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
* mappings, we must issue a cp_abort to clear any state and
* prevent snooping, corruption or a covert channel.
*/
- if (current_thread_info()->task->thread.used_vas)
+ if (current->thread.used_vas)
asm volatile(PPC_CP_ABORT);
}
#endif /* CONFIG_PPC_BOOK3S_64 */
@@ -1634,7 +1634,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
struct thread_info *ti = task_thread_info(p);
- klp_init_thread_info(ti);
+ klp_init_thread_info(p);
/* Copy registers */
sp -= sizeof(struct pt_regs);
@@ -1691,8 +1691,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
sp -= STACK_FRAME_OVERHEAD;
p->thread.ksp = sp;
#ifdef CONFIG_PPC32
- p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
- _ALIGN_UP(sizeof(struct thread_info), 16);
+ p->thread.ksp_limit = (unsigned long)end_of_stack(p);
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
p->thread.ptrace_bps[0] = NULL;
@@ -1995,21 +1994,14 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
unsigned long stack_page;
unsigned long cpu = task_cpu(p);
- /*
- * Avoid crashing if the stack has overflowed and corrupted
- * task_cpu(p), which is in the thread_info struct.
- */
- if (cpu < NR_CPUS && cpu_possible(cpu)) {
- stack_page = (unsigned long) hardirq_ctx[cpu];
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
- return 1;
-
- stack_page = (unsigned long) softirq_ctx[cpu];
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
- return 1;
- }
+ stack_page = (unsigned long)hardirq_ctx[cpu];
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+ stack_page = (unsigned long)softirq_ctx[cpu];
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
return 0;
}
@@ -2018,8 +2010,10 @@ int validate_sp(unsigned long sp, struct task_struct *p,
{
unsigned long stack_page = (unsigned long)task_stack_page(p);
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
+ if (sp < THREAD_SIZE)
+ return 0;
+
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
return 1;
return valid_irq_stack(sp, p, nbytes);
@@ -2027,7 +2021,7 @@ int validate_sp(unsigned long sp, struct task_struct *p,
EXPORT_SYMBOL(validate_sp);
-unsigned long get_wchan(struct task_struct *p)
+static unsigned long __get_wchan(struct task_struct *p)
{
unsigned long ip, sp;
int count = 0;
@@ -2053,6 +2047,20 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
}
+unsigned long get_wchan(struct task_struct *p)
+{
+ unsigned long ret;
+
+ if (!try_get_task_stack(p))
+ return 0;
+
+ ret = __get_wchan(p);
+
+ put_task_stack(p);
+
+ return ret;
+}
+
static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
void show_stack(struct task_struct *tsk, unsigned long *stack)
@@ -2067,9 +2075,13 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
int curr_frame = 0;
#endif
- sp = (unsigned long) stack;
if (tsk == NULL)
tsk = current;
+
+ if (!try_get_task_stack(tsk))
+ return;
+
+ sp = (unsigned long) stack;
if (sp == 0) {
if (tsk == current)
sp = current_stack_pointer();
@@ -2081,7 +2093,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
printk("Call Trace:\n");
do {
if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
- return;
+ break;
stack = (unsigned long *) sp;
newsp = stack[0];
@@ -2121,6 +2133,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
sp = newsp;
} while (count++ < kstack_depth_to_print);
+
+ put_task_stack(tsk);
}
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4181ec715f88..4221527b082f 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -126,7 +126,10 @@ static void __init move_device_tree(void)
if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) ||
!memblock_is_memory(start + size - 1) ||
overlaps_crashkernel(start, size) || overlaps_initrd(start, size)) {
- p = __va(memblock_phys_alloc(size, PAGE_SIZE));
+ p = memblock_alloc_raw(size, PAGE_SIZE);
+ if (!p)
+ panic("Failed to allocate %lu bytes to move device tree\n",
+ size);
memcpy(p, initial_boot_params, size);
initial_boot_params = p;
DBG("Moved device tree to 0x%px\n", p);
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index cdd5d1d3ae41..d9ac7d94656e 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -33,6 +33,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/context_tracking.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <linux/pkeys.h>
@@ -274,6 +275,8 @@ static int set_user_trap(struct task_struct *task, unsigned long trap)
*/
int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
{
+ unsigned int regs_max;
+
if ((task->thread.regs == NULL) || !data)
return -EIO;
@@ -297,7 +300,9 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
}
#endif
- if (regno < (sizeof(struct user_pt_regs) / sizeof(unsigned long))) {
+ regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long);
+ if (regno < regs_max) {
+ regno = array_index_nospec(regno, regs_max);
*data = ((unsigned long *)task->thread.regs)[regno];
return 0;
}
@@ -321,6 +326,7 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
return set_user_dscr(task, data);
if (regno <= PT_MAX_PUT_REG) {
+ regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1);
((unsigned long *)task->thread.regs)[regno] = data;
return 0;
}
@@ -561,6 +567,7 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
/*
* Copy out only the low-order word of vrsave.
*/
+ int start, end;
union {
elf_vrreg_t reg;
u32 word;
@@ -569,8 +576,10 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
vrsave.word = target->thread.vrsave;
+ start = 33 * sizeof(vector128);
+ end = start + sizeof(vrsave);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
+ start, end);
}
return ret;
@@ -608,6 +617,7 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
/*
* We use only the first word of vrsave.
*/
+ int start, end;
union {
elf_vrreg_t reg;
u32 word;
@@ -616,8 +626,10 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
vrsave.word = target->thread.vrsave;
+ start = 33 * sizeof(vector128);
+ end = start + sizeof(vrsave);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
+ start, end);
if (!ret)
target->thread.vrsave = vrsave.word;
}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index de35bd8f047f..fbc676160adf 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1187,7 +1187,11 @@ void __init rtas_initialize(void)
ibm_suspend_me_token = rtas_token("ibm,suspend-me");
}
#endif
- rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
+ rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
+ 0, rtas_region);
+ if (!rtas_rmo_buf)
+ panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
+ PAGE_SIZE, &rtas_region);
#ifdef CONFIG_RTAS_ERROR_LOGGING
rtas_last_error_token = rtas_token("rtas-last-error");
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index ca00fbb97cf8..2e5dfb6e0823 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -459,8 +459,11 @@ void __init smp_setup_cpu_maps(void)
DBG("smp_setup_cpu_maps()\n");
- cpu_to_phys_id = __va(memblock_phys_alloc(nr_cpu_ids * sizeof(u32), __alignof__(u32)));
- memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
+ cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
+ __alignof__(u32));
+ if (!cpu_to_phys_id)
+ panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
+ __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
@@ -634,7 +637,7 @@ void probe_machine(void)
}
/* What can we do if we didn't find ? */
if (machine_id >= &__machine_desc_end) {
- DBG("No suitable machine found !\n");
+ pr_err("No suitable machine description found !\n");
for (;;);
}
@@ -791,7 +794,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
{
pdev->archdata.dma_mask = DMA_BIT_MASK(32);
pdev->dev.dma_mask = &pdev->archdata.dma_mask;
- set_dma_ops(&pdev->dev, &dma_nommu_ops);
}
static __init void print_system_info(void)
@@ -938,7 +940,7 @@ void __init setup_arch(char **cmdline_p)
/* Reserve large chunks of memory for use by CMA for KVM. */
kvm_cma_reserve();
- klp_init_thread_info(&init_thread_info);
+ klp_init_thread_info(&init_task);
init_mm.start_code = (unsigned long)_stext;
init_mm.end_code = (unsigned long) _etext;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 947f904688b0..4a65e08a6042 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -17,6 +17,7 @@
#include <linux/console.h>
#include <linux/memblock.h>
#include <linux/export.h>
+#include <linux/nvram.h>
#include <asm/io.h>
#include <asm/prom.h>
@@ -147,41 +148,6 @@ static int __init ppc_setup_l3cr(char *str)
}
__setup("l3cr=", ppc_setup_l3cr);
-#ifdef CONFIG_GENERIC_NVRAM
-
-/* Generic nvram hooks used by drivers/char/gen_nvram.c */
-unsigned char nvram_read_byte(int addr)
-{
- if (ppc_md.nvram_read_val)
- return ppc_md.nvram_read_val(addr);
- return 0xff;
-}
-EXPORT_SYMBOL(nvram_read_byte);
-
-void nvram_write_byte(unsigned char val, int addr)
-{
- if (ppc_md.nvram_write_val)
- ppc_md.nvram_write_val(addr, val);
-}
-EXPORT_SYMBOL(nvram_write_byte);
-
-ssize_t nvram_get_size(void)
-{
- if (ppc_md.nvram_size)
- return ppc_md.nvram_size();
- return -1;
-}
-EXPORT_SYMBOL(nvram_get_size);
-
-void nvram_sync(void)
-{
- if (ppc_md.nvram_sync)
- ppc_md.nvram_sync();
-}
-EXPORT_SYMBOL(nvram_sync);
-
-#endif /* CONFIG_NVRAM */
-
static int __init ppc_init(void)
{
/* clear the progress line */
@@ -196,6 +162,17 @@ static int __init ppc_init(void)
}
arch_initcall(ppc_init);
+static void *__init alloc_stack(void)
+{
+ void *ptr = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+
+ if (!ptr)
+ panic("cannot allocate %d bytes for stack at %pS\n",
+ THREAD_SIZE, (void *)_RET_IP_);
+
+ return ptr;
+}
+
void __init irqstack_early_init(void)
{
unsigned int i;
@@ -203,10 +180,8 @@ void __init irqstack_early_init(void)
/* interrupt stacks must be in lowmem, we get that for free on ppc32
* as the memblock is limited to lowmem by default */
for_each_possible_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
- hardirq_ctx[i] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
+ softirq_ctx[i] = alloc_stack();
+ hardirq_ctx[i] = alloc_stack();
}
}
@@ -224,13 +199,10 @@ void __init exc_lvl_early_init(void)
hw_cpu = 0;
#endif
- critirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
+ critirq_ctx[hw_cpu] = alloc_stack();
#ifdef CONFIG_BOOKE
- dbgirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
- mcheckirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
+ dbgirq_ctx[hw_cpu] = alloc_stack();
+ mcheckirq_ctx[hw_cpu] = alloc_stack();
#endif
}
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 236c1151a3a7..ba404dd9ce1d 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -634,19 +634,17 @@ __init u64 ppc64_bolted_size(void)
static void *__init alloc_stack(unsigned long limit, int cpu)
{
- unsigned long pa;
+ void *ptr;
BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
- pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
- early_cpu_to_node(cpu), MEMBLOCK_NONE);
- if (!pa) {
- pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
- if (!pa)
- panic("cannot allocate stacks");
- }
+ ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_SIZE,
+ MEMBLOCK_LOW_LIMIT, limit,
+ early_cpu_to_node(cpu));
+ if (!ptr)
+ panic("cannot allocate stacks");
- return __va(pa);
+ return ptr;
}
void __init irqstack_early_init(void)
@@ -692,24 +690,6 @@ void __init exc_lvl_early_init(void)
#endif
/*
- * Emergency stacks are used for a range of things, from asynchronous
- * NMIs (system reset, machine check) to synchronous, process context.
- * We set preempt_count to zero, even though that isn't necessarily correct. To
- * get the right value we'd need to copy it from the previous thread_info, but
- * doing that might fault causing more problems.
- * TODO: what to do with accounting?
- */
-static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
-{
- ti->task = NULL;
- ti->cpu = cpu;
- ti->preempt_count = 0;
- ti->local_flags = 0;
- ti->flags = 0;
- klp_init_thread_info(ti);
-}
-
-/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled. Exclusive emergency
* stack for machine checks.
@@ -736,25 +716,14 @@ void __init emergency_stack_init(void)
limit = min(ppc64_bolted_size(), ppc64_rma_size);
for_each_possible_cpu(i) {
- struct thread_info *ti;
-
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->mc_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
#endif
}
}
@@ -933,8 +902,13 @@ static void __ref init_fallback_flush(void)
* hardware prefetch runoff. We don't have a recipe for load patterns to
* reliably avoid the prefetcher.
*/
- l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
- memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+ l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
+ l1d_size, MEMBLOCK_LOW_LIMIT,
+ limit, NUMA_NO_NODE);
+ if (!l1d_flush_fallback_area)
+ panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
+ __func__, l1d_size * 2, l1d_size, &limit);
+
for_each_possible_cpu(cpu) {
struct paca_struct *paca = paca_ptrs[cpu];
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 3f15edf25a0d..e784342bdaa1 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched/mm.h>
+#include <linux/sched/task_stack.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
@@ -75,7 +76,7 @@
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
-struct thread_info *secondary_ti;
+struct task_struct *secondary_current;
bool has_big_cores;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
@@ -358,13 +359,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
* NMI IPIs may not be recoverable, so should not be used as ongoing part of
* a running system. They can be used for crash, debug, halt/reboot, etc.
*
- * NMI IPIs are globally single threaded. No more than one in progress at
- * any time.
- *
* The IPI call waits with interrupts disabled until all targets enter the
- * NMI handler, then the call returns.
+ * NMI handler, then returns. Subsequent IPIs can be issued before targets
+ * have returned from their handlers, so there is no guarantee about
+ * concurrency or re-entrancy.
*
- * No new NMI can be initiated until targets exit the handler.
+ * A new NMI can be issued before all targets exit the handler.
*
* The IPI call may time out without all targets entering the NMI handler.
* In that case, there is some logic to recover (and ignore subsequent
@@ -375,7 +375,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
static struct cpumask nmi_ipi_pending_mask;
-static int nmi_ipi_busy_count = 0;
+static bool nmi_ipi_busy = false;
static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
static void nmi_ipi_lock_start(unsigned long *flags)
@@ -414,7 +414,7 @@ static void nmi_ipi_unlock_end(unsigned long *flags)
*/
int smp_handle_nmi_ipi(struct pt_regs *regs)
{
- void (*fn)(struct pt_regs *);
+ void (*fn)(struct pt_regs *) = NULL;
unsigned long flags;
int me = raw_smp_processor_id();
int ret = 0;
@@ -425,29 +425,17 @@ int smp_handle_nmi_ipi(struct pt_regs *regs)
* because the caller may have timed out.
*/
nmi_ipi_lock_start(&flags);
- if (!nmi_ipi_busy_count)
- goto out;
- if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask))
- goto out;
-
- fn = nmi_ipi_function;
- if (!fn)
- goto out;
-
- cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
- nmi_ipi_busy_count++;
- nmi_ipi_unlock();
-
- ret = 1;
-
- fn(regs);
-
- nmi_ipi_lock();
- if (nmi_ipi_busy_count > 1) /* Can race with caller time-out */
- nmi_ipi_busy_count--;
-out:
+ if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ fn = READ_ONCE(nmi_ipi_function);
+ WARN_ON_ONCE(!fn);
+ ret = 1;
+ }
nmi_ipi_unlock_end(&flags);
+ if (fn)
+ fn(regs);
+
return ret;
}
@@ -473,9 +461,10 @@ static void do_smp_send_nmi_ipi(int cpu, bool safe)
* - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
* - fn is the target callback function.
* - delay_us > 0 is the delay before giving up waiting for targets to
- * complete executing the handler, == 0 specifies indefinite delay.
+ * begin executing the handler, == 0 specifies indefinite delay.
*/
-int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe)
+static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
+ u64 delay_us, bool safe)
{
unsigned long flags;
int me = raw_smp_processor_id();
@@ -487,31 +476,33 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool
if (unlikely(!smp_ops))
return 0;
- /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */
nmi_ipi_lock_start(&flags);
- while (nmi_ipi_busy_count) {
+ while (nmi_ipi_busy) {
nmi_ipi_unlock_end(&flags);
- spin_until_cond(nmi_ipi_busy_count == 0);
+ spin_until_cond(!nmi_ipi_busy);
nmi_ipi_lock_start(&flags);
}
-
+ nmi_ipi_busy = true;
nmi_ipi_function = fn;
+ WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));
+
if (cpu < 0) {
/* ALL_OTHERS */
cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
} else {
- /* cpumask starts clear */
cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
}
- nmi_ipi_busy_count++;
+
nmi_ipi_unlock();
+ /* Interrupts remain hard disabled */
+
do_smp_send_nmi_ipi(cpu, safe);
nmi_ipi_lock();
- /* nmi_ipi_busy_count is held here, so unlock/lock is okay */
+ /* nmi_ipi_busy is set here, so unlock/lock is okay */
while (!cpumask_empty(&nmi_ipi_pending_mask)) {
nmi_ipi_unlock();
udelay(1);
@@ -523,29 +514,15 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool
}
}
- while (nmi_ipi_busy_count > 1) {
- nmi_ipi_unlock();
- udelay(1);
- nmi_ipi_lock();
- if (delay_us) {
- delay_us--;
- if (!delay_us)
- break;
- }
- }
-
if (!cpumask_empty(&nmi_ipi_pending_mask)) {
/* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
ret = 0;
cpumask_clear(&nmi_ipi_pending_mask);
}
- if (nmi_ipi_busy_count > 1) {
- /* Timeout waiting for CPUs to execute fn */
- ret = 0;
- nmi_ipi_busy_count = 1;
- }
- nmi_ipi_busy_count--;
+ nmi_ipi_function = NULL;
+ nmi_ipi_busy = false;
+
nmi_ipi_unlock_end(&flags);
return ret;
@@ -613,17 +590,8 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
static void nmi_stop_this_cpu(struct pt_regs *regs)
{
/*
- * This is a special case because it never returns, so the NMI IPI
- * handling would never mark it as done, which makes any later
- * smp_send_nmi_ipi() call spin forever. Mark it done now.
- *
* IRQs are already hard disabled by the smp_handle_nmi_ipi.
*/
- nmi_ipi_lock();
- if (nmi_ipi_busy_count > 1)
- nmi_ipi_busy_count--;
- nmi_ipi_unlock();
-
spin_begin();
while (1)
spin_cpu_relax();
@@ -663,7 +631,7 @@ void smp_send_stop(void)
}
#endif /* CONFIG_NMI_IPI */
-struct thread_info *current_set[NR_CPUS];
+struct task_struct *current_set[NR_CPUS];
static void smp_store_cpu_info(int id)
{
@@ -928,7 +896,7 @@ void smp_prepare_boot_cpu(void)
paca_ptrs[boot_cpuid]->__current = current;
#endif
set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
- current_set[boot_cpuid] = task_thread_info(current);
+ current_set[boot_cpuid] = current;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1013,14 +981,13 @@ static bool secondaries_inhibited(void)
static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
{
- struct thread_info *ti = task_thread_info(idle);
-
#ifdef CONFIG_PPC64
paca_ptrs[cpu]->__current = idle;
- paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
+ THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
- ti->cpu = cpu;
- secondary_ti = current_set[cpu] = ti;
+ idle->cpu = cpu;
+ secondary_current = current_set[cpu] = idle;
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index e2c50b55138f..1e2276963f6d 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -67,12 +67,17 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
unsigned long sp;
+ if (!try_get_task_stack(tsk))
+ return;
+
if (tsk == current)
sp = current_stack_pointer();
else
sp = tsk->thread.ksp;
save_context_stack(trace, sp, tsk, 0);
+
+ put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
@@ -84,25 +89,21 @@ save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
-int
-save_stack_trace_tsk_reliable(struct task_struct *tsk,
- struct stack_trace *trace)
+/*
+ * This function returns an error if it detects any unreliable features of the
+ * stack. Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
+ struct stack_trace *trace)
{
unsigned long sp;
+ unsigned long newsp;
unsigned long stack_page = (unsigned long)task_stack_page(tsk);
unsigned long stack_end;
int graph_idx = 0;
-
- /*
- * The last frame (unwinding first) may not yet have saved
- * its LR onto the stack.
- */
- int firstframe = 1;
-
- if (tsk == current)
- sp = current_stack_pointer();
- else
- sp = tsk->thread.ksp;
+ bool firstframe;
stack_end = stack_page + THREAD_SIZE;
if (!is_idle_task(tsk)) {
@@ -129,40 +130,53 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
stack_end -= STACK_FRAME_OVERHEAD;
}
+ if (tsk == current)
+ sp = current_stack_pointer();
+ else
+ sp = tsk->thread.ksp;
+
if (sp < stack_page + sizeof(struct thread_struct) ||
sp > stack_end - STACK_FRAME_MIN_SIZE) {
- return 1;
+ return -EINVAL;
}
- for (;;) {
+ for (firstframe = true; sp != stack_end;
+ firstframe = false, sp = newsp) {
unsigned long *stack = (unsigned long *) sp;
- unsigned long newsp, ip;
+ unsigned long ip;
/* sanity check: ABI requires SP to be aligned 16 bytes. */
if (sp & 0xF)
- return 1;
-
- /* Mark stacktraces with exception frames as unreliable. */
- if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
- stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
- return 1;
- }
+ return -EINVAL;
newsp = stack[0];
/* Stack grows downwards; unwinder may only go up. */
if (newsp <= sp)
- return 1;
+ return -EINVAL;
if (newsp != stack_end &&
newsp > stack_end - STACK_FRAME_MIN_SIZE) {
- return 1; /* invalid backlink, too far up. */
+ return -EINVAL; /* invalid backlink, too far up. */
+ }
+
+ /*
+ * We can only trust the bottom frame's backlink, the
+ * rest of the frame may be uninitialized, continue to
+ * the next.
+ */
+ if (firstframe)
+ continue;
+
+ /* Mark stacktraces with exception frames as unreliable. */
+ if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
+ stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ return -EINVAL;
}
/* Examine the saved LR: it must point into kernel code. */
ip = stack[STACK_FRAME_LR_SAVE];
- if (!firstframe && !__kernel_text_address(ip))
- return 1;
- firstframe = 0;
+ if (!__kernel_text_address(ip))
+ return -EINVAL;
/*
* FIXME: IMHO these tests do not belong in
@@ -175,25 +189,37 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
* as unreliable.
*/
if (ip == (unsigned long)kretprobe_trampoline)
- return 1;
+ return -EINVAL;
#endif
+ if (trace->nr_entries >= trace->max_entries)
+ return -E2BIG;
if (!trace->skip)
trace->entries[trace->nr_entries++] = ip;
else
trace->skip--;
+ }
+ return 0;
+}
- if (newsp == stack_end)
- break;
+int save_stack_trace_tsk_reliable(struct task_struct *tsk,
+ struct stack_trace *trace)
+{
+ int ret;
- if (trace->nr_entries >= trace->max_entries)
- return -E2BIG;
+ /*
+ * If the task doesn't have a stack (e.g., a zombie), the stack is
+ * "reliably" empty.
+ */
+ if (!try_get_task_stack(tsk))
+ return 0;
- sp = newsp;
- }
- return 0;
+ ret = __save_stack_trace_tsk_reliable(tsk, trace);
+
+ put_task_stack(tsk);
+
+ return ret;
}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index e6982ab21816..e52a8878c2fb 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -123,7 +123,7 @@ long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
(u64)len_high << 32 | len_low, advice);
}
-long sys_switch_endian(void)
+SYSCALL_DEFINE0(switch_endian)
{
struct thread_info *ti;
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index db3bbb8744af..b18abb0c3dae 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -20,7 +20,9 @@
10 common unlink sys_unlink
11 nospu execve sys_execve compat_sys_execve
12 common chdir sys_chdir
-13 common time sys_time compat_sys_time
+13 32 time sys_time32
+13 64 time sys_time
+13 spu time sys_time
14 common mknod sys_mknod
15 common chmod sys_chmod
16 common lchown sys_lchown
@@ -36,14 +38,17 @@
22 spu umount sys_ni_syscall
23 common setuid sys_setuid
24 common getuid sys_getuid
-25 common stime sys_stime compat_sys_stime
+25 32 stime sys_stime32
+25 64 stime sys_stime
+25 spu stime sys_stime
26 nospu ptrace sys_ptrace compat_sys_ptrace
27 common alarm sys_alarm
28 32 oldfstat sys_fstat sys_ni_syscall
28 64 oldfstat sys_ni_syscall
28 spu oldfstat sys_ni_syscall
29 nospu pause sys_pause
-30 nospu utime sys_utime compat_sys_utime
+30 32 utime sys_utime32
+30 64 utime sys_utime
31 common stty sys_ni_syscall
32 common gtty sys_ni_syscall
33 common access sys_access
@@ -157,7 +162,9 @@
121 common setdomainname sys_setdomainname
122 common uname sys_newuname
123 common modify_ldt sys_ni_syscall
-124 common adjtimex sys_adjtimex compat_sys_adjtimex
+124 32 adjtimex sys_adjtimex_time32
+124 64 adjtimex sys_adjtimex
+124 spu adjtimex sys_adjtimex
125 common mprotect sys_mprotect
126 32 sigprocmask sys_sigprocmask compat_sys_sigprocmask
126 64 sigprocmask sys_ni_syscall
@@ -198,8 +205,12 @@
158 common sched_yield sys_sched_yield
159 common sched_get_priority_max sys_sched_get_priority_max
160 common sched_get_priority_min sys_sched_get_priority_min
-161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
-162 common nanosleep sys_nanosleep compat_sys_nanosleep
+161 32 sched_rr_get_interval sys_sched_rr_get_interval_time32
+161 64 sched_rr_get_interval sys_sched_rr_get_interval
+161 spu sched_rr_get_interval sys_sched_rr_get_interval
+162 32 nanosleep sys_nanosleep_time32
+162 64 nanosleep sys_nanosleep
+162 spu nanosleep sys_nanosleep
163 common mremap sys_mremap
164 common setresuid sys_setresuid
165 common getresuid sys_getresuid
@@ -213,7 +224,8 @@
173 nospu rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
174 nospu rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
175 nospu rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
-176 nospu rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
+176 32 rt_sigtimedwait sys_rt_sigtimedwait_time32 compat_sys_rt_sigtimedwait_time32
+176 64 rt_sigtimedwait sys_rt_sigtimedwait
177 nospu rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
179 common pread64 sys_pread64 compat_sys_pread64
@@ -260,7 +272,9 @@
218 common removexattr sys_removexattr
219 common lremovexattr sys_lremovexattr
220 common fremovexattr sys_fremovexattr
-221 common futex sys_futex compat_sys_futex
+221 32 futex sys_futex_time32
+221 64 futex sys_futex
+221 spu futex sys_futex
222 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
223 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
# 224 unused
@@ -268,7 +282,9 @@
226 32 sendfile64 sys_sendfile64 compat_sys_sendfile64
227 common io_setup sys_io_setup compat_sys_io_setup
228 common io_destroy sys_io_destroy
-229 common io_getevents sys_io_getevents compat_sys_io_getevents
+229 32 io_getevents sys_io_getevents_time32
+229 64 io_getevents sys_io_getevents
+229 spu io_getevents sys_io_getevents
230 common io_submit sys_io_submit compat_sys_io_submit
231 common io_cancel sys_io_cancel
232 nospu set_tid_address sys_set_tid_address
@@ -280,19 +296,33 @@
238 common epoll_wait sys_epoll_wait
239 common remap_file_pages sys_remap_file_pages
240 common timer_create sys_timer_create compat_sys_timer_create
-241 common timer_settime sys_timer_settime compat_sys_timer_settime
-242 common timer_gettime sys_timer_gettime compat_sys_timer_gettime
+241 32 timer_settime sys_timer_settime32
+241 64 timer_settime sys_timer_settime
+241 spu timer_settime sys_timer_settime
+242 32 timer_gettime sys_timer_gettime32
+242 64 timer_gettime sys_timer_gettime
+242 spu timer_gettime sys_timer_gettime
243 common timer_getoverrun sys_timer_getoverrun
244 common timer_delete sys_timer_delete
-245 common clock_settime sys_clock_settime compat_sys_clock_settime
-246 common clock_gettime sys_clock_gettime compat_sys_clock_gettime
-247 common clock_getres sys_clock_getres compat_sys_clock_getres
-248 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
+245 32 clock_settime sys_clock_settime32
+245 64 clock_settime sys_clock_settime
+245 spu clock_settime sys_clock_settime
+246 32 clock_gettime sys_clock_gettime32
+246 64 clock_gettime sys_clock_gettime
+246 spu clock_gettime sys_clock_gettime
+247 32 clock_getres sys_clock_getres_time32
+247 64 clock_getres sys_clock_getres
+247 spu clock_getres sys_clock_getres
+248 32 clock_nanosleep sys_clock_nanosleep_time32
+248 64 clock_nanosleep sys_clock_nanosleep
+248 spu clock_nanosleep sys_clock_nanosleep
249 32 swapcontext ppc_swapcontext ppc32_swapcontext
249 64 swapcontext ppc64_swapcontext
249 spu swapcontext sys_ni_syscall
250 common tgkill sys_tgkill
-251 common utimes sys_utimes compat_sys_utimes
+251 32 utimes sys_utimes_time32
+251 64 utimes sys_utimes
+251 spu utimes sys_utimes
252 common statfs64 sys_statfs64 compat_sys_statfs64
253 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
254 32 fadvise64_64 ppc_fadvise64_64
@@ -308,8 +338,10 @@
261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
262 nospu mq_open sys_mq_open compat_sys_mq_open
263 nospu mq_unlink sys_mq_unlink
-264 nospu mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
-265 nospu mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
+264 32 mq_timedsend sys_mq_timedsend_time32
+264 64 mq_timedsend sys_mq_timedsend
+265 32 mq_timedreceive sys_mq_timedreceive_time32
+265 64 mq_timedreceive sys_mq_timedreceive
266 nospu mq_notify sys_mq_notify compat_sys_mq_notify
267 nospu mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
268 nospu kexec_load sys_kexec_load compat_sys_kexec_load
@@ -324,8 +356,10 @@
277 nospu inotify_rm_watch sys_inotify_rm_watch
278 nospu spu_run sys_spu_run
279 nospu spu_create sys_spu_create
-280 nospu pselect6 sys_pselect6 compat_sys_pselect6
-281 nospu ppoll sys_ppoll compat_sys_ppoll
+280 32 pselect6 sys_pselect6_time32 compat_sys_pselect6_time32
+280 64 pselect6 sys_pselect6
+281 32 ppoll sys_ppoll_time32 compat_sys_ppoll_time32
+281 64 ppoll sys_ppoll
282 common unshare sys_unshare
283 common splice sys_splice
284 common tee sys_tee
@@ -334,7 +368,9 @@
287 common mkdirat sys_mkdirat
288 common mknodat sys_mknodat
289 common fchownat sys_fchownat
-290 common futimesat sys_futimesat compat_sys_futimesat
+290 32 futimesat sys_futimesat_time32
+290 64 futimesat sys_futimesat
+290 spu utimesat sys_futimesat
291 32 fstatat64 sys_fstatat64
291 64 newfstatat sys_newfstatat
291 spu newfstatat sys_newfstatat
@@ -350,15 +386,21 @@
301 common move_pages sys_move_pages compat_sys_move_pages
302 common getcpu sys_getcpu
303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
-304 common utimensat sys_utimensat compat_sys_utimensat
+304 32 utimensat sys_utimensat_time32
+304 64 utimensat sys_utimensat
+304 spu utimensat sys_utimensat
305 common signalfd sys_signalfd compat_sys_signalfd
306 common timerfd_create sys_timerfd_create
307 common eventfd sys_eventfd
308 common sync_file_range2 sys_sync_file_range2 compat_sys_sync_file_range2
309 nospu fallocate sys_fallocate compat_sys_fallocate
310 nospu subpage_prot sys_subpage_prot
-311 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
-312 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
+311 32 timerfd_settime sys_timerfd_settime32
+311 64 timerfd_settime sys_timerfd_settime
+311 spu timerfd_settime sys_timerfd_settime
+312 32 timerfd_gettime sys_timerfd_gettime32
+312 64 timerfd_gettime sys_timerfd_gettime
+312 spu timerfd_gettime sys_timerfd_gettime
313 common signalfd4 sys_signalfd4 compat_sys_signalfd4
314 common eventfd2 sys_eventfd2
315 common epoll_create1 sys_epoll_create1
@@ -389,11 +431,15 @@
340 common getsockopt sys_getsockopt compat_sys_getsockopt
341 common sendmsg sys_sendmsg compat_sys_sendmsg
342 common recvmsg sys_recvmsg compat_sys_recvmsg
-343 common recvmmsg sys_recvmmsg compat_sys_recvmmsg
+343 32 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32
+343 64 recvmmsg sys_recvmmsg
+343 spu recvmmsg sys_recvmmsg
344 common accept4 sys_accept4
345 common name_to_handle_at sys_name_to_handle_at
346 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
-347 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
+347 32 clock_adjtime sys_clock_adjtime32
+347 64 clock_adjtime sys_clock_adjtime
+347 spu clock_adjtime sys_clock_adjtime
348 common syncfs sys_syncfs
349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
350 common setns sys_setns
@@ -414,6 +460,7 @@
363 spu switch_endian sys_ni_syscall
364 common userfaultfd sys_userfaultfd
365 common membarrier sys_membarrier
+# 366-377 originally left for IPC, now unused
378 nospu mlock2 sys_mlock2
379 nospu copy_file_range sys_copy_file_range
380 common preadv2 sys_preadv2 compat_sys_preadv2
@@ -424,4 +471,37 @@
385 nospu pkey_free sys_pkey_free
386 nospu pkey_mprotect sys_pkey_mprotect
387 nospu rseq sys_rseq
-388 nospu io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents
+388 32 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents
+388 64 io_pgetevents sys_io_pgetevents
+# room for arch specific syscalls
+392 64 semtimedop sys_semtimedop
+393 common semget sys_semget
+394 common semctl sys_semctl compat_sys_semctl
+395 common shmget sys_shmget
+396 common shmctl sys_shmctl compat_sys_shmctl
+397 common shmat sys_shmat compat_sys_shmat
+398 common shmdt sys_shmdt
+399 common msgget sys_msgget
+400 common msgsnd sys_msgsnd compat_sys_msgsnd
+401 common msgrcv sys_msgrcv compat_sys_msgrcv
+402 common msgctl sys_msgctl compat_sys_msgctl
+403 32 clock_gettime64 sys_clock_gettime sys_clock_gettime
+404 32 clock_settime64 sys_clock_settime sys_clock_settime
+405 32 clock_adjtime64 sys_clock_adjtime sys_clock_adjtime
+406 32 clock_getres_time64 sys_clock_getres sys_clock_getres
+407 32 clock_nanosleep_time64 sys_clock_nanosleep sys_clock_nanosleep
+408 32 timer_gettime64 sys_timer_gettime sys_timer_gettime
+409 32 timer_settime64 sys_timer_settime sys_timer_settime
+410 32 timerfd_gettime64 sys_timerfd_gettime sys_timerfd_gettime
+411 32 timerfd_settime64 sys_timerfd_settime sys_timerfd_settime
+412 32 utimensat_time64 sys_utimensat sys_utimensat
+413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
+414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
+416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
+418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
+419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
+420 32 semtimedop_time64 sys_semtimedop sys_semtimedop
+421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
+422 32 futex_time64 sys_futex sys_futex
+423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
diff --git a/arch/powerpc/kernel/syscalls/syscalltbl.sh b/arch/powerpc/kernel/syscalls/syscalltbl.sh
index fd620490a542..f7393a7b18aa 100644
--- a/arch/powerpc/kernel/syscalls/syscalltbl.sh
+++ b/arch/powerpc/kernel/syscalls/syscalltbl.sh
@@ -13,10 +13,10 @@ emit() {
t_entry="$3"
while [ $t_nxt -lt $t_nr ]; do
- printf "__SYSCALL(%s,sys_ni_syscall, )\n" "${t_nxt}"
+ printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}"
t_nxt=$((t_nxt+1))
done
- printf "__SYSCALL(%s,%s, )\n" "${t_nxt}" "${t_entry}"
+ printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}"
}
grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 23265a28740b..02f28faba125 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -25,11 +25,11 @@
.globl sys_call_table
sys_call_table:
#ifdef CONFIG_PPC64
-#define __SYSCALL(nr, entry, nargs) .8byte DOTSYM(entry)
+#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
#include <asm/syscall_table_64.h>
#undef __SYSCALL
#else
-#define __SYSCALL(nr, entry, nargs) .long entry
+#define __SYSCALL(nr, entry) .long entry
#include <asm/syscall_table_32.h>
#undef __SYSCALL
#endif
@@ -38,7 +38,7 @@ sys_call_table:
.globl compat_sys_call_table
compat_sys_call_table:
#define compat_sys_sigsuspend sys_sigsuspend
-#define __SYSCALL(nr, entry, nargs) .8byte DOTSYM(entry)
+#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
#include <asm/syscall_table_c32.h>
#undef __SYSCALL
#endif
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 3646affae963..bc0503ef9c9c 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -57,7 +57,6 @@
#include <linux/irq_work.h>
#include <linux/clk-provider.h>
#include <linux/suspend.h>
-#include <linux/rtc.h>
#include <linux/sched/cputime.h>
#include <linux/processor.h>
#include <asm/trace.h>
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
index b1725ad3e13d..858503775c58 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_TRACING) += trace_clock.o
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
-# Disable GCOV & sanitizers in odd or sensitive code
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_ftrace.o := n
+KCOV_INSTRUMENT_ftrace.o := n
UBSAN_SANITIZE_ftrace.o := n
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 32476a6e4e9c..01b1224add49 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -229,7 +229,7 @@ ftrace_call:
* - r0, r11 & r12 are free
*/
livepatch_handler:
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
/* Allocate 3 x 8 bytes */
ld r11, TI_livepatch_sp(r12)
@@ -256,7 +256,7 @@ livepatch_handler:
* restore it.
*/
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
ld r11, TI_livepatch_sp(r12)
@@ -273,7 +273,7 @@ livepatch_handler:
ld r2, -24(r11)
/* Pop livepatch stack frame */
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
subi r11, r11, 24
std r11, TI_livepatch_sp(r12)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 64936b60d521..a21200c6aaea 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -257,24 +257,17 @@ static int __die(const char *str, struct pt_regs *regs, long err)
{
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
- if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
- printk("LE ");
- else
- printk("BE ");
-
- if (IS_ENABLED(CONFIG_PREEMPT))
- pr_cont("PREEMPT ");
-
- if (IS_ENABLED(CONFIG_SMP))
- pr_cont("SMP NR_CPUS=%d ", NR_CPUS);
-
- if (debug_pagealloc_enabled())
- pr_cont("DEBUG_PAGEALLOC ");
-
- if (IS_ENABLED(CONFIG_NUMA))
- pr_cont("NUMA ");
-
- pr_cont("%s\n", ppc_md.name ? ppc_md.name : "");
+ printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n",
+ IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
+ PAGE_SIZE / 1024,
+ early_radix_enabled() ? " MMU=Radix" : "",
+ early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "",
+ IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+ IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
+ IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
+ debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
+ IS_ENABLED(CONFIG_NUMA) ? " NUMA" : "",
+ ppc_md.name ? ppc_md.name : "");
if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
return 1;
@@ -376,16 +369,101 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
force_sig_fault(signr, code, (void __user *)addr, current);
}
+/*
+ * The interrupt architecture has a quirk in that the HV interrupts excluding
+ * the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing
+ * that an interrupt handler must do is save off a GPR into a scratch register,
+ * and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch.
+ * Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing
+ * that it is non-reentrant, which leads to random data corruption.
+ *
+ * The solution is for NMI interrupts in HV mode to check if they originated
+ * from these critical HV interrupt regions. If so, then mark them not
+ * recoverable.
+ *
+ * An alternative would be for HV NMIs to use SPRG for scratch to avoid the
+ * HSPRG1 clobber, however this would cause guest SPRG to be clobbered. Linux
+ * guests should always have MSR[RI]=0 when its scratch SPRG is in use, so
+ * that would work. However any other guest OS that may have the SPRG live
+ * and MSR[RI]=1 could encounter silent corruption.
+ *
+ * Builds that do not support KVM could take this second option to increase
+ * the recoverability of NMIs.
+ */
+void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_POWERNV
+ unsigned long kbase = (unsigned long)_stext;
+ unsigned long nip = regs->nip;
+
+ if (!(regs->msr & MSR_RI))
+ return;
+ if (!(regs->msr & MSR_HV))
+ return;
+ if (regs->msr & MSR_PR)
+ return;
+
+ /*
+ * Now test if the interrupt has hit a range that may be using
+ * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
+ * problem ranges all run un-relocated. Test real and virt modes
+ * at the same time by droping the high bit of the nip (virt mode
+ * entry points still have the +0x4000 offset).
+ */
+ nip &= ~0xc000000000000000ULL;
+ if ((nip >= 0x500 && nip < 0x600) || (nip >= 0x4500 && nip < 0x4600))
+ goto nonrecoverable;
+ if ((nip >= 0x980 && nip < 0xa00) || (nip >= 0x4980 && nip < 0x4a00))
+ goto nonrecoverable;
+ if ((nip >= 0xe00 && nip < 0xec0) || (nip >= 0x4e00 && nip < 0x4ec0))
+ goto nonrecoverable;
+ if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0))
+ goto nonrecoverable;
+
+ /* Trampoline code runs un-relocated so subtract kbase. */
+ if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
+ nip < (unsigned long)(end_real_trampolines - kbase))
+ goto nonrecoverable;
+ if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
+ nip < (unsigned long)(end_virt_trampolines - kbase))
+ goto nonrecoverable;
+ return;
+
+nonrecoverable:
+ regs->msr &= ~MSR_RI;
+#endif
+}
+
void system_reset_exception(struct pt_regs *regs)
{
+ unsigned long hsrr0, hsrr1;
+ bool nested = in_nmi();
+ bool saved_hsrrs = false;
+
/*
* Avoid crashes in case of nested NMI exceptions. Recoverability
* is determined by RI and in_nmi
*/
- bool nested = in_nmi();
if (!nested)
nmi_enter();
+ /*
+ * System reset can interrupt code where HSRRs are live and MSR[RI]=1.
+ * The system reset interrupt itself may clobber HSRRs (e.g., to call
+ * OPAL), so save them here and restore them before returning.
+ *
+ * Machine checks don't need to save HSRRs, as the real mode handler
+ * is careful to avoid them, and the regular handler is not delivered
+ * as an NMI.
+ */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ hsrr0 = mfspr(SPRN_HSRR0);
+ hsrr1 = mfspr(SPRN_HSRR1);
+ saved_hsrrs = true;
+ }
+
+ hv_nmi_check_nonrecoverable(regs);
+
__this_cpu_inc(irq_stat.sreset_irqs);
/* See if any machine dependent calls */
@@ -433,6 +511,11 @@ out:
if (!(regs->msr & MSR_RI))
nmi_panic(regs, "Unrecoverable System Reset");
+ if (saved_hsrrs) {
+ mtspr(SPRN_HSRR0, hsrr0);
+ mtspr(SPRN_HSRR1, hsrr1);
+ }
+
if (!nested)
nmi_exit();
@@ -763,15 +846,15 @@ void machine_check_exception(struct pt_regs *regs)
if (check_io_access(regs))
goto bail;
- /* Must die if the interrupt is not recoverable */
- if (!(regs->msr & MSR_RI))
- nmi_panic(regs, "Unrecoverable Machine check");
-
if (!nested)
nmi_exit();
die("Machine check", regs, SIGBUS);
+ /* Must die if the interrupt is not recoverable */
+ if (!(regs->msr & MSR_RI))
+ nmi_panic(regs, "Unrecoverable Machine check");
+
return;
bail:
@@ -1542,8 +1625,8 @@ bail:
void StackOverflow(struct pt_regs *regs)
{
- printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
- current, regs->gpr[1]);
+ pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n",
+ current->comm, task_pid_nr(current), regs->gpr[1]);
debugger(regs);
show_regs(regs);
panic("kernel stack overflow");
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 7cc38b5b58bc..8db4891acdaf 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -74,7 +74,7 @@ void __init udbg_early_init(void)
#endif
#ifdef CONFIG_PPC_EARLY_DEBUG
- console_loglevel = 10;
+ console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
register_early_udbg_console();
#endif
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 7725a9714736..a31b6234fcd7 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -798,7 +798,6 @@ static int __init vdso_init(void)
BUG_ON(vdso32_pagelist == NULL);
for (i = 0; i < vdso32_pages; i++) {
struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
- ClearPageReserved(pg);
get_page(pg);
vdso32_pagelist[i] = pg;
}
@@ -812,7 +811,6 @@ static int __init vdso_init(void)
BUG_ON(vdso64_pagelist == NULL);
for (i = 0; i < vdso64_pages; i++) {
struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
- ClearPageReserved(pg);
get_page(pg);
vdso64_pagelist[i] = pg;
}
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 50112d4473bb..ce199f6e4256 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -23,6 +23,7 @@ targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
ccflags-y := -shared -fno-common -fno-builtin
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 69cecb346269..28e7d112aa2f 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -9,6 +9,7 @@ targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
ccflags-y := -shared -fno-common -fno-builtin
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index ad1c77f71f54..060a1acd7c6d 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -12,11 +12,8 @@
#include <asm/cache.h>
#include <asm/thread_info.h>
-#if defined(CONFIG_STRICT_KERNEL_RWX) && !defined(CONFIG_PPC32)
-#define STRICT_ALIGN_SIZE (1 << 24)
-#else
-#define STRICT_ALIGN_SIZE PAGE_SIZE
-#endif
+#define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT)
+#define ETEXT_ALIGN_SIZE (1 << CONFIG_ETEXT_SHIFT)
ENTRY(_stext)
@@ -86,11 +83,11 @@ SECTIONS
#ifdef CONFIG_PPC64
/*
- * BLOCK(0) overrides the default output section alignment because
+ * ALIGN(0) overrides the default output section alignment because
* this needs to start right after .head.text in order for fixed
* section placement to work.
*/
- .text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) {
+ .text ALIGN(0) : AT(ADDR(.text) - LOAD_OFFSET) {
#ifdef CONFIG_LD_HEAD_STUB_CATCH
KEEP(*(.linker_stub_catch));
. = . ;
@@ -131,7 +128,7 @@ SECTIONS
} :kernel
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(ETEXT_ALIGN_SIZE);
_etext = .;
PROVIDE32 (etext = .);
@@ -319,6 +316,7 @@ SECTIONS
*(.sdata2)
*(.got.plt) *(.got)
*(.plt)
+ *(.branch_lt)
}
#else
.data : AT(ADDR(.data) - LOAD_OFFSET) {