aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--arch/powerpc/platforms/4xx/msi.c51
-rw-r--r--arch/powerpc/platforms/52xx/Makefile2
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pm.c5
-rw-r--r--arch/powerpc/platforms/85xx/t1042rdb_diu.c4
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype39
-rw-r--r--arch/powerpc/platforms/cell/Makefile2
-rw-r--r--arch/powerpc/platforms/cell/cbe_thermal.c1
-rw-r--r--arch/powerpc/platforms/cell/spufs/fault.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/sputrace.h1
-rw-r--r--arch/powerpc/platforms/chrp/nvram.c3
-rw-r--r--arch/powerpc/platforms/embedded6xx/wii.c10
-rw-r--r--arch/powerpc/platforms/pasemi/dma_lib.c6
-rw-r--r--arch/powerpc/platforms/pasemi/gpio_mdio.c2
-rw-r--r--arch/powerpc/platforms/pasemi/idle.c4
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c2
-rw-r--r--arch/powerpc/platforms/pasemi/misc.c4
-rw-r--r--arch/powerpc/platforms/pasemi/pci.c16
-rw-r--r--arch/powerpc/platforms/pasemi/setup.c53
-rw-r--r--arch/powerpc/platforms/powermac/cache.S1
-rw-r--r--arch/powerpc/platforms/powermac/feature.c2
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c2
-rw-r--r--arch/powerpc/platforms/powermac/pci.c12
-rw-r--r--arch/powerpc/platforms/powermac/sleep.S1
-rw-r--r--arch/powerpc/platforms/powermac/time.c33
-rw-r--r--arch/powerpc/platforms/powermac/udbg_scc.c4
-rw-r--r--arch/powerpc/platforms/powernv/Makefile2
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c12
-rw-r--r--arch/powerpc/platforms/powernv/idle.c245
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c121
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c17
-rw-r--r--arch/powerpc/platforms/powernv/opal-dump.c18
-rw-r--r--arch/powerpc/platforms/powernv/opal-irqchip.c126
-rw-r--r--arch/powerpc/platforms/powernv/opal-kmsg.c30
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor-groups.c28
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S4
-rw-r--r--arch/powerpc/platforms/powernv/opal.c179
-rw-r--r--arch/powerpc/platforms/powernv/pci-cxl.c199
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda-tce.c399
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c248
-rw-r--r--arch/powerpc/platforms/powernv/pci.c158
-rw-r--r--arch/powerpc/platforms/powernv/pci.h53
-rw-r--r--arch/powerpc/platforms/powernv/setup.c10
-rw-r--r--arch/powerpc/platforms/powernv/smp.c27
-rw-r--r--arch/powerpc/platforms/powernv/vas.h1
-rw-r--r--arch/powerpc/platforms/pseries/Makefile2
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S1
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c2
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c51
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c1
-rw-r--r--arch/powerpc/platforms/pseries/ras.c51
-rw-r--r--arch/powerpc/platforms/pseries/setup.c53
51 files changed, 1259 insertions, 1041 deletions
diff --git a/arch/powerpc/platforms/4xx/msi.c b/arch/powerpc/platforms/4xx/msi.c
index 81b2cbce7df8..7c324eff2f22 100644
--- a/arch/powerpc/platforms/4xx/msi.c
+++ b/arch/powerpc/platforms/4xx/msi.c
@@ -146,13 +146,19 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
const u32 *sdr_addr;
dma_addr_t msi_phys;
void *msi_virt;
+ int err;
sdr_addr = of_get_property(dev->dev.of_node, "sdr-base", NULL);
if (!sdr_addr)
- return -1;
+ return -EINVAL;
- mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start)); /*HIGH addr */
- mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start)); /* Low addr */
+ msi_data = of_get_property(dev->dev.of_node, "msi-data", NULL);
+ if (!msi_data)
+ return -EINVAL;
+
+ msi_mask = of_get_property(dev->dev.of_node, "msi-mask", NULL);
+ if (!msi_mask)
+ return -EINVAL;
msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
if (!msi->msi_dev)
@@ -160,30 +166,30 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
msi->msi_regs = of_iomap(msi->msi_dev, 0);
if (!msi->msi_regs) {
- dev_err(&dev->dev, "of_iomap problem failed\n");
- return -ENOMEM;
+ dev_err(&dev->dev, "of_iomap failed\n");
+ err = -ENOMEM;
+ goto node_put;
}
dev_dbg(&dev->dev, "PCIE-MSI: msi register mapped 0x%x 0x%x\n",
(u32) (msi->msi_regs + PEIH_TERMADH), (u32) (msi->msi_regs));
msi_virt = dma_alloc_coherent(&dev->dev, 64, &msi_phys, GFP_KERNEL);
- if (!msi_virt)
- return -ENOMEM;
+ if (!msi_virt) {
+ err = -ENOMEM;
+ goto iounmap;
+ }
msi->msi_addr_hi = upper_32_bits(msi_phys);
msi->msi_addr_lo = lower_32_bits(msi_phys & 0xffffffff);
dev_dbg(&dev->dev, "PCIE-MSI: msi address high 0x%x, low 0x%x\n",
msi->msi_addr_hi, msi->msi_addr_lo);
+ mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start)); /*HIGH addr */
+ mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start)); /* Low addr */
+
/* Progam the Interrupt handler Termination addr registers */
out_be32(msi->msi_regs + PEIH_TERMADH, msi->msi_addr_hi);
out_be32(msi->msi_regs + PEIH_TERMADL, msi->msi_addr_lo);
- msi_data = of_get_property(dev->dev.of_node, "msi-data", NULL);
- if (!msi_data)
- return -1;
- msi_mask = of_get_property(dev->dev.of_node, "msi-mask", NULL);
- if (!msi_mask)
- return -1;
/* Program MSI Expected data and Mask bits */
out_be32(msi->msi_regs + PEIH_MSIED, *msi_data);
out_be32(msi->msi_regs + PEIH_MSIMK, *msi_mask);
@@ -191,6 +197,12 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
dma_free_coherent(&dev->dev, 64, msi_virt, msi_phys);
return 0;
+
+iounmap:
+ iounmap(msi->msi_regs);
+node_put:
+ of_node_put(msi->msi_dev);
+ return err;
}
static int ppc4xx_of_msi_remove(struct platform_device *dev)
@@ -209,7 +221,6 @@ static int ppc4xx_of_msi_remove(struct platform_device *dev)
msi_bitmap_free(&msi->bitmap);
iounmap(msi->msi_regs);
of_node_put(msi->msi_dev);
- kfree(msi);
return 0;
}
@@ -223,18 +234,16 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n");
- msi = kzalloc(sizeof(*msi), GFP_KERNEL);
- if (!msi) {
- dev_err(&dev->dev, "No memory for MSI structure\n");
+ msi = devm_kzalloc(&dev->dev, sizeof(*msi), GFP_KERNEL);
+ if (!msi)
return -ENOMEM;
- }
dev->dev.platform_data = msi;
/* Get MSI ranges */
err = of_address_to_resource(dev->dev.of_node, 0, &res);
if (err) {
dev_err(&dev->dev, "%pOF resource error!\n", dev->dev.of_node);
- goto error_out;
+ return err;
}
msi_irqs = of_irq_count(dev->dev.of_node);
@@ -243,7 +252,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
err = ppc4xx_setup_pcieh_hw(dev, res, msi);
if (err)
- goto error_out;
+ return err;
err = ppc4xx_msi_init_allocator(dev, msi);
if (err) {
@@ -256,7 +265,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
phb->controller_ops.setup_msi_irqs = ppc4xx_setup_msi_irqs;
phb->controller_ops.teardown_msi_irqs = ppc4xx_teardown_msi_irqs;
}
- return err;
+ return 0;
error_out:
ppc4xx_of_msi_remove(dev);
diff --git a/arch/powerpc/platforms/52xx/Makefile b/arch/powerpc/platforms/52xx/Makefile
index ff2f86fe5429..f40d48eab779 100644
--- a/arch/powerpc/platforms/52xx/Makefile
+++ b/arch/powerpc/platforms/52xx/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_PPC_LITE5200) += lite5200.o
obj-$(CONFIG_PPC_MEDIA5200) += media5200.o
obj-$(CONFIG_PM) += mpc52xx_sleep.o mpc52xx_pm.o
-ifeq ($(CONFIG_PPC_LITE5200),y)
+ifdef CONFIG_PPC_LITE5200
obj-$(CONFIG_PM) += lite5200_sleep.o lite5200_pm.o
endif
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
index 31d3515672f3..b1d208ded981 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pm.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
@@ -117,7 +117,10 @@ int mpc52xx_pm_enter(suspend_state_t state)
u32 intr_main_mask;
void __iomem * irq_0x500 = (void __iomem *)CONFIG_KERNEL_START + 0x500;
unsigned long irq_0x500_stop = (unsigned long)irq_0x500 + mpc52xx_ds_cached_size;
- char saved_0x500[mpc52xx_ds_cached_size];
+ char saved_0x500[0x600-0x500];
+
+ if (WARN_ON(mpc52xx_ds_cached_size > sizeof(saved_0x500)))
+ return -ENOMEM;
/* disable all interrupts in PIC */
intr_main_mask = in_be32(&intr->main_mask);
diff --git a/arch/powerpc/platforms/85xx/t1042rdb_diu.c b/arch/powerpc/platforms/85xx/t1042rdb_diu.c
index 58fa3d319f1c..dac36ba82fea 100644
--- a/arch/powerpc/platforms/85xx/t1042rdb_diu.c
+++ b/arch/powerpc/platforms/85xx/t1042rdb_diu.c
@@ -9,8 +9,10 @@
* option) any later version.
*/
+#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -150,3 +152,5 @@ static int __init t1042rdb_diu_init(void)
}
early_initcall(t1042rdb_diu_init);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index e6a1de521319..6c6a7c72cae4 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -74,7 +74,6 @@ config PPC_BOOK3S_64
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_NUMA_BALANCING
select IRQ_WORK
- select HAVE_KERNEL_XZ
config PPC_BOOK3E_64
bool "Embedded processors"
@@ -86,7 +85,6 @@ endchoice
choice
prompt "CPU selection"
- depends on PPC64
default GENERIC_CPU
help
This will create a kernel which is optimised for a particular CPU.
@@ -96,13 +94,17 @@ choice
config GENERIC_CPU
bool "Generic (POWER4 and above)"
- depends on !CPU_LITTLE_ENDIAN
+ depends on PPC64 && !CPU_LITTLE_ENDIAN
config GENERIC_CPU
bool "Generic (POWER8 and above)"
- depends on CPU_LITTLE_ENDIAN
+ depends on PPC64 && CPU_LITTLE_ENDIAN
select ARCH_HAS_FAST_MULTIPLIER
+config GENERIC_CPU
+ bool "Generic 32 bits powerpc"
+ depends on PPC32 && !PPC_8xx
+
config CELL_CPU
bool "Cell Broadband Engine"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
@@ -138,8 +140,37 @@ config E6500_CPU
bool "Freescale e6500"
depends on E500
+config 860_CPU
+ bool "8xx family"
+ depends on PPC_8xx
+
+config E300C2_CPU
+ bool "e300c2 (832x)"
+ depends on PPC_BOOK3S_32
+
+config E300C3_CPU
+ bool "e300c3 (831x)"
+ depends on PPC_BOOK3S_32
+
endchoice
+config TARGET_CPU_BOOL
+ bool
+ default !GENERIC_CPU
+
+config TARGET_CPU
+ string
+ depends on TARGET_CPU_BOOL
+ default "cell" if CELL_CPU
+ default "power5" if POWER5_CPU
+ default "power6" if POWER6_CPU
+ default "power7" if POWER7_CPU
+ default "power8" if POWER8_CPU
+ default "power9" if POWER9_CPU
+ default "860" if 860_CPU
+ default "e300c2" if E300C2_CPU
+ default "e300c3" if E300C3_CPU
+
config PPC_BOOK3S
def_bool y
depends on PPC_BOOK3S_32 || PPC_BOOK3S_64
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index d5f808e8a5f3..10064a33ca96 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o
obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o
-ifeq ($(CONFIG_SMP),y)
+ifdef CONFIG_SMP
obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o
endif
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
index 2c15ff094483..55aac74e1cb9 100644
--- a/arch/powerpc/platforms/cell/cbe_thermal.c
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -49,6 +49,7 @@
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/cpu.h>
+#include <linux/stringify.h>
#include <asm/spu.h>
#include <asm/io.h>
#include <asm/prom.h>
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index 1e002e94d0f6..83cf58daaa79 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -111,7 +111,7 @@ int spufs_handle_class1(struct spu_context *ctx)
{
u64 ea, dsisr, access;
unsigned long flags;
- unsigned flt = 0;
+ vm_fault_t flt = 0;
int ret;
/*
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.h b/arch/powerpc/platforms/cell/spufs/sputrace.h
index d557e999b662..1def11e911ac 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.h
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.h
@@ -3,6 +3,7 @@
#define _TRACE_SPUFS_H
#include <linux/tracepoint.h>
+#include <linux/stringify.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM spufs
diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c
index c3ede2c365c3..791b86398e1d 100644
--- a/arch/powerpc/platforms/chrp/nvram.c
+++ b/arch/powerpc/platforms/chrp/nvram.c
@@ -11,6 +11,7 @@
*/
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
@@ -89,3 +90,5 @@ void __init chrp_nvram_init(void)
return;
}
+
+MODULE_LICENSE("GPL v2");
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 8bb46dcbebd8..403523c061ba 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -67,16 +67,6 @@ void __init wii_memory_fixups(void)
{
struct memblock_region *p = memblock.memory.regions;
- /*
- * This is part of a workaround to allow the use of two
- * discontinuous RAM ranges on the Wii, even if this is
- * currently unsupported on 32-bit PowerPC Linux.
- *
- * We coalesce the two memory ranges of the Wii into a
- * single range, then create a reservation for the "hole"
- * between both ranges.
- */
-
BUG_ON(memblock.memory.cnt != 2);
BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
index 2c72263ad6ab..c80f72c370ae 100644
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -531,7 +531,7 @@ int pasemi_dma_init(void)
iob_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL);
if (!iob_pdev) {
BUG();
- printk(KERN_WARNING "Can't find I/O Bridge\n");
+ pr_warn("Can't find I/O Bridge\n");
err = -ENODEV;
goto out;
}
@@ -540,7 +540,7 @@ int pasemi_dma_init(void)
dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL);
if (!dma_pdev) {
BUG();
- printk(KERN_WARNING "Can't find DMA controller\n");
+ pr_warn("Can't find DMA controller\n");
err = -ENODEV;
goto out;
}
@@ -623,7 +623,7 @@ int pasemi_dma_init(void)
pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 0xffffffff);
pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 0xffffffff);
- printk(KERN_INFO "PA Semi PWRficient DMA library initialized "
+ pr_info("PA Semi PWRficient DMA library initialized "
"(%d tx, %d rx channels)\n", num_txch, num_rxch);
out:
diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c
index c23e60959aa8..6f35a2afe522 100644
--- a/arch/powerpc/platforms/pasemi/gpio_mdio.c
+++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c
@@ -256,7 +256,7 @@ static int gpio_mdio_probe(struct platform_device *ofdev)
err = of_mdiobus_register(new_bus, np);
if (err != 0) {
- printk(KERN_ERR "%s: Cannot register as MDIO bus, err %d\n",
+ pr_err("%s: Cannot register as MDIO bus, err %d\n",
new_bus->name, err);
goto out_free_irq;
}
diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c
index 44e0d9226f0a..8bb4e8082441 100644
--- a/arch/powerpc/platforms/pasemi/idle.c
+++ b/arch/powerpc/platforms/pasemi/idle.c
@@ -78,13 +78,13 @@ static int pasemi_system_reset_exception(struct pt_regs *regs)
static int __init pasemi_idle_init(void)
{
#ifndef CONFIG_PPC_PASEMI_CPUFREQ
- printk(KERN_WARNING "No cpufreq driver, powersavings modes disabled\n");
+ pr_warn("No cpufreq driver, powersavings modes disabled\n");
current_mode = 0;
#endif
ppc_md.system_reset_exception = pasemi_system_reset_exception;
ppc_md.power_save = modes[current_mode].entry;
- printk(KERN_INFO "Using PA6T idle loop (%s)\n", modes[current_mode].name);
+ pr_info("Using PA6T idle loop (%s)\n", modes[current_mode].name);
return 0;
}
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 78b80cbd9768..f06c83f321e6 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -210,7 +210,7 @@ static int __init iob_init(struct device_node *dn)
/* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */
iob_l2_base = (u32 *)__va(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000));
- printk(KERN_INFO "IOBMAP L2 allocated at: %p\n", iob_l2_base);
+ pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base);
/* Allocate a spare page to map all invalid IOTLB pages. */
tmp = memblock_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE);
diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c
index 8571e7bf78b6..aa958a46957f 100644
--- a/arch/powerpc/platforms/pasemi/misc.c
+++ b/arch/powerpc/platforms/pasemi/misc.c
@@ -69,9 +69,7 @@ static int __init pasemi_register_i2c_devices(void)
addr = of_get_property(node, "reg", &len);
if (!addr || len < sizeof(int) ||
*addr > (1 << 10) - 1) {
- printk(KERN_WARNING
- "pasemi_register_i2c_devices: "
- "invalid i2c device entry\n");
+ pr_warn("pasemi_register_i2c_devices: invalid i2c device entry\n");
continue;
}
diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c
index aea9ff2c8e6d..c3c64172482d 100644
--- a/arch/powerpc/platforms/pasemi/pci.c
+++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -205,7 +205,7 @@ static int __init pas_add_bridge(struct device_node *dev)
setup_pa_pxp(hose);
- printk(KERN_INFO "Found PA-PXP PCI host bridge.\n");
+ pr_info("Found PA-PXP PCI host bridge.\n");
/* Interpret the "ranges" property */
pci_process_bridge_OF_ranges(hose, dev, 1);
@@ -216,21 +216,21 @@ static int __init pas_add_bridge(struct device_node *dev)
void __init pas_pci_init(void)
{
struct device_node *np, *root;
+ int res;
root = of_find_node_by_path("/");
if (!root) {
- printk(KERN_CRIT "pas_pci_init: can't find root "
- "of device tree\n");
+ pr_crit("pas_pci_init: can't find root of device tree\n");
return;
}
pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS);
- for (np = NULL; (np = of_get_next_child(root, np)) != NULL;)
- if (np->name && !strcmp(np->name, "pxp") && !pas_add_bridge(np))
- of_node_get(np);
-
- of_node_put(root);
+ np = of_find_compatible_node(root, NULL, "pasemi,rootbus");
+ if (np) {
+ res = pas_add_bridge(np);
+ of_node_put(np);
+ }
}
void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset)
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index d0b8ae53660d..9a6eb04cca83 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -207,8 +207,7 @@ static __init void pas_init_IRQ(void)
break;
}
if (!mpic_node) {
- printk(KERN_ERR
- "Failed to locate the MPIC interrupt controller\n");
+ pr_err("Failed to locate the MPIC interrupt controller\n");
return;
}
@@ -217,12 +216,12 @@ static __init void pas_init_IRQ(void)
naddr = of_n_addr_cells(root);
opprop = of_get_property(root, "platform-open-pic", &opplen);
if (!opprop) {
- printk(KERN_ERR "No platform-open-pic property.\n");
+ pr_err("No platform-open-pic property.\n");
of_node_put(root);
return;
}
openpic_addr = of_read_number(opprop, naddr);
- printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
+ pr_debug("OpenPIC addr: %lx\n", openpic_addr);
mpic_flags = MPIC_LARGE_VECTORS | MPIC_NO_BIAS | MPIC_NO_RESET;
@@ -265,72 +264,72 @@ static int pas_machine_check_handler(struct pt_regs *regs)
srr1 = regs->msr;
if (nmi_virq && mpic_get_mcirq() == nmi_virq) {
- printk(KERN_ERR "NMI delivered\n");
+ pr_err("NMI delivered\n");
debugger(regs);
mpic_end_irq(irq_get_irq_data(nmi_virq));
goto out;
}
dsisr = mfspr(SPRN_DSISR);
- printk(KERN_ERR "Machine Check on CPU %d\n", cpu);
- printk(KERN_ERR "SRR0 0x%016lx SRR1 0x%016lx\n", srr0, srr1);
- printk(KERN_ERR "DSISR 0x%016lx DAR 0x%016lx\n", dsisr, regs->dar);
- printk(KERN_ERR "BER 0x%016lx MER 0x%016lx\n", mfspr(SPRN_PA6T_BER),
+ pr_err("Machine Check on CPU %d\n", cpu);
+ pr_err("SRR0 0x%016lx SRR1 0x%016lx\n", srr0, srr1);
+ pr_err("DSISR 0x%016lx DAR 0x%016lx\n", dsisr, regs->dar);
+ pr_err("BER 0x%016lx MER 0x%016lx\n", mfspr(SPRN_PA6T_BER),
mfspr(SPRN_PA6T_MER));
- printk(KERN_ERR "IER 0x%016lx DER 0x%016lx\n", mfspr(SPRN_PA6T_IER),
+ pr_err("IER 0x%016lx DER 0x%016lx\n", mfspr(SPRN_PA6T_IER),
mfspr(SPRN_PA6T_DER));
- printk(KERN_ERR "Cause:\n");
+ pr_err("Cause:\n");
if (srr1 & 0x200000)
- printk(KERN_ERR "Signalled by SDC\n");
+ pr_err("Signalled by SDC\n");
if (srr1 & 0x100000) {
- printk(KERN_ERR "Load/Store detected error:\n");
+ pr_err("Load/Store detected error:\n");
if (dsisr & 0x8000)
- printk(KERN_ERR "D-cache ECC double-bit error or bus error\n");
+ pr_err("D-cache ECC double-bit error or bus error\n");
if (dsisr & 0x4000)
- printk(KERN_ERR "LSU snoop response error\n");
+ pr_err("LSU snoop response error\n");
if (dsisr & 0x2000) {
- printk(KERN_ERR "MMU SLB multi-hit or invalid B field\n");
+ pr_err("MMU SLB multi-hit or invalid B field\n");
dump_slb = 1;
}
if (dsisr & 0x1000)
- printk(KERN_ERR "Recoverable Duptags\n");
+ pr_err("Recoverable Duptags\n");
if (dsisr & 0x800)
- printk(KERN_ERR "Recoverable D-cache parity error count overflow\n");
+ pr_err("Recoverable D-cache parity error count overflow\n");
if (dsisr & 0x400)
- printk(KERN_ERR "TLB parity error count overflow\n");
+ pr_err("TLB parity error count overflow\n");
}
if (srr1 & 0x80000)
- printk(KERN_ERR "Bus Error\n");
+ pr_err("Bus Error\n");
if (srr1 & 0x40000) {
- printk(KERN_ERR "I-side SLB multiple hit\n");
+ pr_err("I-side SLB multiple hit\n");
dump_slb = 1;
}
if (srr1 & 0x20000)
- printk(KERN_ERR "I-cache parity error hit\n");
+ pr_err("I-cache parity error hit\n");
if (num_mce_regs == 0)
- printk(KERN_ERR "No MCE registers mapped yet, can't dump\n");
+ pr_err("No MCE registers mapped yet, can't dump\n");
else
- printk(KERN_ERR "SoC debug registers:\n");
+ pr_err("SoC debug registers:\n");
for (i = 0; i < num_mce_regs; i++)
- printk(KERN_ERR "%s: 0x%08x\n", mce_regs[i].name,
+ pr_err("%s: 0x%08x\n", mce_regs[i].name,
in_le32(mce_regs[i].addr));
if (dump_slb) {
unsigned long e, v;
int i;
- printk(KERN_ERR "slb contents:\n");
+ pr_err("slb contents:\n");
for (i = 0; i < mmu_slb_size; i++) {
asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i));
asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i));
- printk(KERN_ERR "%02d %016lx %016lx\n", i, e, v);
+ pr_err("%02d %016lx %016lx\n", i, e, v);
}
}
diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S
index cc5347eb1662..27862feee4a5 100644
--- a/arch/powerpc/platforms/powermac/cache.S
+++ b/arch/powerpc/platforms/powermac/cache.S
@@ -17,6 +17,7 @@
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/cputable.h>
+#include <asm/feature-fixups.h>
/*
* Flush and disable all data caches (dL1, L2, L3). This is used
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 3f82cb24eb2b..4eb8cb38fc69 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2889,10 +2889,8 @@ set_initial_features(void)
/* On all machines, switch modem & serial ports off */
for_each_node_by_name(np, "ch-a")
initial_serial_shutdown(np);
- of_node_put(np);
for_each_node_by_name(np, "ch-b")
initial_serial_shutdown(np);
- of_node_put(np);
}
void __init
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index fa89f30e7f27..d4d411820597 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -1192,7 +1192,7 @@ static void pmac_i2c_devscan(void (*callback)(struct device_node *dev,
{ NULL, NULL, 0 },
};
- /* Only some devices need to have platform functions instanciated
+ /* Only some devices need to have platform functions instantiated
* here. For now, we have a table. Others, like 9554 i2c GPIOs used
* on Xserve, if we ever do a driver for them, will use their own
* platform function instance
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index df762bb3c735..04527d13d5a4 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -781,12 +781,12 @@ static int __init pmac_add_bridge(struct device_node *dev)
struct resource rsrc;
char *disp_name;
const int *bus_range;
- int primary = 1, has_address = 0;
+ int primary = 1;
DBG("Adding PCI host bridge %pOF\n", dev);
/* Fetch host bridge registers address */
- has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
+ of_address_to_resource(dev, 0, &rsrc);
/* Get bus range if any */
bus_range = of_get_property(dev, "bus-range", &len);
@@ -904,7 +904,7 @@ static int pmac_pci_root_bridge_prepare(struct pci_host_bridge *bridge)
void __init pmac_pci_init(void)
{
struct device_node *np, *root;
- struct device_node *ht = NULL;
+ struct device_node *ht __maybe_unused = NULL;
pci_set_flags(PCI_CAN_SKIP_ISA_ALIGN);
@@ -1019,7 +1019,7 @@ static bool pmac_pci_enable_device_hook(struct pci_dev *dev)
return true;
}
-void pmac_pci_fixup_ohci(struct pci_dev *dev)
+static void pmac_pci_fixup_ohci(struct pci_dev *dev)
{
struct device_node *node = pci_device_to_OF_node(dev);
@@ -1054,7 +1054,7 @@ void __init pmac_pcibios_after_init(void)
}
}
-void pmac_pci_fixup_cardbus(struct pci_dev* dev)
+static void pmac_pci_fixup_cardbus(struct pci_dev *dev)
{
if (!machine_is(powermac))
return;
@@ -1091,7 +1091,7 @@ void pmac_pci_fixup_cardbus(struct pci_dev* dev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_ANY_ID, pmac_pci_fixup_cardbus);
-void pmac_pci_fixup_pciata(struct pci_dev* dev)
+static void pmac_pci_fixup_pciata(struct pci_dev *dev)
{
u8 progif = 0;
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
index 1c2802fabd57..f89808b9713d 100644
--- a/arch/powerpc/platforms/powermac/sleep.S
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -18,6 +18,7 @@
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/mmu.h>
+#include <asm/feature-fixups.h>
#define MAGIC 0x4c617273 /* 'Lars' */
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 7c968e46736f..f92c1918fb56 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -34,6 +34,8 @@
#include <asm/nvram.h>
#include <asm/smu.h>
+#include "pmac.h"
+
#undef DEBUG
#ifdef DEBUG
@@ -42,7 +44,11 @@
#define DBG(x...)
#endif
-/* Apparently the RTC stores seconds since 1 Jan 1904 */
+/*
+ * Offset between Unix time (1970-based) and Mac time (1904-based). Cuda and PMU
+ * times wrap in 2040. If we need to handle later times, the read_time functions
+ * need to be changed to interpret wrapped times as post-2040.
+ */
#define RTC_OFFSET 2082844800
/*
@@ -97,8 +103,11 @@ static time64_t cuda_get_time(void)
if (req.reply_len != 7)
printk(KERN_ERR "cuda_get_time: got %d byte reply\n",
req.reply_len);
- now = (req.reply[3] << 24) + (req.reply[4] << 16)
- + (req.reply[5] << 8) + req.reply[6];
+ now = (u32)((req.reply[3] << 24) + (req.reply[4] << 16) +
+ (req.reply[5] << 8) + req.reply[6]);
+ /* it's either after year 2040, or the RTC has gone backwards */
+ WARN_ON(now < RTC_OFFSET);
+
return now - RTC_OFFSET;
}
@@ -106,10 +115,10 @@ static time64_t cuda_get_time(void)
static int cuda_set_rtc_time(struct rtc_time *tm)
{
- time64_t nowtime;
+ u32 nowtime;
struct adb_request req;
- nowtime = rtc_tm_to_time64(tm) + RTC_OFFSET;
+ nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
nowtime >> 24, nowtime >> 16, nowtime >> 8,
nowtime) < 0)
@@ -140,8 +149,12 @@ static time64_t pmu_get_time(void)
if (req.reply_len != 4)
printk(KERN_ERR "pmu_get_time: got %d byte reply from PMU\n",
req.reply_len);
- now = (req.reply[0] << 24) + (req.reply[1] << 16)
- + (req.reply[2] << 8) + req.reply[3];
+ now = (u32)((req.reply[0] << 24) + (req.reply[1] << 16) +
+ (req.reply[2] << 8) + req.reply[3]);
+
+ /* it's either after year 2040, or the RTC has gone backwards */
+ WARN_ON(now < RTC_OFFSET);
+
return now - RTC_OFFSET;
}
@@ -149,10 +162,10 @@ static time64_t pmu_get_time(void)
static int pmu_set_rtc_time(struct rtc_time *tm)
{
- time64_t nowtime;
+ u32 nowtime;
struct adb_request req;
- nowtime = rtc_tm_to_time64(tm) + RTC_OFFSET;
+ nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
if (pmu_request(&req, NULL, 5, PMU_SET_RTC, nowtime >> 24,
nowtime >> 16, nowtime >> 8, nowtime) < 0)
return -ENXIO;
@@ -238,7 +251,7 @@ int pmac_set_rtc_time(struct rtc_time *tm)
* Calibrate the decrementer register using VIA timer 1.
* This is used both on powermacs and CHRP machines.
*/
-int __init via_calibrate_decr(void)
+static int __init via_calibrate_decr(void)
{
struct device_node *vias;
volatile unsigned char __iomem *via;
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
index d83135a9830e..8901973ed683 100644
--- a/arch/powerpc/platforms/powermac/udbg_scc.c
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -73,7 +73,7 @@ void udbg_scc_init(int force_scc)
struct device_node *stdout = NULL, *escc = NULL, *macio = NULL;
struct device_node *ch, *ch_def = NULL, *ch_a = NULL;
const char *path;
- int i, x;
+ int i;
escc = of_find_node_by_name(NULL, "escc");
if (escc == NULL)
@@ -120,7 +120,7 @@ void udbg_scc_init(int force_scc)
mb();
for (i = 20000; i != 0; --i)
- x = in_8(sccc);
+ in_8(sccc);
out_8(sccc, 0x09); /* reset A or B side */
out_8(sccc, 0xc0);
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 703a350a7f4e..b540ce8eec55 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -6,7 +6,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
-obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o
+obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
obj-$(CONFIG_CXL_BASE) += pci-cxl.o
obj-$(CONFIG_EEH) += eeh-powernv.o
obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index ddfc3544d285..3c1beae29f2d 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -223,6 +223,14 @@ int pnv_eeh_post_init(void)
eeh_probe_devices();
eeh_addr_cache_build();
+ if (eeh_has_flag(EEH_POSTPONED_PROBE)) {
+ eeh_clear_flag(EEH_POSTPONED_PROBE);
+ if (eeh_enabled())
+ pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
+ else
+ pr_info("EEH: No capable adapters found\n");
+ }
+
/* Register OPAL event notifier */
eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
if (eeh_event_irq < 0) {
@@ -384,8 +392,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
return NULL;
/* Skip if we haven't probed yet */
- if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE)
+ if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE) {
+ eeh_add_flag(EEH_POSTPONED_PROBE);
return NULL;
+ }
/* Initialize eeh device */
edev->class_code = pdn->class_code;
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 1c5d0675b43c..35f699ebb662 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -36,6 +36,8 @@
#define P9_STOP_SPR_PSSCR 855
static u32 supported_cpuidle_states;
+struct pnv_idle_states_t *pnv_idle_states;
+int nr_pnv_idle_states;
/*
* The default stop state that will be used by ppc_md.power_save
@@ -177,11 +179,6 @@ static void pnv_alloc_idle_core_states(void)
paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
paca_ptrs[cpu]->thread_mask = 1 << j;
- if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
- continue;
- paca_ptrs[cpu]->thread_sibling_pacas =
- kmalloc_node(paca_ptr_array_size,
- GFP_KERNEL, node);
}
}
@@ -622,48 +619,10 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
* @dt_idle_states: Number of idle state entries
* Returns 0 on success
*/
-static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
- int dt_idle_states)
+static int __init pnv_power9_idle_init(void)
{
- u64 *psscr_val = NULL;
- u64 *psscr_mask = NULL;
- u32 *residency_ns = NULL;
u64 max_residency_ns = 0;
- int rc = 0, i;
-
- psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
- psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
- residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
- GFP_KERNEL);
-
- if (!psscr_val || !psscr_mask || !residency_ns) {
- rc = -1;
- goto out;
- }
-
- if (of_property_read_u64_array(np,
- "ibm,cpu-idle-state-psscr",
- psscr_val, dt_idle_states)) {
- pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
- rc = -1;
- goto out;
- }
-
- if (of_property_read_u64_array(np,
- "ibm,cpu-idle-state-psscr-mask",
- psscr_mask, dt_idle_states)) {
- pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
- rc = -1;
- goto out;
- }
-
- if (of_property_read_u32_array(np,
- "ibm,cpu-idle-state-residency-ns",
- residency_ns, dt_idle_states)) {
- pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
- rc = -1;
- goto out;
- }
+ int i;
/*
* Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
@@ -679,33 +638,37 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
* the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
*/
pnv_first_deep_stop_state = MAX_STOP_STATE;
- for (i = 0; i < dt_idle_states; i++) {
+ for (i = 0; i < nr_pnv_idle_states; i++) {
int err;
- u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;
+ struct pnv_idle_states_t *state = &pnv_idle_states[i];
+ u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
- if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
- (pnv_first_deep_stop_state > psscr_rl))
+ if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
+ pnv_first_deep_stop_state > psscr_rl)
pnv_first_deep_stop_state = psscr_rl;
- err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i],
- flags[i]);
+ err = validate_psscr_val_mask(&state->psscr_val,
+ &state->psscr_mask,
+ state->flags);
if (err) {
- report_invalid_psscr_val(psscr_val[i], err);
+ report_invalid_psscr_val(state->psscr_val, err);
continue;
}
- if (max_residency_ns < residency_ns[i]) {
- max_residency_ns = residency_ns[i];
- pnv_deepest_stop_psscr_val = psscr_val[i];
- pnv_deepest_stop_psscr_mask = psscr_mask[i];
- pnv_deepest_stop_flag = flags[i];
+ state->valid = true;
+
+ if (max_residency_ns < state->residency_ns) {
+ max_residency_ns = state->residency_ns;
+ pnv_deepest_stop_psscr_val = state->psscr_val;
+ pnv_deepest_stop_psscr_mask = state->psscr_mask;
+ pnv_deepest_stop_flag = state->flags;
deepest_stop_found = true;
}
if (!default_stop_found &&
- (flags[i] & OPAL_PM_STOP_INST_FAST)) {
- pnv_default_stop_val = psscr_val[i];
- pnv_default_stop_mask = psscr_mask[i];
+ (state->flags & OPAL_PM_STOP_INST_FAST)) {
+ pnv_default_stop_val = state->psscr_val;
+ pnv_default_stop_mask = state->psscr_mask;
default_stop_found = true;
}
}
@@ -728,11 +691,8 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
pnv_first_deep_stop_state);
-out:
- kfree(psscr_val);
- kfree(psscr_mask);
- kfree(residency_ns);
- return rc;
+
+ return 0;
}
/*
@@ -740,50 +700,146 @@ out:
*/
static void __init pnv_probe_idle_states(void)
{
- struct device_node *np;
- int dt_idle_states;
- u32 *flags = NULL;
int i;
+ if (nr_pnv_idle_states < 0) {
+ pr_warn("cpuidle-powernv: no idle states found in the DT\n");
+ return;
+ }
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (pnv_power9_idle_init())
+ return;
+ }
+
+ for (i = 0; i < nr_pnv_idle_states; i++)
+ supported_cpuidle_states |= pnv_idle_states[i].flags;
+}
+
+/*
+ * This function parses device-tree and populates all the information
+ * into pnv_idle_states structure. It also sets up nr_pnv_idle_states
+ * which is the number of cpuidle states discovered through device-tree.
+ */
+
+static int pnv_parse_cpuidle_dt(void)
+{
+ struct device_node *np;
+ int nr_idle_states, i;
+ int rc = 0;
+ u32 *temp_u32;
+ u64 *temp_u64;
+ const char **temp_string;
+
np = of_find_node_by_path("/ibm,opal/power-mgt");
if (!np) {
pr_warn("opal: PowerMgmt Node not found\n");
- goto out;
+ return -ENODEV;
}
- dt_idle_states = of_property_count_u32_elems(np,
- "ibm,cpu-idle-state-flags");
- if (dt_idle_states < 0) {
- pr_warn("cpuidle-powernv: no idle states found in the DT\n");
+ nr_idle_states = of_property_count_u32_elems(np,
+ "ibm,cpu-idle-state-flags");
+
+ pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states),
+ GFP_KERNEL);
+ temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL);
+ temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL);
+ temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL);
+
+ if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) {
+ pr_err("Could not allocate memory for dt parsing\n");
+ rc = -ENOMEM;
goto out;
}
- flags = kcalloc(dt_idle_states, sizeof(*flags), GFP_KERNEL);
-
- if (of_property_read_u32_array(np,
- "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
+ /* Read flags */
+ if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags",
+ temp_u32, nr_idle_states)) {
pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ for (i = 0; i < nr_idle_states; i++)
+ pnv_idle_states[i].flags = temp_u32[i];
+
+ /* Read latencies */
+ if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns",
+ temp_u32, nr_idle_states)) {
+ pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ for (i = 0; i < nr_idle_states; i++)
+ pnv_idle_states[i].latency_ns = temp_u32[i];
+
+ /* Read residencies */
+ if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
+ temp_u32, nr_idle_states)) {
+ pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
+ rc = -EINVAL;
goto out;
}
+ for (i = 0; i < nr_idle_states; i++)
+ pnv_idle_states[i].residency_ns = temp_u32[i];
+ /* For power9 */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (pnv_power9_idle_init(np, flags, dt_idle_states))
+ /* Read pm_crtl_val */
+ if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
+ temp_u64, nr_idle_states)) {
+ pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ for (i = 0; i < nr_idle_states; i++)
+ pnv_idle_states[i].psscr_val = temp_u64[i];
+
+ /* Read pm_crtl_mask */
+ if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask",
+ temp_u64, nr_idle_states)) {
+ pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
+ rc = -EINVAL;
goto out;
+ }
+ for (i = 0; i < nr_idle_states; i++)
+ pnv_idle_states[i].psscr_mask = temp_u64[i];
}
- for (i = 0; i < dt_idle_states; i++)
- supported_cpuidle_states |= flags[i];
+ /*
+ * power8 specific properties ibm,cpu-idle-state-pmicr-mask and
+ * ibm,cpu-idle-state-pmicr-val were never used and there is no
+ * plan to use it in near future. Hence, not parsing these properties
+ */
+ if (of_property_read_string_array(np, "ibm,cpu-idle-state-names",
+ temp_string, nr_idle_states) < 0) {
+ pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ for (i = 0; i < nr_idle_states; i++)
+ strlcpy(pnv_idle_states[i].name, temp_string[i],
+ PNV_IDLE_NAME_LEN);
+ nr_pnv_idle_states = nr_idle_states;
+ rc = 0;
out:
- kfree(flags);
+ kfree(temp_u32);
+ kfree(temp_u64);
+ kfree(temp_string);
+ return rc;
}
+
static int __init pnv_init_idle_states(void)
{
-
+ int rc = 0;
supported_cpuidle_states = 0;
+ /* In case we error out nr_pnv_idle_states will be zero */
+ nr_pnv_idle_states = 0;
if (cpuidle_disable != IDLE_NO_OVERRIDE)
goto out;
-
+ rc = pnv_parse_cpuidle_dt();
+ if (rc)
+ return rc;
pnv_probe_idle_states();
if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
@@ -805,29 +861,6 @@ static int __init pnv_init_idle_states(void)
pnv_alloc_idle_core_states();
- /*
- * For each CPU, record its PACA address in each of it's
- * sibling thread's PACA at the slot corresponding to this
- * CPU's index in the core.
- */
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- int cpu;
-
- pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
- for_each_present_cpu(cpu) {
- int base_cpu = cpu_first_thread_sibling(cpu);
- int idx = cpu_thread_in_core(cpu);
- int i;
-
- for (i = 0; i < threads_per_core; i++) {
- int j = base_cpu + i;
-
- paca_ptrs[j]->thread_sibling_pacas[idx] =
- paca_ptrs[cpu];
- }
- }
- }
-
if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
ppc_md.power_save = power7_idle;
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index b99283df8584..51dc398ae3f7 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -47,38 +47,9 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
}
-static bool valid_memtrace_range(struct memtrace_entry *dev,
- unsigned long start, unsigned long size)
-{
- if ((start >= dev->start) &&
- ((start + size) <= (dev->start + dev->size)))
- return true;
-
- return false;
-}
-
-static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
-{
- unsigned long size = vma->vm_end - vma->vm_start;
- struct memtrace_entry *dev = filp->private_data;
-
- if (!valid_memtrace_range(dev, vma->vm_pgoff << PAGE_SHIFT, size))
- return -EINVAL;
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- if (remap_pfn_range(vma, vma->vm_start,
- vma->vm_pgoff + (dev->start >> PAGE_SHIFT),
- size, vma->vm_page_prot))
- return -EAGAIN;
-
- return 0;
-}
-
static const struct file_operations memtrace_fops = {
.llseek = default_llseek,
.read = memtrace_read,
- .mmap = memtrace_mmap,
.open = simple_open,
};
@@ -206,8 +177,11 @@ static int memtrace_init_debugfs(void)
snprintf(ent->name, 16, "%08x", ent->nid);
dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
- if (!dir)
+ if (!dir) {
+ pr_err("Failed to create debugfs directory for node %d\n",
+ ent->nid);
return -1;
+ }
ent->dir = dir;
debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
@@ -218,18 +192,93 @@ static int memtrace_init_debugfs(void)
return ret;
}
+static int online_mem_block(struct memory_block *mem, void *arg)
+{
+ return device_online(&mem->dev);
+}
+
+/*
+ * Iterate through the chunks of memory we have removed from the kernel
+ * and attempt to add them back to the kernel.
+ */
+static int memtrace_online(void)
+{
+ int i, ret = 0;
+ struct memtrace_entry *ent;
+
+ for (i = memtrace_array_nr - 1; i >= 0; i--) {
+ ent = &memtrace_array[i];
+
+ /* We have onlined this chunk previously */
+ if (ent->nid == -1)
+ continue;
+
+ /* Remove from io mappings */
+ if (ent->mem) {
+ iounmap(ent->mem);
+ ent->mem = 0;
+ }
+
+ if (add_memory(ent->nid, ent->start, ent->size)) {
+ pr_err("Failed to add trace memory to node %d\n",
+ ent->nid);
+ ret += 1;
+ continue;
+ }
+
+ /*
+ * If kernel isn't compiled with the auto online option
+ * we need to online the memory ourselves.
+ */
+ if (!memhp_auto_online) {
+ walk_memory_range(PFN_DOWN(ent->start),
+ PFN_UP(ent->start + ent->size - 1),
+ NULL, online_mem_block);
+ }
+
+ /*
+ * Memory was added successfully so clean up references to it
+ * so on reentry we can tell that this chunk was added.
+ */
+ debugfs_remove_recursive(ent->dir);
+ pr_info("Added trace memory back to node %d\n", ent->nid);
+ ent->size = ent->start = ent->nid = -1;
+ }
+ if (ret)
+ return ret;
+
+ /* If all chunks of memory were added successfully, reset globals */
+ kfree(memtrace_array);
+ memtrace_array = NULL;
+ memtrace_size = 0;
+ memtrace_array_nr = 0;
+ return 0;
+}
+
static int memtrace_enable_set(void *data, u64 val)
{
- if (memtrace_size)
+ u64 bytes;
+
+ /*
+ * Don't attempt to do anything if size isn't aligned to a memory
+ * block or equal to zero.
+ */
+ bytes = memory_block_size_bytes();
+ if (val & (bytes - 1)) {
+ pr_err("Value must be aligned with 0x%llx\n", bytes);
return -EINVAL;
+ }
- if (!val)
- return -EINVAL;
+ /* Re-add/online previously removed/offlined memory */
+ if (memtrace_size) {
+ if (memtrace_online())
+ return -EAGAIN;
+ }
- /* Make sure size is aligned to a memory block */
- if (val & (memory_block_size_bytes() - 1))
- return -EINVAL;
+ if (!val)
+ return 0;
+ /* Offline and remove memory */
if (memtrace_init_regions_runtime(val))
return -EINVAL;
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 8cdf91f5d3a4..8006c54a91e3 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -17,7 +17,9 @@
#include <linux/pci.h>
#include <linux/memblock.h>
#include <linux/iommu.h>
+#include <linux/debugfs.h>
+#include <asm/debugfs.h>
#include <asm/tlb.h>
#include <asm/powernv.h>
#include <asm/reg.h>
@@ -44,7 +46,8 @@ static DEFINE_SPINLOCK(npu_context_lock);
* entire TLB on the GPU for the given PID rather than each specific address in
* the range.
*/
-#define ATSD_THRESHOLD (2*1024*1024)
+static uint64_t atsd_threshold = 2 * 1024 * 1024;
+static struct dentry *atsd_threshold_dentry;
/*
* Other types of TCE cache invalidation are not functional in the
@@ -437,8 +440,9 @@ static int get_mmio_atsd_reg(struct npu *npu)
int i;
for (i = 0; i < npu->mmio_atsd_count; i++) {
- if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
- return i;
+ if (!test_bit(i, &npu->mmio_atsd_usage))
+ if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
+ return i;
}
return -ENOSPC;
@@ -683,7 +687,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct npu_context *npu_context = mn_to_npu_context(mn);
unsigned long address;
- if (end - start > ATSD_THRESHOLD) {
+ if (end - start > atsd_threshold) {
/*
* Just invalidate the entire PID if the address range is too
* large.
@@ -958,6 +962,11 @@ int pnv_npu2_init(struct pnv_phb *phb)
static int npu_index;
uint64_t rc = 0;
+ if (!atsd_threshold_dentry) {
+ atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",
+ 0600, powerpc_debugfs_root, &atsd_threshold);
+ }
+
phb->npu.nmmu_flush =
of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
for_each_child_of_node(phb->hose->dn, dn) {
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
index 0dc8fa4e0af2..198143833f00 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -225,13 +225,16 @@ static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *
if (rc == OPAL_PARAMETER)
rc = opal_dump_info(&id, &size);
+ if (rc) {
+ pr_warn("%s: Failed to get dump info (%d)\n",
+ __func__, rc);
+ return rc;
+ }
+
*dump_id = be32_to_cpu(id);
*dump_size = be32_to_cpu(size);
*dump_type = be32_to_cpu(type);
- if (rc)
- pr_warn("%s: Failed to get dump info (%d)\n",
- __func__, rc);
return rc;
}
@@ -368,13 +371,12 @@ static irqreturn_t process_dump(int irq, void *data)
{
int rc;
uint32_t dump_id, dump_size, dump_type;
- struct dump_obj *dump;
char name[22];
struct kobject *kobj;
rc = dump_read_info(&dump_id, &dump_size, &dump_type);
if (rc != OPAL_SUCCESS)
- return rc;
+ return IRQ_HANDLED;
sprintf(name, "0x%x-0x%x", dump_type, dump_id);
@@ -386,12 +388,10 @@ static irqreturn_t process_dump(int irq, void *data)
if (kobj) {
/* Drop reference added by kset_find_obj() */
kobject_put(kobj);
- return 0;
+ return IRQ_HANDLED;
}
- dump = create_dump_obj(dump_id, dump_size, dump_type);
- if (!dump)
- return -1;
+ create_dump_obj(dump_id, dump_size, dump_type);
return IRQ_HANDLED;
}
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 605c7e5d52c2..bc97770a67db 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -22,6 +22,7 @@
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/slab.h>
+#include <linux/of_irq.h>
#include <asm/machdep.h>
#include <asm/opal.h>
@@ -38,8 +39,8 @@ struct opal_event_irqchip {
};
static struct opal_event_irqchip opal_event_irqchip;
static u64 last_outstanding_events;
-static unsigned int opal_irq_count;
-static unsigned int *opal_irqs;
+static int opal_irq_count;
+static struct resource *opal_irqs;
void opal_handle_events(void)
{
@@ -165,24 +166,23 @@ void opal_event_shutdown(void)
/* First free interrupts, which will also mask them */
for (i = 0; i < opal_irq_count; i++) {
- if (!opal_irqs[i])
+ if (!opal_irqs || !opal_irqs[i].start)
continue;
if (in_interrupt() || irqs_disabled())
- disable_irq_nosync(opal_irqs[i]);
+ disable_irq_nosync(opal_irqs[i].start);
else
- free_irq(opal_irqs[i], NULL);
+ free_irq(opal_irqs[i].start, NULL);
- opal_irqs[i] = 0;
+ opal_irqs[i].start = 0;
}
}
int __init opal_event_init(void)
{
struct device_node *dn, *opal_node;
- const char **names;
- u32 *irqs;
- int i, rc;
+ bool old_style = false;
+ int i, rc = 0;
opal_node = of_find_node_by_path("/ibm,opal");
if (!opal_node) {
@@ -207,67 +207,91 @@ int __init opal_event_init(void)
goto out;
}
- /* Get opal-interrupts property and names if present */
- rc = of_property_count_u32_elems(opal_node, "opal-interrupts");
- if (rc < 0)
- goto out;
+ /* Look for new-style (standard) "interrupts" property */
+ opal_irq_count = of_irq_count(opal_node);
- opal_irq_count = rc;
- pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count);
+ /* Absent ? Look for the old one */
+ if (opal_irq_count < 1) {
+ /* Get opal-interrupts property and names if present */
+ rc = of_property_count_u32_elems(opal_node, "opal-interrupts");
+ if (rc > 0)
+ opal_irq_count = rc;
+ old_style = true;
+ }
- irqs = kcalloc(opal_irq_count, sizeof(*irqs), GFP_KERNEL);
- names = kcalloc(opal_irq_count, sizeof(*names), GFP_KERNEL);
- opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL);
+ /* No interrupts ? Bail out */
+ if (!opal_irq_count)
+ goto out;
- if (WARN_ON(!irqs || !names || !opal_irqs))
- goto out_free;
+ pr_debug("OPAL: Found %d interrupts reserved for OPAL using %s scheme\n",
+ opal_irq_count, old_style ? "old" : "new");
- rc = of_property_read_u32_array(opal_node, "opal-interrupts",
- irqs, opal_irq_count);
- if (rc < 0) {
- pr_err("Error %d reading opal-interrupts array\n", rc);
- goto out_free;
+ /* Allocate an IRQ resources array */
+ opal_irqs = kcalloc(opal_irq_count, sizeof(struct resource), GFP_KERNEL);
+ if (WARN_ON(!opal_irqs)) {
+ rc = -ENOMEM;
+ goto out;
}
- /* It's not an error for the names to be missing */
- of_property_read_string_array(opal_node, "opal-interrupts-names",
- names, opal_irq_count);
+ /* Build the resources array */
+ if (old_style) {
+ /* Old style "opal-interrupts" property */
+ for (i = 0; i < opal_irq_count; i++) {
+ struct resource *r = &opal_irqs[i];
+ const char *name = NULL;
+ u32 hw_irq;
+ int virq;
+
+ rc = of_property_read_u32_index(opal_node, "opal-interrupts",
+ i, &hw_irq);
+ if (WARN_ON(rc < 0)) {
+ opal_irq_count = i;
+ break;
+ }
+ of_property_read_string_index(opal_node, "opal-interrupts-names",
+ i, &name);
+ virq = irq_create_mapping(NULL, hw_irq);
+ if (!virq) {
+ pr_warn("Failed to map OPAL irq 0x%x\n", hw_irq);
+ continue;
+ }
+ r->start = r->end = virq;
+ r->flags = IORESOURCE_IRQ | IRQ_TYPE_LEVEL_LOW;
+ r->name = name;
+ }
+ } else {
+ /* new style standard "interrupts" property */
+ rc = of_irq_to_resource_table(opal_node, opal_irqs, opal_irq_count);
+ if (WARN_ON(rc < 0)) {
+ opal_irq_count = 0;
+ kfree(opal_irqs);
+ goto out;
+ }
+ if (WARN_ON(rc < opal_irq_count))
+ opal_irq_count = rc;
+ }
/* Install interrupt handlers */
for (i = 0; i < opal_irq_count; i++) {
- unsigned int virq;
- char *name;
-
- /* Get hardware and virtual IRQ */
- virq = irq_create_mapping(NULL, irqs[i]);
- if (!virq) {
- pr_warn("Failed to map irq 0x%x\n", irqs[i]);
- continue;
- }
+ struct resource *r = &opal_irqs[i];
+ const char *name;
- if (names[i] && strlen(names[i]))
- name = kasprintf(GFP_KERNEL, "opal-%s", names[i]);
+ /* Prefix name */
+ if (r->name && strlen(r->name))
+ name = kasprintf(GFP_KERNEL, "opal-%s", r->name);
else
name = kasprintf(GFP_KERNEL, "opal");
/* Install interrupt handler */
- rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW,
+ rc = request_irq(r->start, opal_interrupt, r->flags & IRQD_TRIGGER_MASK,
name, NULL);
if (rc) {
- irq_dispose_mapping(virq);
- pr_warn("Error %d requesting irq %d (0x%x)\n",
- rc, virq, irqs[i]);
+ pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start);
continue;
}
-
- /* Cache IRQ */
- opal_irqs[i] = virq;
}
-
-out_free:
- kfree(irqs);
- kfree(names);
-out:
+ rc = 0;
+ out:
of_node_put(opal_node);
return rc;
}
diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c
index 6f1214d4de92..55691950d981 100644
--- a/arch/powerpc/platforms/powernv/opal-kmsg.c
+++ b/arch/powerpc/platforms/powernv/opal-kmsg.c
@@ -23,12 +23,9 @@
* may not be completely printed. This function does not actually dump the
* message, it just ensures that OPAL completely flushes the console buffer.
*/
-static void force_opal_console_flush(struct kmsg_dumper *dumper,
+static void kmsg_dump_opal_console_flush(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason)
{
- int i;
- int64_t ret;
-
/*
* Outside of a panic context the pollers will continue to run,
* so we don't need to do any special flushing.
@@ -36,32 +33,11 @@ static void force_opal_console_flush(struct kmsg_dumper *dumper,
if (reason != KMSG_DUMP_PANIC)
return;
- if (opal_check_token(OPAL_CONSOLE_FLUSH)) {
- ret = opal_console_flush(0);
-
- if (ret == OPAL_UNSUPPORTED || ret == OPAL_PARAMETER)
- return;
-
- /* Incrementally flush until there's nothing left */
- while (opal_console_flush(0) != OPAL_SUCCESS);
- } else {
- /*
- * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
- * the console can still be flushed by calling the polling
- * function enough times to flush the buffer. We don't know
- * how much output still needs to be flushed, but we can be
- * generous since the kernel is in panic and doesn't need
- * to do much else.
- */
- printk(KERN_NOTICE "opal: OPAL_CONSOLE_FLUSH missing.\n");
- for (i = 0; i < 1024; i++) {
- opal_poll_events(NULL);
- }
- }
+ opal_flush_console(0);
}
static struct kmsg_dumper opal_kmsg_dumper = {
- .dump = force_opal_console_flush
+ .dump = kmsg_dump_opal_console_flush
};
void __init opal_kmsg_init(void)
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index 541c9ea04a32..f7d04b6a2d7a 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -32,6 +32,34 @@ static struct sensor_group {
struct sg_attr *sgattrs;
} *sgs;
+int sensor_group_enable(u32 handle, bool enable)
+{
+ struct opal_msg msg;
+ int token, ret;
+
+ token = opal_async_get_token_interruptible();
+ if (token < 0)
+ return token;
+
+ ret = opal_sensor_group_enable(handle, token, enable);
+ if (ret == OPAL_ASYNC_COMPLETION) {
+ ret = opal_async_wait_response(token, &msg);
+ if (ret) {
+ pr_devel("Failed to wait for the async response\n");
+ ret = -EIO;
+ goto out;
+ }
+ ret = opal_error_code(opal_get_async_rc(msg));
+ } else {
+ ret = opal_error_code(ret);
+ }
+
+out:
+ opal_async_release_token(token);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sensor_group_enable);
+
static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index a8d9b4089c31..251528231a9e 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -14,6 +14,8 @@
#include <asm/hvcall.h>
#include <asm/asm-offsets.h>
#include <asm/opal.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
.section ".text"
@@ -327,3 +329,5 @@ OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64);
+OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE);
+OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 0d539c661748..38fe4087484a 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -344,72 +344,148 @@ int opal_get_chars(uint32_t vtermno, char *buf, int count)
return 0;
}
-int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
+static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic)
{
- int written = 0;
+ unsigned long flags = 0 /* shut up gcc */;
+ int written;
__be64 olen;
- s64 len, rc;
- unsigned long flags;
- __be64 evt;
+ s64 rc;
if (!opal.entry)
return -ENODEV;
- /* We want put_chars to be atomic to avoid mangling of hvsi
- * packets. To do that, we first test for room and return
- * -EAGAIN if there isn't enough.
- *
- * Unfortunately, opal_console_write_buffer_space() doesn't
- * appear to work on opal v1, so we just assume there is
- * enough room and be done with it
- */
- spin_lock_irqsave(&opal_write_lock, flags);
+ if (atomic)
+ spin_lock_irqsave(&opal_write_lock, flags);
rc = opal_console_write_buffer_space(vtermno, &olen);
- len = be64_to_cpu(olen);
- if (rc || len < total_len) {
- spin_unlock_irqrestore(&opal_write_lock, flags);
+ if (rc || be64_to_cpu(olen) < total_len) {
/* Closed -> drop characters */
if (rc)
- return total_len;
- opal_poll_events(NULL);
- return -EAGAIN;
+ written = total_len;
+ else
+ written = -EAGAIN;
+ goto out;
}
- /* We still try to handle partial completions, though they
- * should no longer happen.
- */
- rc = OPAL_BUSY;
- while(total_len > 0 && (rc == OPAL_BUSY ||
- rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
- olen = cpu_to_be64(total_len);
- rc = opal_console_write(vtermno, &olen, data);
- len = be64_to_cpu(olen);
-
- /* Closed or other error drop */
- if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
- rc != OPAL_BUSY_EVENT) {
- written = total_len;
- break;
- }
- if (rc == OPAL_SUCCESS) {
- total_len -= len;
- data += len;
- written += len;
+ /* Should not get a partial write here because space is available. */
+ olen = cpu_to_be64(total_len);
+ rc = opal_console_write(vtermno, &olen, data);
+ if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+ if (rc == OPAL_BUSY_EVENT)
+ opal_poll_events(NULL);
+ written = -EAGAIN;
+ goto out;
+ }
+
+ /* Closed or other error drop */
+ if (rc != OPAL_SUCCESS) {
+ written = opal_error_code(rc);
+ goto out;
+ }
+
+ written = be64_to_cpu(olen);
+ if (written < total_len) {
+ if (atomic) {
+ /* Should not happen */
+ pr_warn("atomic console write returned partial "
+ "len=%d written=%d\n", total_len, written);
}
- /* This is a bit nasty but we need that for the console to
- * flush when there aren't any interrupts. We will clean
- * things a bit later to limit that to synchronous path
- * such as the kernel console and xmon/udbg
- */
- do
- opal_poll_events(&evt);
- while(rc == OPAL_SUCCESS &&
- (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
+ if (!written)
+ written = -EAGAIN;
}
- spin_unlock_irqrestore(&opal_write_lock, flags);
+
+out:
+ if (atomic)
+ spin_unlock_irqrestore(&opal_write_lock, flags);
+
return written;
}
+int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
+{
+ return __opal_put_chars(vtermno, data, total_len, false);
+}
+
+/*
+ * opal_put_chars_atomic will not perform partial-writes. Data will be
+ * atomically written to the terminal or not at all. This is not strictly
+ * true at the moment because console space can race with OPAL's console
+ * writes.
+ */
+int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len)
+{
+ return __opal_put_chars(vtermno, data, total_len, true);
+}
+
+static s64 __opal_flush_console(uint32_t vtermno)
+{
+ s64 rc;
+
+ if (!opal_check_token(OPAL_CONSOLE_FLUSH)) {
+ __be64 evt;
+
+ /*
+ * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
+ * the console can still be flushed by calling the polling
+ * function while it has OPAL_EVENT_CONSOLE_OUTPUT events.
+ */
+ WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n");
+
+ opal_poll_events(&evt);
+ if (!(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT))
+ return OPAL_SUCCESS;
+ return OPAL_BUSY;
+
+ } else {
+ rc = opal_console_flush(vtermno);
+ if (rc == OPAL_BUSY_EVENT) {
+ opal_poll_events(NULL);
+ rc = OPAL_BUSY;
+ }
+ return rc;
+ }
+
+}
+
+/*
+ * opal_flush_console spins until the console is flushed
+ */
+int opal_flush_console(uint32_t vtermno)
+{
+ for (;;) {
+ s64 rc = __opal_flush_console(vtermno);
+
+ if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
+ mdelay(1);
+ continue;
+ }
+
+ return opal_error_code(rc);
+ }
+}
+
+/*
+ * opal_flush_chars is an hvc interface that sleeps until the console is
+ * flushed if wait, otherwise it will return -EBUSY if the console has data,
+ * -EAGAIN if it has data and some of it was flushed.
+ */
+int opal_flush_chars(uint32_t vtermno, bool wait)
+{
+ for (;;) {
+ s64 rc = __opal_flush_console(vtermno);
+
+ if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
+ if (wait) {
+ msleep(OPAL_BUSY_DELAY_MS);
+ continue;
+ }
+ if (rc == OPAL_PARTIAL)
+ return -EAGAIN;
+ }
+
+ return opal_error_code(rc);
+ }
+}
+
static int opal_recover_mce(struct pt_regs *regs,
struct machine_check_event *evt)
{
@@ -922,6 +998,7 @@ EXPORT_SYMBOL_GPL(opal_flash_read);
EXPORT_SYMBOL_GPL(opal_flash_write);
EXPORT_SYMBOL_GPL(opal_flash_erase);
EXPORT_SYMBOL_GPL(opal_prd_msg);
+EXPORT_SYMBOL_GPL(opal_check_token);
/* Convert a region of vmalloc memory to an opal sg list */
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
@@ -1034,3 +1111,5 @@ EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
EXPORT_SYMBOL_GPL(opal_int_eoi);
EXPORT_SYMBOL_GPL(opal_error_code);
+/* Export the below symbol for NX compression */
+EXPORT_SYMBOL(opal_nx_coproc_init);
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c
index cee003de63af..1b18111453d7 100644
--- a/arch/powerpc/platforms/powernv/pci-cxl.c
+++ b/arch/powerpc/platforms/powernv/pci-cxl.c
@@ -8,11 +8,8 @@
*/
#include <linux/module.h>
-#include <linux/msi.h>
-#include <asm/pci-bridge.h>
#include <asm/pnv-pci.h>
#include <asm/opal.h>
-#include <misc/cxl.h>
#include "pci.h"
@@ -179,199 +176,3 @@ static inline int get_cxl_module(void)
#else
static inline int get_cxl_module(void) { return 0; }
#endif
-
-/*
- * Sets flags and switches the controller ops to enable the cxl kernel api.
- * Originally the cxl kernel API operated on a virtual PHB, but certain cards
- * such as the Mellanox CX4 use a peer model instead and for these cards the
- * cxl kernel api will operate on the real PHB.
- */
-int pnv_cxl_enable_phb_kernel_api(struct pci_controller *hose, bool enable)
-{
- struct pnv_phb *phb = hose->private_data;
- int rc;
-
- if (!enable) {
- /*
- * Once cxl mode is enabled on the PHB, there is currently no
- * known safe method to disable it again, and trying risks a
- * checkstop. If we can find a way to safely disable cxl mode
- * in the future we can revisit this, but for now the only sane
- * thing to do is to refuse to disable cxl mode:
- */
- return -EPERM;
- }
-
- /*
- * Hold a reference to the cxl module since several PHB operations now
- * depend on it, and it would be insane to allow it to be removed so
- * long as we are in this mode (and since we can't safely disable this
- * mode once enabled...).
- */
- rc = get_cxl_module();
- if (rc)
- return rc;
-
- phb->flags |= PNV_PHB_FLAG_CXL;
- hose->controller_ops = pnv_cxl_cx4_ioda_controller_ops;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(pnv_cxl_enable_phb_kernel_api);
-
-bool pnv_pci_on_cxl_phb(struct pci_dev *dev)
-{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- struct pnv_phb *phb = hose->private_data;
-
- return !!(phb->flags & PNV_PHB_FLAG_CXL);
-}
-EXPORT_SYMBOL_GPL(pnv_pci_on_cxl_phb);
-
-struct cxl_afu *pnv_cxl_phb_to_afu(struct pci_controller *hose)
-{
- struct pnv_phb *phb = hose->private_data;
-
- return (struct cxl_afu *)phb->cxl_afu;
-}
-EXPORT_SYMBOL_GPL(pnv_cxl_phb_to_afu);
-
-void pnv_cxl_phb_set_peer_afu(struct pci_dev *dev, struct cxl_afu *afu)
-{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- struct pnv_phb *phb = hose->private_data;
-
- phb->cxl_afu = afu;
-}
-EXPORT_SYMBOL_GPL(pnv_cxl_phb_set_peer_afu);
-
-/*
- * In the peer cxl model, the XSL/PSL is physical function 0, and will be used
- * by other functions on the device for memory access and interrupts. When the
- * other functions are enabled we explicitly take a reference on the cxl
- * function since they will use it, and allocate a default context associated
- * with that function just like the vPHB model of the cxl kernel API.
- */
-bool pnv_cxl_enable_device_hook(struct pci_dev *dev)
-{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct cxl_afu *afu = phb->cxl_afu;
-
- if (!pnv_pci_enable_device_hook(dev))
- return false;
-
-
- /* No special handling for the cxl function, which is always PF 0 */
- if (PCI_FUNC(dev->devfn) == 0)
- return true;
-
- if (!afu) {
- dev_WARN(&dev->dev, "Attempted to enable function > 0 on CXL PHB without a peer AFU\n");
- return false;
- }
-
- dev_info(&dev->dev, "Enabling function on CXL enabled PHB with peer AFU\n");
-
- /* Make sure the peer AFU can't go away while this device is active */
- cxl_afu_get(afu);
-
- return cxl_pci_associate_default_context(dev, afu);
-}
-
-void pnv_cxl_disable_device(struct pci_dev *dev)
-{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct cxl_afu *afu = phb->cxl_afu;
-
- /* No special handling for cxl function: */
- if (PCI_FUNC(dev->devfn) == 0)
- return;
-
- cxl_pci_disable_device(dev);
- cxl_afu_put(afu);
-}
-
-/*
- * This is a special version of pnv_setup_msi_irqs for cards in cxl mode. This
- * function handles setting up the IVTE entries for the XSL to use.
- *
- * We are currently not filling out the MSIX table, since the only currently
- * supported adapter (CX4) uses a custom MSIX table format in cxl mode and it
- * is up to their driver to fill that out. In the future we may fill out the
- * MSIX table (and change the IVTE entries to be an index to the MSIX table)
- * for adapters implementing the Full MSI-X mode described in the CAIA.
- */
-int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct msi_desc *entry;
- struct cxl_context *ctx = NULL;
- unsigned int virq;
- int hwirq;
- int afu_irq = 0;
- int rc;
-
- if (WARN_ON(!phb) || !phb->msi_bmp.bitmap)
- return -ENODEV;
-
- if (pdev->no_64bit_msi && !phb->msi32_support)
- return -ENODEV;
-
- rc = cxl_cx4_setup_msi_irqs(pdev, nvec, type);
- if (rc)
- return rc;
-
- for_each_pci_msi_entry(entry, pdev) {
- if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
- pr_warn("%s: Supports only 64-bit MSIs\n",
- pci_name(pdev));
- return -ENXIO;
- }
-
- hwirq = cxl_next_msi_hwirq(pdev, &ctx, &afu_irq);
- if (WARN_ON(hwirq <= 0))
- return (hwirq ? hwirq : -ENOMEM);
-
- virq = irq_create_mapping(NULL, hwirq);
- if (!virq) {
- pr_warn("%s: Failed to map cxl mode MSI to linux irq\n",
- pci_name(pdev));
- return -ENOMEM;
- }
-
- rc = pnv_cxl_ioda_msi_setup(pdev, hwirq, virq);
- if (rc) {
- pr_warn("%s: Failed to setup cxl mode MSI\n", pci_name(pdev));
- irq_dispose_mapping(virq);
- return rc;
- }
-
- irq_set_msi_desc(virq, entry);
- }
-
- return 0;
-}
-
-void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
-{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct msi_desc *entry;
- irq_hw_number_t hwirq;
-
- if (WARN_ON(!phb))
- return;
-
- for_each_pci_msi_entry(entry, pdev) {
- if (!entry->irq)
- continue;
- hwirq = virq_to_hw(entry->irq);
- irq_set_msi_desc(entry->irq, NULL);
- irq_dispose_mapping(entry->irq);
- }
-
- cxl_cx4_teardown_msi_irqs(pdev);
-}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
new file mode 100644
index 000000000000..6c5db1acbe8d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -0,0 +1,399 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * TCE helpers for IODA PCI/PCIe on PowerNV platforms
+ *
+ * Copyright 2018 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/iommu.h>
+
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include "pci.h"
+
+void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+ void *tce_mem, u64 tce_size,
+ u64 dma_offset, unsigned int page_shift)
+{
+ tbl->it_blocksize = 16;
+ tbl->it_base = (unsigned long)tce_mem;
+ tbl->it_page_shift = page_shift;
+ tbl->it_offset = dma_offset >> tbl->it_page_shift;
+ tbl->it_index = 0;
+ tbl->it_size = tce_size >> 3;
+ tbl->it_busno = 0;
+ tbl->it_type = TCE_PCI;
+}
+
+static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
+{
+ struct page *tce_mem = NULL;
+ __be64 *addr;
+
+ tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT);
+ if (!tce_mem) {
+ pr_err("Failed to allocate a TCE memory, level shift=%d\n",
+ shift);
+ return NULL;
+ }
+ addr = page_address(tce_mem);
+ memset(addr, 0, 1UL << shift);
+
+ return addr;
+}
+
+static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
+{
+ __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
+ int level = tbl->it_indirect_levels;
+ const long shift = ilog2(tbl->it_level_size);
+ unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
+
+ while (level) {
+ int n = (idx & mask) >> (level * shift);
+ unsigned long tce;
+
+ if (tmp[n] == 0) {
+ __be64 *tmp2;
+
+ if (!alloc)
+ return NULL;
+
+ tmp2 = pnv_alloc_tce_level(tbl->it_nid,
+ ilog2(tbl->it_level_size) + 3);
+ if (!tmp2)
+ return NULL;
+
+ tmp[n] = cpu_to_be64(__pa(tmp2) |
+ TCE_PCI_READ | TCE_PCI_WRITE);
+ }
+ tce = be64_to_cpu(tmp[n]);
+
+ tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
+ idx &= ~mask;
+ mask >>= shift;
+ --level;
+ }
+
+ return tmp + idx;
+}
+
+int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ unsigned long uaddr, enum dma_data_direction direction,
+ unsigned long attrs)
+{
+ u64 proto_tce = iommu_direction_to_tce_perm(direction);
+ u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
+ long i;
+
+ if (proto_tce & TCE_PCI_WRITE)
+ proto_tce |= TCE_PCI_READ;
+
+ for (i = 0; i < npages; i++) {
+ unsigned long newtce = proto_tce |
+ ((rpn + i) << tbl->it_page_shift);
+ unsigned long idx = index - tbl->it_offset + i;
+
+ *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_IOMMU_API
+int pnv_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction,
+ bool alloc)
+{
+ u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+ unsigned long newtce = *hpa | proto_tce, oldtce;
+ unsigned long idx = index - tbl->it_offset;
+ __be64 *ptce = NULL;
+
+ BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+
+ if (*direction == DMA_NONE) {
+ ptce = pnv_tce(tbl, false, idx, false);
+ if (!ptce) {
+ *hpa = 0;
+ return 0;
+ }
+ }
+
+ if (!ptce) {
+ ptce = pnv_tce(tbl, false, idx, alloc);
+ if (!ptce)
+ return alloc ? H_HARDWARE : H_TOO_HARD;
+ }
+
+ if (newtce & TCE_PCI_WRITE)
+ newtce |= TCE_PCI_READ;
+
+ oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
+ *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+ *direction = iommu_tce_direction(oldtce);
+
+ return 0;
+}
+
+__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
+{
+ if (WARN_ON_ONCE(!tbl->it_userspace))
+ return NULL;
+
+ return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
+}
+#endif
+
+void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
+{
+ long i;
+
+ for (i = 0; i < npages; i++) {
+ unsigned long idx = index - tbl->it_offset + i;
+ __be64 *ptce = pnv_tce(tbl, false, idx, false);
+
+ if (ptce)
+ *ptce = cpu_to_be64(0);
+ }
+}
+
+unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+{
+ __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
+
+ if (!ptce)
+ return 0;
+
+ return be64_to_cpu(*ptce);
+}
+
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+ unsigned long size, unsigned int levels)
+{
+ const unsigned long addr_ul = (unsigned long) addr &
+ ~(TCE_PCI_READ | TCE_PCI_WRITE);
+
+ if (levels) {
+ long i;
+ u64 *tmp = (u64 *) addr_ul;
+
+ for (i = 0; i < size; ++i) {
+ unsigned long hpa = be64_to_cpu(tmp[i]);
+
+ if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
+ continue;
+
+ pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
+ levels - 1);
+ }
+ }
+
+ free_pages(addr_ul, get_order(size << 3));
+}
+
+void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
+{
+ const unsigned long size = tbl->it_indirect_levels ?
+ tbl->it_level_size : tbl->it_size;
+
+ if (!tbl->it_size)
+ return;
+
+ pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
+ tbl->it_indirect_levels);
+ if (tbl->it_userspace) {
+ pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
+ tbl->it_indirect_levels);
+ }
+}
+
+static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
+ unsigned int levels, unsigned long limit,
+ unsigned long *current_offset, unsigned long *total_allocated)
+{
+ __be64 *addr, *tmp;
+ unsigned long allocated = 1UL << shift;
+ unsigned int entries = 1UL << (shift - 3);
+ long i;
+
+ addr = pnv_alloc_tce_level(nid, shift);
+ *total_allocated += allocated;
+
+ --levels;
+ if (!levels) {
+ *current_offset += allocated;
+ return addr;
+ }
+
+ for (i = 0; i < entries; ++i) {
+ tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
+ levels, limit, current_offset, total_allocated);
+ if (!tmp)
+ break;
+
+ addr[i] = cpu_to_be64(__pa(tmp) |
+ TCE_PCI_READ | TCE_PCI_WRITE);
+
+ if (*current_offset >= limit)
+ break;
+ }
+
+ return addr;
+}
+
+long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+ bool alloc_userspace_copy, struct iommu_table *tbl)
+{
+ void *addr, *uas = NULL;
+ unsigned long offset = 0, level_shift, total_allocated = 0;
+ unsigned long total_allocated_uas = 0;
+ const unsigned int window_shift = ilog2(window_size);
+ unsigned int entries_shift = window_shift - page_shift;
+ unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
+ PAGE_SHIFT);
+ const unsigned long tce_table_size = 1UL << table_shift;
+ unsigned int tmplevels = levels;
+
+ if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
+ return -EINVAL;
+
+ if (!is_power_of_2(window_size))
+ return -EINVAL;
+
+ if (alloc_userspace_copy && (window_size > (1ULL << 32)))
+ tmplevels = 1;
+
+ /* Adjust direct table size from window_size and levels */
+ entries_shift = (entries_shift + levels - 1) / levels;
+ level_shift = entries_shift + 3;
+ level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
+
+ if ((level_shift - 3) * levels + page_shift >= 60)
+ return -EINVAL;
+
+ /* Allocate TCE table */
+ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+ tmplevels, tce_table_size, &offset, &total_allocated);
+
+ /* addr==NULL means that the first level allocation failed */
+ if (!addr)
+ return -ENOMEM;
+
+ /*
+ * First level was allocated but some lower level failed as
+ * we did not allocate as much as we wanted,
+ * release partially allocated table.
+ */
+ if (tmplevels == levels && offset < tce_table_size)
+ goto free_tces_exit;
+
+ /* Allocate userspace view of the TCE table */
+ if (alloc_userspace_copy) {
+ offset = 0;
+ uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+ levels, tce_table_size, &offset,
+ &total_allocated_uas);
+ if (!uas)
+ goto free_tces_exit;
+ if (tmplevels == levels && (offset < tce_table_size ||
+ total_allocated_uas != total_allocated))
+ goto free_uas_exit;
+ }
+
+ /* Setup linux iommu table */
+ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
+ page_shift);
+ tbl->it_level_size = 1ULL << (level_shift - 3);
+ tbl->it_indirect_levels = levels - 1;
+ tbl->it_allocated_size = total_allocated;
+ tbl->it_userspace = uas;
+ tbl->it_nid = nid;
+
+ pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
+ window_size, tce_table_size, bus_offset, tbl->it_base,
+ tbl->it_userspace, tmplevels, levels);
+
+ return 0;
+
+free_uas_exit:
+ pnv_pci_ioda2_table_do_free_pages(uas,
+ 1ULL << (level_shift - 3), levels - 1);
+free_tces_exit:
+ pnv_pci_ioda2_table_do_free_pages(addr,
+ 1ULL << (level_shift - 3), levels - 1);
+
+ return -ENOMEM;
+}
+
+static void pnv_iommu_table_group_link_free(struct rcu_head *head)
+{
+ struct iommu_table_group_link *tgl = container_of(head,
+ struct iommu_table_group_link, rcu);
+
+ kfree(tgl);
+}
+
+void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+ struct iommu_table_group *table_group)
+{
+ long i;
+ bool found;
+ struct iommu_table_group_link *tgl;
+
+ if (!tbl || !table_group)
+ return;
+
+ /* Remove link to a group from table's list of attached groups */
+ found = false;
+ list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+ if (tgl->table_group == table_group) {
+ list_del_rcu(&tgl->next);
+ call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
+ found = true;
+ break;
+ }
+ }
+ if (WARN_ON(!found))
+ return;
+
+ /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
+ found = false;
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ if (table_group->tables[i] == tbl) {
+ table_group->tables[i] = NULL;
+ found = true;
+ break;
+ }
+ }
+ WARN_ON(!found);
+}
+
+long pnv_pci_link_table_and_group(int node, int num,
+ struct iommu_table *tbl,
+ struct iommu_table_group *table_group)
+{
+ struct iommu_table_group_link *tgl = NULL;
+
+ if (WARN_ON(!tbl || !table_group))
+ return -EINVAL;
+
+ tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
+ node);
+ if (!tgl)
+ return -ENOMEM;
+
+ tgl->table_group = table_group;
+ list_add_rcu(&tgl->next, &tbl->it_group_list);
+
+ table_group->tables[num] = tbl;
+
+ return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 5bd0eb6681bc..cde710297a4e 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -46,17 +46,14 @@
#include "powernv.h"
#include "pci.h"
+#include "../../../../drivers/pci/pci.h"
#define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */
#define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */
#define PNV_IODA1_DMA32_SEGSIZE 0x10000000
-#define POWERNV_IOMMU_DEFAULT_LEVELS 1
-#define POWERNV_IOMMU_MAX_LEVELS 5
-
static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
"NPU_OCAPI" };
-static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...)
@@ -2007,7 +2004,7 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction)
{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+ long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);
if (!ret)
pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false);
@@ -2018,7 +2015,7 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction)
{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+ long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);
if (!ret)
pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true);
@@ -2040,6 +2037,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
#ifdef CONFIG_IOMMU_API
.exchange = pnv_ioda1_tce_xchg,
.exchange_rm = pnv_ioda1_tce_xchg_rm,
+ .useraddrptr = pnv_tce_useraddrptr,
#endif
.clear = pnv_ioda1_tce_free,
.get = pnv_tce_get,
@@ -2171,7 +2169,7 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction)
{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+ long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);
if (!ret)
pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);
@@ -2182,7 +2180,7 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction)
{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+ long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);
if (!ret)
pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true);
@@ -2199,20 +2197,16 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
}
-static void pnv_ioda2_table_free(struct iommu_table *tbl)
-{
- pnv_pci_ioda2_table_free_pages(tbl);
-}
-
static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.set = pnv_ioda2_tce_build,
#ifdef CONFIG_IOMMU_API
.exchange = pnv_ioda2_tce_xchg,
.exchange_rm = pnv_ioda2_tce_xchg_rm,
+ .useraddrptr = pnv_tce_useraddrptr,
#endif
.clear = pnv_ioda2_tce_free,
.get = pnv_tce_get,
- .free = pnv_ioda2_table_free,
+ .free = pnv_pci_ioda2_table_free_pages,
};
static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data)
@@ -2462,13 +2456,9 @@ void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
pe->tce_bypass_enabled = enable;
}
-static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
- __u32 page_shift, __u64 window_size, __u32 levels,
- struct iommu_table *tbl);
-
static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
int num, __u32 page_shift, __u64 window_size, __u32 levels,
- struct iommu_table **ptbl)
+ bool alloc_userspace_copy, struct iommu_table **ptbl)
{
struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
table_group);
@@ -2485,7 +2475,7 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
ret = pnv_pci_ioda2_table_alloc_pages(nid,
bus_offset, page_shift, window_size,
- levels, tbl);
+ levels, alloc_userspace_copy, tbl);
if (ret) {
iommu_tce_table_put(tbl);
return ret;
@@ -2518,7 +2508,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
IOMMU_PAGE_SHIFT_4K,
window_size,
- POWERNV_IOMMU_DEFAULT_LEVELS, &tbl);
+ POWERNV_IOMMU_DEFAULT_LEVELS, false, &tbl);
if (rc) {
pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
rc);
@@ -2605,7 +2595,16 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
tce_table_size, direct_table_size);
}
- return bytes;
+ return bytes + bytes; /* one for HW table, one for userspace copy */
+}
+
+static long pnv_pci_ioda2_create_table_userspace(
+ struct iommu_table_group *table_group,
+ int num, __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table **ptbl)
+{
+ return pnv_pci_ioda2_create_table(table_group,
+ num, page_shift, window_size, levels, true, ptbl);
}
static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
@@ -2634,7 +2633,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
.get_table_size = pnv_pci_ioda2_get_table_size,
- .create_table = pnv_pci_ioda2_create_table,
+ .create_table = pnv_pci_ioda2_create_table_userspace,
.set_window = pnv_pci_ioda2_set_window,
.unset_window = pnv_pci_ioda2_unset_window,
.take_ownership = pnv_ioda2_take_ownership,
@@ -2739,7 +2738,7 @@ static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
.get_table_size = pnv_pci_ioda2_get_table_size,
- .create_table = pnv_pci_ioda2_create_table,
+ .create_table = pnv_pci_ioda2_create_table_userspace,
.set_window = pnv_pci_ioda2_npu_set_window,
.unset_window = pnv_pci_ioda2_npu_unset_window,
.take_ownership = pnv_ioda2_npu_take_ownership,
@@ -2773,144 +2772,6 @@ static void pnv_pci_ioda_setup_iommu_api(void)
static void pnv_pci_ioda_setup_iommu_api(void) { };
#endif
-static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift,
- unsigned levels, unsigned long limit,
- unsigned long *current_offset, unsigned long *total_allocated)
-{
- struct page *tce_mem = NULL;
- __be64 *addr, *tmp;
- unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT;
- unsigned long allocated = 1UL << (order + PAGE_SHIFT);
- unsigned entries = 1UL << (shift - 3);
- long i;
-
- tce_mem = alloc_pages_node(nid, GFP_KERNEL, order);
- if (!tce_mem) {
- pr_err("Failed to allocate a TCE memory, order=%d\n", order);
- return NULL;
- }
- addr = page_address(tce_mem);
- memset(addr, 0, allocated);
- *total_allocated += allocated;
-
- --levels;
- if (!levels) {
- *current_offset += allocated;
- return addr;
- }
-
- for (i = 0; i < entries; ++i) {
- tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
- levels, limit, current_offset, total_allocated);
- if (!tmp)
- break;
-
- addr[i] = cpu_to_be64(__pa(tmp) |
- TCE_PCI_READ | TCE_PCI_WRITE);
-
- if (*current_offset >= limit)
- break;
- }
-
- return addr;
-}
-
-static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
- unsigned long size, unsigned level);
-
-static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
- __u32 page_shift, __u64 window_size, __u32 levels,
- struct iommu_table *tbl)
-{
- void *addr;
- unsigned long offset = 0, level_shift, total_allocated = 0;
- const unsigned window_shift = ilog2(window_size);
- unsigned entries_shift = window_shift - page_shift;
- unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT);
- const unsigned long tce_table_size = 1UL << table_shift;
-
- if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
- return -EINVAL;
-
- if (!is_power_of_2(window_size))
- return -EINVAL;
-
- /* Adjust direct table size from window_size and levels */
- entries_shift = (entries_shift + levels - 1) / levels;
- level_shift = entries_shift + 3;
- level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
-
- if ((level_shift - 3) * levels + page_shift >= 60)
- return -EINVAL;
-
- /* Allocate TCE table */
- addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
- levels, tce_table_size, &offset, &total_allocated);
-
- /* addr==NULL means that the first level allocation failed */
- if (!addr)
- return -ENOMEM;
-
- /*
- * First level was allocated but some lower level failed as
- * we did not allocate as much as we wanted,
- * release partially allocated table.
- */
- if (offset < tce_table_size) {
- pnv_pci_ioda2_table_do_free_pages(addr,
- 1ULL << (level_shift - 3), levels - 1);
- return -ENOMEM;
- }
-
- /* Setup linux iommu table */
- pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
- page_shift);
- tbl->it_level_size = 1ULL << (level_shift - 3);
- tbl->it_indirect_levels = levels - 1;
- tbl->it_allocated_size = total_allocated;
-
- pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
- window_size, tce_table_size, bus_offset);
-
- return 0;
-}
-
-static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
- unsigned long size, unsigned level)
-{
- const unsigned long addr_ul = (unsigned long) addr &
- ~(TCE_PCI_READ | TCE_PCI_WRITE);
-
- if (level) {
- long i;
- u64 *tmp = (u64 *) addr_ul;
-
- for (i = 0; i < size; ++i) {
- unsigned long hpa = be64_to_cpu(tmp[i]);
-
- if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
- continue;
-
- pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
- level - 1);
- }
- }
-
- free_pages(addr_ul, get_order(size << 3));
-}
-
-static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
-{
- const unsigned long size = tbl->it_indirect_levels ?
- tbl->it_level_size : tbl->it_size;
-
- if (!tbl->it_size)
- return;
-
- pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
- tbl->it_indirect_levels);
-}
-
static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
{
struct pci_controller *hose = phb->hose;
@@ -2925,7 +2786,7 @@ static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
/* Add 16M for POWER8 by default */
if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
!cpu_has_feature(CPU_FTR_ARCH_300))
- mask |= SZ_16M;
+ mask |= SZ_16M | SZ_256M;
return mask;
}
@@ -3138,7 +2999,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
struct pci_dn *pdn;
int mul, total_vfs;
- if (!pdev->is_physfn || pdev->is_added)
+ if (!pdev->is_physfn || pci_dev_is_added(pdev))
return;
pdn = pci_get_pdn(pdev);
@@ -3367,12 +3228,49 @@ static void pnv_pci_ioda_create_dbgfs(void)
#endif /* CONFIG_DEBUG_FS */
}
+static void pnv_pci_enable_bridge(struct pci_bus *bus)
+{
+ struct pci_dev *dev = bus->self;
+ struct pci_bus *child;
+
+ /* Empty bus ? bail */
+ if (list_empty(&bus->devices))
+ return;
+
+ /*
+ * If there's a bridge associated with that bus enable it. This works
+ * around races in the generic code if the enabling is done during
+ * parallel probing. This can be removed once those races have been
+ * fixed.
+ */
+ if (dev) {
+ int rc = pci_enable_device(dev);
+ if (rc)
+ pci_err(dev, "Error enabling bridge (%d)\n", rc);
+ pci_set_master(dev);
+ }
+
+ /* Perform the same to child busses */
+ list_for_each_entry(child, &bus->children, node)
+ pnv_pci_enable_bridge(child);
+}
+
+static void pnv_pci_enable_bridges(void)
+{
+ struct pci_controller *hose;
+
+ list_for_each_entry(hose, &hose_list, list_node)
+ pnv_pci_enable_bridge(hose->bus);
+}
+
static void pnv_pci_ioda_fixup(void)
{
pnv_pci_ioda_setup_PEs();
pnv_pci_ioda_setup_iommu_api();
pnv_pci_ioda_create_dbgfs();
+ pnv_pci_enable_bridges();
+
#ifdef CONFIG_EEH
pnv_eeh_post_init();
#endif
@@ -3575,7 +3473,7 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
/* Prevent enabling devices for which we couldn't properly
* assign a PE
*/
-bool pnv_pci_enable_device_hook(struct pci_dev *dev)
+static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
struct pnv_phb *phb = hose->private_data;
@@ -3843,26 +3741,6 @@ static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
.shutdown = pnv_pci_ioda_shutdown,
};
-#ifdef CONFIG_CXL_BASE
-const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
- .dma_bus_setup = pnv_pci_dma_bus_setup,
-#ifdef CONFIG_PCI_MSI
- .setup_msi_irqs = pnv_cxl_cx4_setup_msi_irqs,
- .teardown_msi_irqs = pnv_cxl_cx4_teardown_msi_irqs,
-#endif
- .enable_device_hook = pnv_cxl_enable_device_hook,
- .disable_device = pnv_cxl_disable_device,
- .release_device = pnv_pci_release_device,
- .window_alignment = pnv_pci_window_alignment,
- .setup_bridge = pnv_pci_setup_bridge,
- .reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_pci_ioda_dma_set_mask,
- .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
- .shutdown = pnv_pci_ioda_shutdown,
-};
-#endif
-
static void __init pnv_pci_init_ioda_phb(struct device_node *np,
u64 hub_id, int ioda_type)
{
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b265ecc0836a..13aef2323bbc 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -802,85 +802,6 @@ struct pci_ops pnv_pci_ops = {
.write = pnv_pci_write_config,
};
-static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
-{
- __be64 *tmp = ((__be64 *)tbl->it_base);
- int level = tbl->it_indirect_levels;
- const long shift = ilog2(tbl->it_level_size);
- unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
-
- while (level) {
- int n = (idx & mask) >> (level * shift);
- unsigned long tce = be64_to_cpu(tmp[n]);
-
- tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
- idx &= ~mask;
- mask >>= shift;
- --level;
- }
-
- return tmp + idx;
-}
-
-int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
- unsigned long uaddr, enum dma_data_direction direction,
- unsigned long attrs)
-{
- u64 proto_tce = iommu_direction_to_tce_perm(direction);
- u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
- long i;
-
- if (proto_tce & TCE_PCI_WRITE)
- proto_tce |= TCE_PCI_READ;
-
- for (i = 0; i < npages; i++) {
- unsigned long newtce = proto_tce |
- ((rpn + i) << tbl->it_page_shift);
- unsigned long idx = index - tbl->it_offset + i;
-
- *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce);
- }
-
- return 0;
-}
-
-#ifdef CONFIG_IOMMU_API
-int pnv_tce_xchg(struct iommu_table *tbl, long index,
- unsigned long *hpa, enum dma_data_direction *direction)
-{
- u64 proto_tce = iommu_direction_to_tce_perm(*direction);
- unsigned long newtce = *hpa | proto_tce, oldtce;
- unsigned long idx = index - tbl->it_offset;
-
- BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
-
- if (newtce & TCE_PCI_WRITE)
- newtce |= TCE_PCI_READ;
-
- oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)));
- *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
- *direction = iommu_tce_direction(oldtce);
-
- return 0;
-}
-#endif
-
-void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
-{
- long i;
-
- for (i = 0; i < npages; i++) {
- unsigned long idx = index - tbl->it_offset + i;
-
- *(pnv_tce(tbl, idx)) = cpu_to_be64(0);
- }
-}
-
-unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
-{
- return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset)));
-}
-
struct iommu_table *pnv_pci_table_alloc(int nid)
{
struct iommu_table *tbl;
@@ -895,85 +816,6 @@ struct iommu_table *pnv_pci_table_alloc(int nid)
return tbl;
}
-long pnv_pci_link_table_and_group(int node, int num,
- struct iommu_table *tbl,
- struct iommu_table_group *table_group)
-{
- struct iommu_table_group_link *tgl = NULL;
-
- if (WARN_ON(!tbl || !table_group))
- return -EINVAL;
-
- tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
- node);
- if (!tgl)
- return -ENOMEM;
-
- tgl->table_group = table_group;
- list_add_rcu(&tgl->next, &tbl->it_group_list);
-
- table_group->tables[num] = tbl;
-
- return 0;
-}
-
-static void pnv_iommu_table_group_link_free(struct rcu_head *head)
-{
- struct iommu_table_group_link *tgl = container_of(head,
- struct iommu_table_group_link, rcu);
-
- kfree(tgl);
-}
-
-void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
- struct iommu_table_group *table_group)
-{
- long i;
- bool found;
- struct iommu_table_group_link *tgl;
-
- if (!tbl || !table_group)
- return;
-
- /* Remove link to a group from table's list of attached groups */
- found = false;
- list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
- if (tgl->table_group == table_group) {
- list_del_rcu(&tgl->next);
- call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
- found = true;
- break;
- }
- }
- if (WARN_ON(!found))
- return;
-
- /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
- found = false;
- for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
- if (table_group->tables[i] == tbl) {
- table_group->tables[i] = NULL;
- found = true;
- break;
- }
- }
- WARN_ON(!found);
-}
-
-void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
- void *tce_mem, u64 tce_size,
- u64 dma_offset, unsigned page_shift)
-{
- tbl->it_blocksize = 16;
- tbl->it_base = (unsigned long)tce_mem;
- tbl->it_page_shift = page_shift;
- tbl->it_offset = dma_offset >> tbl->it_page_shift;
- tbl->it_index = 0;
- tbl->it_size = tce_size >> 3;
- tbl->it_busno = 0;
- tbl->it_type = TCE_PCI;
-}
-
void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index eada4b6068cb..8b37b28e3831 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -88,7 +88,6 @@ struct pnv_ioda_pe {
};
#define PNV_PHB_FLAG_EEH (1 << 0)
-#define PNV_PHB_FLAG_CXL (1 << 1) /* Real PHB supporting the cxl kernel API */
struct pnv_phb {
struct pci_controller *hose;
@@ -194,20 +193,10 @@ struct pnv_phb {
bool nmmu_flush;
} npu;
-#ifdef CONFIG_CXL_BASE
- struct cxl_afu *cxl_afu;
-#endif
int p2p_target_count;
};
extern struct pci_ops pnv_pci_ops;
-extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
- unsigned long uaddr, enum dma_data_direction direction,
- unsigned long attrs);
-extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
-extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
- unsigned long *hpa, enum dma_data_direction *direction);
-extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
unsigned char *log_buff);
@@ -217,14 +206,6 @@ int pnv_pci_cfg_write(struct pci_dn *pdn,
int where, int size, u32 val);
extern struct iommu_table *pnv_pci_table_alloc(int nid);
-extern long pnv_pci_link_table_and_group(int node, int num,
- struct iommu_table *tbl,
- struct iommu_table_group *table_group);
-extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
- struct iommu_table_group *table_group);
-extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
- void *tce_mem, u64 tce_size,
- u64 dma_offset, unsigned page_shift);
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_init_npu_phb(struct device_node *np);
@@ -238,7 +219,6 @@ extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
-extern bool pnv_pci_enable_device_hook(struct pci_dev *dev);
extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
extern int pnv_eeh_post_init(void);
@@ -262,14 +242,33 @@ extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe);
extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe);
extern int pnv_npu2_init(struct pnv_phb *phb);
-/* cxl functions */
-extern bool pnv_cxl_enable_device_hook(struct pci_dev *dev);
-extern void pnv_cxl_disable_device(struct pci_dev *dev);
-extern int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
-extern void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev);
+/* pci-ioda-tce.c */
+#define POWERNV_IOMMU_DEFAULT_LEVELS 1
+#define POWERNV_IOMMU_MAX_LEVELS 5
+extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ unsigned long uaddr, enum dma_data_direction direction,
+ unsigned long attrs);
+extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
+extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction,
+ bool alloc);
+extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index,
+ bool alloc);
+extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
+
+extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+ bool alloc_userspace_copy, struct iommu_table *tbl);
+extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
-/* phb ops (cxl switches these when enabling the kernel api on the phb) */
-extern const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops;
+extern long pnv_pci_link_table_and_group(int node, int num,
+ struct iommu_table *tbl,
+ struct iommu_table_group *table_group);
+extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+ struct iommu_table_group *table_group);
+extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+ void *tce_mem, u64 tce_size,
+ u64 dma_offset, unsigned int page_shift);
#endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index f96df0a25d05..adddde023622 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -78,6 +78,12 @@ static void init_fw_feat_flags(struct device_node *np)
if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+ if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np))
+ security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
+
+ if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np))
+ security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
+
/*
* The features below are enabled by default, so we instead look to see
* if firmware has *disabled* them, and clear them if so.
@@ -124,7 +130,7 @@ static void pnv_setup_rfi_flush(void)
security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
setup_rfi_flush(type, enable);
- setup_barrier_nospec();
+ setup_count_cache_flush();
}
static void __init pnv_setup_arch(void)
@@ -314,7 +320,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
u64 reinit_flags;
if (xive_enabled())
- xive_kexec_teardown_cpu(secondary);
+ xive_teardown_cpu();
else
xics_kexec_teardown_cpu(secondary);
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index b80909957792..0d354e19ef92 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -283,23 +283,6 @@ static void pnv_cause_ipi(int cpu)
ic_cause_ipi(cpu);
}
-static void pnv_p9_dd1_cause_ipi(int cpu)
-{
- int this_cpu = get_cpu();
-
- /*
- * POWER9 DD1 has a global addressed msgsnd, but for now we restrict
- * IPIs to same core, because it requires additional synchronization
- * for inter-core doorbells which we do not implement.
- */
- if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu)))
- doorbell_global_ipi(cpu);
- else
- ic_cause_ipi(cpu);
-
- put_cpu();
-}
-
static void __init pnv_smp_probe(void)
{
if (xive_enabled())
@@ -311,14 +294,10 @@ static void __init pnv_smp_probe(void)
ic_cause_ipi = smp_ops->cause_ipi;
WARN_ON(!ic_cause_ipi);
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- smp_ops->cause_ipi = pnv_p9_dd1_cause_ipi;
- else
- smp_ops->cause_ipi = doorbell_global_ipi;
- } else {
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ smp_ops->cause_ipi = doorbell_global_ipi;
+ else
smp_ops->cause_ipi = pnv_cause_ipi;
- }
}
}
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index ae0100fd35bb..f5493dbdd7ff 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -15,6 +15,7 @@
#include <linux/io.h>
#include <linux/dcache.h>
#include <linux/mutex.h>
+#include <linux/stringify.h>
/*
* Overview of Virtual Accelerator Switchboard (VAS).
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 13eede60c24d..7e89d5c47068 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -25,6 +25,6 @@ obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_IBMEBUS) += ibmebus.o
-ifeq ($(CONFIG_PPC_PSERIES),y)
+ifdef CONFIG_PPC_PSERIES
obj-$(CONFIG_SUSPEND) += suspend.o
endif
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index c511a1743a44..d91412c591ef 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -13,6 +13,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
+#include <asm/feature-fixups.h>
.section ".text"
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 46fbaef69a59..23f54223ed56 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -58,7 +58,7 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
}
if (xive_enabled()) {
- xive_kexec_teardown_cpu(secondary);
+ xive_teardown_cpu();
if (!secondary)
xive_shutdown();
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 5a392e40f3d2..d3992ced0782 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -21,6 +21,7 @@
/* Enables debugging of low-level hash table routines - careful! */
#undef DEBUG
+#define pr_fmt(fmt) "lpar: " fmt
#include <linux/kernel.h>
#include <linux/dma-mapping.h>
@@ -36,7 +37,6 @@
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/iommu.h>
-#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/prom.h>
#include <asm/cputable.h>
@@ -165,8 +165,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
if (unlikely(lpar_rc == H_PTEG_FULL)) {
- if (!(vflags & HPTE_V_BOLTED))
- pr_devel(" full\n");
+ pr_devel("Hash table group is full\n");
return -1;
}
@@ -176,8 +175,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
* or we will loop forever, so return -2 in this case.
*/
if (unlikely(lpar_rc != H_SUCCESS)) {
- if (!(vflags & HPTE_V_BOLTED))
- pr_devel(" lpar err %ld\n", lpar_rc);
+ pr_err("Failed hash pte insert with error %ld\n", lpar_rc);
return -2;
}
if (!(vflags & HPTE_V_BOLTED))
@@ -240,8 +238,11 @@ static void manual_hpte_clear_all(void)
*/
for (i = 0; i < hpte_count; i += 4) {
lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
- if (lpar_rc != H_SUCCESS)
+ if (lpar_rc != H_SUCCESS) {
+ pr_info("Failed to read hash page table at %ld err %ld\n",
+ i, lpar_rc);
continue;
+ }
for (j = 0; j < 4; j++){
if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
HPTE_V_VRMA_MASK)
@@ -340,8 +341,11 @@ static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_gr
for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
- if (lpar_rc != H_SUCCESS)
+ if (lpar_rc != H_SUCCESS) {
+ pr_info("Failed to read hash page table at %ld err %ld\n",
+ hpte_group, lpar_rc);
continue;
+ }
for (j = 0; j < 4; j++) {
if (HPTE_V_COMPARE(ptes[j].pteh, want_v) &&
@@ -612,8 +616,8 @@ static int __init disable_bulk_remove(char *str)
{
if (strcmp(str, "off") == 0 &&
firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
- printk(KERN_INFO "Disabling BULK_REMOVE firmware feature");
- powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
+ pr_info("Disabling BULK_REMOVE firmware feature");
+ powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
}
return 1;
}
@@ -659,8 +663,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
return -ENODEV;
- printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n",
- shift);
+ pr_info("Attempting to resize HPT to shift %lu\n", shift);
t0 = ktime_get();
@@ -672,8 +675,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
/* prepare with shift==0 cancels an in-progress resize */
rc = plpar_resize_hpt_prepare(0, 0);
if (rc != H_SUCCESS)
- printk(KERN_WARNING
- "lpar: Unexpected error %d cancelling timed out HPT resize\n",
+ pr_warn("Unexpected error %d cancelling timed out HPT resize\n",
rc);
return -ETIMEDOUT;
}
@@ -691,9 +693,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
case H_RESOURCE:
return -EPERM;
default:
- printk(KERN_WARNING
- "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n",
- rc);
+ pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
return -EIO;
}
@@ -706,22 +706,19 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
if (rc != 0) {
switch (state.commit_rc) {
case H_PTEG_FULL:
- printk(KERN_WARNING
- "lpar: Hash collision while resizing HPT\n");
+ pr_warn("Hash collision while resizing HPT\n");
return -ENOSPC;
default:
- printk(KERN_WARNING
- "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
- state.commit_rc);
+ pr_warn("Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
+ state.commit_rc);
return -EIO;
};
}
- printk(KERN_INFO
- "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
- shift, (long long) ktime_ms_delta(t1, t0),
- (long long) ktime_ms_delta(t2, t1));
+ pr_info("HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
+ shift, (long long) ktime_ms_delta(t1, t0),
+ (long long) ktime_ms_delta(t2, t1));
return 0;
}
@@ -785,13 +782,13 @@ static int __init cmo_free_hint(char *str)
parm = strstrip(str);
if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) {
- printk(KERN_INFO "cmo_free_hint: CMO free page hinting is not active.\n");
+ pr_info("%s: CMO free page hinting is not active.\n", __func__);
cmo_free_hint_flag = 0;
return 1;
}
cmo_free_hint_flag = 1;
- printk(KERN_INFO "cmo_free_hint: CMO free page hinting is active.\n");
+ pr_info("%s: CMO free page hinting is active.\n", __func__);
if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0)
return 1;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 8a8033a249c7..f0e30dc94988 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -17,6 +17,7 @@
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/slab.h>
+#include <linux/stringify.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 5e1ef9150182..851ce326874a 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -22,6 +22,7 @@
#include <linux/of.h>
#include <linux/fs.h>
#include <linux/reboot.h>
+#include <linux/irq_work.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
@@ -32,11 +33,13 @@
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);
-static char global_mce_data_buf[RTAS_ERROR_LOG_MAX];
-static DEFINE_PER_CPU(__u64, mce_data_buf);
-
static int ras_check_exception_token;
+static void mce_process_errlog_event(struct irq_work *work);
+static struct irq_work mce_errlog_process_work = {
+ .func = mce_process_errlog_event,
+};
+
#define EPOW_SENSOR_TOKEN 9
#define EPOW_SENSOR_INDEX 0
@@ -330,16 +333,20 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
((((A) >= 0x7000) && ((A) < 0x7ff0)) || \
(((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16))))
+static inline struct rtas_error_log *fwnmi_get_errlog(void)
+{
+ return (struct rtas_error_log *)local_paca->mce_data_buf;
+}
+
/*
* Get the error information for errors coming through the
* FWNMI vectors. The pt_regs' r3 will be updated to reflect
* the actual r3 if possible, and a ptr to the error log entry
* will be returned if found.
*
- * If the RTAS error is not of the extended type, then we put it in a per
- * cpu 64bit buffer. If it is the extended type we use global_mce_data_buf.
+ * Use one buffer mce_data_buf per cpu to store RTAS error.
*
- * The global_mce_data_buf does not have any locks or protection around it,
+ * The mce_data_buf does not have any locks or protection around it,
* if a second machine check comes in, or a system reset is done
* before we have logged the error, then we will get corruption in the
* error log. This is preferable over holding off on calling
@@ -349,7 +356,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
{
unsigned long *savep;
- struct rtas_error_log *h, *errhdr = NULL;
+ struct rtas_error_log *h;
/* Mask top two bits */
regs->gpr[3] &= ~(0x3UL << 62);
@@ -360,24 +367,22 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
}
savep = __va(regs->gpr[3]);
- regs->gpr[3] = savep[0]; /* restore original r3 */
+ regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
- /* If it isn't an extended log we can use the per cpu 64bit buffer */
h = (struct rtas_error_log *)&savep[1];
+ /* Use the per cpu buffer from paca to store rtas error log */
+ memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
if (!rtas_error_extended(h)) {
- memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64));
- errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf);
+ memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
} else {
int len, error_log_length;
error_log_length = 8 + rtas_error_extended_log_length(h);
- len = max_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
- memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
- memcpy(global_mce_data_buf, h, len);
- errhdr = (struct rtas_error_log *)global_mce_data_buf;
+ len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
+ memcpy(local_paca->mce_data_buf, h, len);
}
- return errhdr;
+ return (struct rtas_error_log *)local_paca->mce_data_buf;
}
/* Call this when done with the data returned by FWNMI_get_errinfo.
@@ -423,6 +428,17 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
}
/*
+ * Process MCE rtas errlog event.
+ */
+static void mce_process_errlog_event(struct irq_work *work)
+{
+ struct rtas_error_log *err;
+
+ err = fwnmi_get_errlog();
+ log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
+}
+
+/*
* See if we can recover from a machine check exception.
* This is only called on power4 (or above) and only via
* the Firmware Non-Maskable Interrupts (fwnmi) handler
@@ -466,7 +482,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
recovered = 1;
}
- log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
+ /* Queue irq work to log this rtas event later. */
+ irq_work_queue(&mce_errlog_process_work);
return recovered;
}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 139f0af6c3d9..ba1791fd3234 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -41,6 +41,7 @@
#include <linux/root_dev.h>
#include <linux/of.h>
#include <linux/of_pci.h>
+#include <linux/memblock.h>
#include <asm/mmu.h>
#include <asm/processor.h>
@@ -69,8 +70,10 @@
#include <asm/kexec.h>
#include <asm/isa-bridge.h>
#include <asm/security_features.h>
+#include <asm/asm-const.h>
#include "pseries.h"
+#include "../../../../drivers/pci/pci.h"
int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
@@ -101,6 +104,9 @@ static void pSeries_show_cpuinfo(struct seq_file *m)
static void __init fwnmi_init(void)
{
unsigned long system_reset_addr, machine_check_addr;
+ u8 *mce_data_buf;
+ unsigned int i;
+ int nr_cpus = num_possible_cpus();
int ibm_nmi_register = rtas_token("ibm,nmi-register");
if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
@@ -114,6 +120,18 @@ static void __init fwnmi_init(void)
if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr,
machine_check_addr))
fwnmi_active = 1;
+
+ /*
+ * Allocate a chunk for per cpu buffer to hold rtas errorlog.
+ * It will be used in real mode mce handler, hence it needs to be
+ * below RMA.
+ */
+ mce_data_buf = __va(memblock_alloc_base(RTAS_ERROR_LOG_MAX * nr_cpus,
+ RTAS_ERROR_LOG_MAX, ppc64_rma_size));
+ for_each_possible_cpu(i) {
+ paca_ptrs[i]->mce_data_buf = mce_data_buf +
+ (RTAS_ERROR_LOG_MAX * i);
+ }
}
static void pseries_8259_cascade(struct irq_desc *desc)
@@ -484,6 +502,12 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+ if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
+ security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
+
+ if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
+ security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
+
/*
* The features below are enabled by default, so we instead look to see
* if firmware has *disabled* them, and clear them if so.
@@ -534,7 +558,7 @@ void pseries_setup_rfi_flush(void)
security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
setup_rfi_flush(types, enable);
- setup_barrier_nospec();
+ setup_count_cache_flush();
}
#ifdef CONFIG_PCI_IOV
@@ -646,6 +670,15 @@ void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
}
}
+static void pseries_disable_sriov_resources(struct pci_dev *pdev)
+{
+ int i;
+
+ pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+ pdev->resource[i + PCI_IOV_RESOURCES].flags = 0;
+}
+
static void pseries_pci_fixup_resources(struct pci_dev *pdev)
{
const int *indexes;
@@ -653,10 +686,10 @@ static void pseries_pci_fixup_resources(struct pci_dev *pdev)
/*Firmware must support open sriov otherwise dont configure*/
indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
- if (!indexes)
- return;
- /* Assign the addresses from device tree*/
- of_pci_set_vf_bar_size(pdev, indexes);
+ if (indexes)
+ of_pci_set_vf_bar_size(pdev, indexes);
+ else
+ pseries_disable_sriov_resources(pdev);
}
static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
@@ -664,14 +697,14 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
const int *indexes;
struct device_node *dn = pci_device_to_OF_node(pdev);
- if (!pdev->is_physfn || pdev->is_added)
+ if (!pdev->is_physfn || pci_dev_is_added(pdev))
return;
/*Firmware must support open sriov otherwise dont configure*/
indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
- if (!indexes)
- return;
- /* Assign the addresses from device tree*/
- of_pci_parse_iov_addrs(pdev, indexes);
+ if (indexes)
+ of_pci_parse_iov_addrs(pdev, indexes);
+ else
+ pseries_disable_sriov_resources(pdev);
}
static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,