From bb0054552d080dd929907c5925d4bedc8bf6def7 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 2 Sep 2015 08:39:28 -0700 Subject: powerpc/powernv/pci-ioda: fix 32-bit TCE table init in kdump kernel When attempting to kdump with the 4.2 kernel, we see for each PCI device: pci 0003:01 : [PE# 000] Assign DMA32 space pci 0003:01 : [PE# 000] Setting up 32-bit TCE table at 0..80000000 pci 0003:01 : [PE# 000] Failed to create 32-bit TCE table, err -22 PCI: Domain 0004 has 8 available 32-bit DMA segments PCI: 4 PE# for a total weight of 70 pci 0004:01 : [PE# 002] Assign DMA32 space pci 0004:01 : [PE# 002] Setting up 32-bit TCE table at 0..80000000 pci 0004:01 : [PE# 002] Failed to create 32-bit TCE table, err -22 pci 0004:0d : [PE# 005] Assign DMA32 space pci 0004:0d : [PE# 005] Setting up 32-bit TCE table at 0..80000000 pci 0004:0d : [PE# 005] Failed to create 32-bit TCE table, err -22 pci 0004:0e : [PE# 006] Assign DMA32 space pci 0004:0e : [PE# 006] Setting up 32-bit TCE table at 0..80000000 pci 0004:0e : [PE# 006] Failed to create 32-bit TCE table, err -22 pci 0004:10 : [PE# 008] Assign DMA32 space pci 0004:10 : [PE# 008] Setting up 32-bit TCE table at 0..80000000 pci 0004:10 : [PE# 008] Failed to create 32-bit TCE table, err -22 and eventually the kdump kernel fails to boot as none of the PCI devices (including the disk controller) are successfully initialized. The EINVAL response is because the DMA window (the 2GB base window) is larger than the kdump kernel's reserved memory (crashkernel=, in this case specified to be 1024M). The check in question, if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) is a valid sanity check for pnv_pci_ioda2_table_alloc_pages(), so adjust the caller to pass in a smaller window size if our maximum memory value is smaller than the DMA window. After this change, the PCI devices successfully set up the 32-bit TCE table and kdump succeeds. The problem was seen on a Firestone machine originally. 
Fixes: aca6913f5551 ("powerpc/powernv/ioda2: Introduce helpers to allocate TCE pages") Cc: stable@vger.kernel.org # 4.2 Signed-off-by: Nishanth Aravamudan Reviewed-by: Alexey Kardashevskiy [mpe: Coding style pedantry, use u64, change the indentation] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2927cd5c8303..77083274a4e3 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2049,9 +2049,17 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) struct iommu_table *tbl = NULL; long rc; + /* + * In memory constrained environments, e.g. kdump kernel, the + * DMA window can be larger than available memory, which will + * cause errors later. + */ + const u64 window_size = min((u64)pe->table_group.tce32_size, + memory_hotplug_max()); + rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, IOMMU_PAGE_SHIFT_4K, - pe->table_group.tce32_size, + window_size, POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); if (rc) { pe_err(pe, "Failed to create 32-bit TCE table, err %ld", -- cgit v1.2.3-59-g8ed1b From fa14486979b3a47307bcdb10f8b5baa875a5cf68 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Fri, 4 Sep 2015 11:22:52 -0700 Subject: powerpc/powernv/pci-ioda: fix kdump with non-power-of-2 crashkernel= The 32-bit TCE table initialization relies on the DMA window having a size equal to a power of 2 (and checks for it explicitly). But crashkernel= has no constraint that requires a power-of-2 be specified. This causes the kdump kernel to fail to boot as none of the PCI devices (including the disk controller) are successfully initialized. After this change, the PCI devices successfully set up the 32-bit TCE table and kdump succeeds. 
Fixes: aca6913f5551 ("powerpc/powernv/ioda2: Introduce helpers to allocate TCE pages") Signed-off-by: Nishanth Aravamudan Cc: stable@vger.kernel.org # 4.2 Tested-by: Jan Stancek Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 77083274a4e3..414fd1a00fda 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2049,13 +2049,19 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) struct iommu_table *tbl = NULL; long rc; + /* + * crashkernel= specifies the kdump kernel's maximum memory at + * some offset and there is no guaranteed the result is a power + * of 2, which will cause errors later. + */ + const u64 max_memory = __rounddown_pow_of_two(memory_hotplug_max()); + /* * In memory constrained environments, e.g. kdump kernel, the * DMA window can be larger than available memory, which will * cause errors later. */ - const u64 window_size = min((u64)pe->table_group.tce32_size, - memory_hotplug_max()); + const u64 window_size = min((u64)pe->table_group.tce32_size, max_memory); rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, IOMMU_PAGE_SHIFT_4K, -- cgit v1.2.3-59-g8ed1b From 7d1647dc4ba0a61fec5381c1abb59dc886b6ef3c Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Mon, 7 Sep 2015 10:52:58 +1000 Subject: cxl: abort cxl_pci_enable_device_hook() if PCI channel is offline cxl_pci_enable_device_hook() is called when attempting to enable an AFU sitting on a vPHB. At present, the state of the underlying CXL card's PCI channel is only checked when it calls cxl_afu_check_and_enable() at the very end, after it has already set DMA options and initialised a default context. Check the CXL card's link status before setting DMA options or initialising a default context. 
If the link is down, print a warning and return immediately. Signed-off-by: Andrew Donnellan Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/vphb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 6dd16a6d153f..94b520896b18 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -48,6 +48,12 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) phb = pci_bus_to_host(dev->bus); afu = (struct cxl_afu *)phb->private_data; + + if (!cxl_adapter_link_ok(afu->adapter)) { + dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__); + return false; + } + set_dma_ops(&dev->dev, &dma_direct_ops); set_dma_offset(&dev->dev, PAGE_OFFSET); -- cgit v1.2.3-59-g8ed1b From daebaabb5cfbe4a6f09ca0e0f8b7673efc704960 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 7 Sep 2015 15:52:40 +0530 Subject: powerpc/pseries: Release DRC when configure_connector fails Commit f32393c943e2 ("powerpc/pseries: Correct cpu affinity for dlpar added cpus") moved dlpar_acquire_drc() call to before dlpar_configure_connector() call in dlpar_cpu_probe(), but missed to release the DRC if dlpar_configure_connector() failed. During CPU hotplug, if configure-connector fails for any reason, then this will result in subsequent CPU hotplug attempts to fail. Release the acquired DRC if dlpar_configure_connector() call fails so that the DRC is left in right isolation and allocation state for the subsequent hotplug operation to succeed. 
Fixes: f32393c943e2 ("powerpc/pseries: Correct cpu affinity for dlpar added cpus") Cc: stable@vger.kernel.org # 4.1+ Signed-off-by: Bharata B Rao Reviewed-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/dlpar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 47d9cebe7159..db17827eb746 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -422,8 +422,10 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); of_node_put(parent); - if (!dn) + if (!dn) { + dlpar_release_drc(drc_index); return -EINVAL; + } rc = dlpar_attach_node(dn); if (rc) { -- cgit v1.2.3-59-g8ed1b From b855d45dc3175eb3e602b945805c7b6aa8c04559 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 8 Sep 2015 15:16:09 +1000 Subject: powerpc: Wire up sys_userfaultfd() The selftest passes on 64-bit LE and BE. 
Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 71f2b3f02cf8..4d65499ee1c1 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -368,3 +368,4 @@ SYSCALL_SPU(memfd_create) SYSCALL_SPU(bpf) COMPAT_SYS(execveat) PPC64ONLY(switch_endian) +SYSCALL_SPU(userfaultfd) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index f4f8b667d75b..4a055b6c2a64 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define __NR_syscalls 364 +#define __NR_syscalls 365 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index e4aa173dae62..6ad58d4c879b 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -386,5 +386,6 @@ #define __NR_bpf 361 #define __NR_execveat 362 #define __NR_switch_endian 363 +#define __NR_userfaultfd 364 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v1.2.3-59-g8ed1b From e297c939b745e420ef0b9dc989cb87bda617b399 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 10 Sep 2015 14:36:21 +1000 Subject: powerpc/MSI: Fix race condition in tearing down MSI interrupts This fixes a race which can result in the same virtual IRQ number being assigned to two different MSI interrupts. The most visible consequence of that is usually a warning and stack trace from the sysfs code about an attempt to create a duplicate entry in sysfs. The race happens when one CPU (say CPU 0) is disposing of an MSI while another CPU (say CPU 1) is setting up an MSI. 
CPU 0 calls (for example) pnv_teardown_msi_irqs(), which calls msi_bitmap_free_hwirqs() to indicate that the MSI (i.e. its hardware IRQ number) is no longer in use. Then, before CPU 0 gets to calling irq_dispose_mapping() to free up the virtual IRQ number, CPU 1 comes in and calls msi_bitmap_alloc_hwirqs() to allocate an MSI, and gets the same hardware IRQ number that CPU 0 just freed. CPU 1 then calls irq_create_mapping() to get a virtual IRQ number, which sees that there is currently a mapping for that hardware IRQ number and returns the corresponding virtual IRQ number (which is the same virtual IRQ number that CPU 0 was using). CPU 0 then calls irq_dispose_mapping() and frees that virtual IRQ number. Now, if another CPU comes along and calls irq_create_mapping(), it is likely to get the virtual IRQ number that was just freed, resulting in the same virtual IRQ number apparently being used for two different hardware interrupts. To fix this race, we just move the call to msi_bitmap_free_hwirqs() to after the call to irq_dispose_mapping(). Since virq_to_hw() doesn't work for the virtual IRQ number after irq_dispose_mapping() has been called, we need to call it before irq_dispose_mapping() and remember the result for the msi_bitmap_free_hwirqs() call. The pattern of calling msi_bitmap_free_hwirqs() before irq_dispose_mapping() appears in 5 places under arch/powerpc, and appears to have originated in commit 05af7bd2d75e ("[POWERPC] MPIC U3/U4 MSI backend") from 2007. 
Fixes: 05af7bd2d75e ("[POWERPC] MPIC U3/U4 MSI backend") Cc: stable@vger.kernel.org # v2.6.22+ Reported-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pasemi/msi.c | 5 +++-- arch/powerpc/platforms/powernv/pci.c | 5 +++-- arch/powerpc/sysdev/fsl_msi.c | 5 +++-- arch/powerpc/sysdev/mpic_u3msi.c | 5 +++-- arch/powerpc/sysdev/ppc4xx_msi.c | 5 +++-- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c index e66ef1943338..b304a9fe55cc 100644 --- a/arch/powerpc/platforms/pasemi/msi.c +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -63,6 +63,7 @@ static struct irq_chip mpic_pasemi_msi_chip = { static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); @@ -70,10 +71,10 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), ALLOC_CHUNK); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, ALLOC_CHUNK); } return; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 9b2480b265c0..f2dd77234240 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -99,6 +99,7 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev) struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; struct msi_desc *entry; + irq_hw_number_t hwirq; if (WARN_ON(!phb)) return; @@ -106,10 +107,10 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev) for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - 
msi_bitmap_free_hwirqs(&phb->msi_bmp, - virq_to_hw(entry->irq) - phb->msi_base, 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); } } #endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 5916da1856a7..48a576aa47b9 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -128,15 +128,16 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; struct fsl_msi *msi_data; + irq_hw_number_t hwirq; for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 70fbd5694a8b..2cbc7e29b85f 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -107,15 +107,16 @@ static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq) static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 24d0470c1698..8fb806135043 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -124,16 +124,17 @@ void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; struct ppc4xx_msi *msi_data = &ppc4xx_msi; + 
irq_hw_number_t hwirq; dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); for_each_pci_msi_entry(entry, dev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } } -- cgit v1.2.3-59-g8ed1b From 2925c2fdf1e0eb642482f5b30577e9435aaa8edb Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 15 Sep 2015 15:04:07 +1000 Subject: cxl: Fix unbalanced pci_dev_get in cxl_probe Currently the first thing we do in cxl_probe is to grab a reference on the pci device. Later on, we call device_register on our adapter. In our remove path, we call device_unregister, but we never call pci_dev_put. We therefore leak the device every time we do a reflash. device_register/unregister is sufficient to hold the reference. Therefore, drop the call to pci_dev_get. Here's why this is safe. The proposed cxl_probe(pdev) calls cxl_adapter_init: a) init calls cxl_adapter_alloc, which creates a struct cxl, conventionally called adapter. This struct contains a device entry, adapter->dev. b) init calls cxl_configure_adapter, where we set adapter->dev.parent = &dev->dev (here dev is the pci dev) So at this point, the cxl adapter's device's parent is the PCI device that I want to be refcounted properly. c) init calls cxl_register_adapter *) cxl_register_adapter calls device_register(&adapter->dev) So now we're in device_register, where dev is the adapter device, and we want to know if the PCI device is safe after we return. device_register(&adapter->dev) calls device_initialize() and then device_add(). device_add() does a get_device(). device_add() also explicitly grabs the device's parent, and calls get_device() on it: parent = get_device(dev->parent); So therefore, device_register() takes a reference on the parent PCI dev, which is what pci_dev_get() was guarding. 
pci_dev_get() can therefore be safely removed. Fixes: f204e0b8cedd ("cxl: Driver code for powernv PCIe based cards for userspace access") Cc: stable@vger.kernel.org Signed-off-by: Daniel Axtens Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 02c85160bfe9..a5e977192b61 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1249,8 +1249,6 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) int slice; int rc; - pci_dev_get(dev); - if (cxl_verbose) dump_cxl_config_space(dev); -- cgit v1.2.3-59-g8ed1b From 2cd55c68c0a49a75433b15c7dbd1991fef81e662 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 15 Sep 2015 15:48:34 +1000 Subject: cxl: Fix build failure due to -Wunused-variable behaviour change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A recent change in gcc caused this build failure: /var/lib/jenkins/workspace/gcc_kernel_build/linux/drivers/misc/cxl/cxl.h:72:27: error: ‘CXL_PSL_DLCNTL’ defined but not used [-Werror=unused-const-variable] static const cxl_p1_reg_t CXL_PSL_DLCNTL = {0x0060}; Because of this gcc commit: Commit 1bca8cbd0c68366f07277f98ce6963e10c2aa617 by mark PR28901 -Wunused-variable ignores unused const initialised variables in C 12 years ago it was decided that -Wunused-variable shouldn't warn about static const variables because some code used const static char rcsid[] strings which were never used but wanted in the code anyway. But as the bug points out this hides some real bugs. These days the usage of rcsids is not very popular anymore. So this patch changes the default to warn about unused static const variables in C with -Wunused-variable. And it adds a new option -Wno-unused-const-variable to turn this warning off. For C++ this new warning is off by default, since const variables can be used as #defines in C++. 
New testcases for the new defaults in C and C++ are included testing the new warning and suppressing it with an unused attribute or using -Wno-unused-const-variable. gcc/ChangeLog The cxl driver uses static consts in place of #defines in some cases for type safety, so this change causes the driver to fail to build on new compilers as these constants are not all used in every file that imports the header. Suppress the warning for this driver to return to the old behaviour of -Wunused-variable. Reported-by: Anton Blanchard Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index 6f484dfe78f9..6982f603fadc 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -1,4 +1,4 @@ -ccflags-y := -Werror +ccflags-y := -Werror -Wno-unused-const-variable cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o debugfs.o pci.o trace.o -- cgit v1.2.3-59-g8ed1b From 655471f54c2e395ba29ae4156ba0f49928177cc1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 15 Sep 2015 11:24:17 +1000 Subject: powerpc/boot: Specify ABI v2 when building an LE boot wrapper The kernel does it, not the boot wrapper, which breaks with some cross compilers that still default to ABI v1. 
Fixes: 147c05168fc8 ("powerpc/boot: Add support for 64bit little endian wrapper") Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/boot/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 73eddda53b8e..4eec430d8fa8 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -28,6 +28,9 @@ BOOTCFLAGS += -m64 endif ifdef CONFIG_CPU_BIG_ENDIAN BOOTCFLAGS += -mbig-endian +else +BOOTCFLAGS += -mlittle-endian +BOOTCFLAGS += $(call cc-option,-mabi=elfv2) endif BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc -- cgit v1.2.3-59-g8ed1b From 36b35d5d807b7e57aff7d08e63de8b17731ee211 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 15 Sep 2015 12:30:08 +0530 Subject: powerpc/mm: Recompute hash value after a failed update If we had secondary hash flag set, we ended up modifying hash value in the updatepp code path. Hence with a failed updatepp we will be using a wrong hash value for the following hash insert. Fix this by recomputing hash before insert. Without this patch we can end up with using wrong slot number in linux pte. That can result in us missing an hash pte update or invalidate which can cause memory corruption or even machine check. 
Fixes: 6d492ecc6489 ("powerpc/THP: Add code to handle HPTE faults for hugepages") Cc: stable@vger.kernel.org # v3.11+ Signed-off-by: Aneesh Kumar K.V Reviewed-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hugepage-hash64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 43dafb9d6a46..4d87122cf6a7 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -85,7 +85,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, BUG_ON(index >= 4096); vpn = hpt_vpn(ea, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); if (psize == MMU_PAGE_4K) { /* @@ -101,6 +100,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, valid = hpte_valid(hpte_slot_array, index); if (valid) { /* update the hpte bits */ + hash = hpt_hash(vpn, shift, ssize); hidx = hpte_hash_index(hpte_slot_array, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; @@ -126,6 +126,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!valid) { unsigned long hpte_group; + hash = hpt_hash(vpn, shift, ssize); /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; new_pmd |= _PAGE_HASHPTE; -- cgit v1.2.3-59-g8ed1b From 1cd03890ea64795e53f17a94928cca22495acb2a Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 16 Sep 2015 12:04:51 +0200 Subject: powerpc32: memcpy: only use dcbz once cache is enabled memcpy() uses instruction dcbz to speed up copy by not wasting time loading cache line with data that will be overwritten. Some platforms like mpc52xx do not have the cache active at startup and therefore cannot use memcpy(). Although no part of the code explicitly uses memcpy(), GCC makes calls to it. 
This patch modifies memcpy() such that at startup, memcpy() unconditionally jumps to generic_memcpy() which doesn't use the dcbz instruction. Once the initial MMU is set up, in machine_init() we patch memcpy() by replacing this unconditional jump with a NOP. Reported-by: Michal Sojka Tested-by: Thomas Gleixner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_32.c | 3 +++ arch/powerpc/lib/copy_32.S | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index bb02e9f6944e..b316ab787a3b 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -38,6 +38,7 @@ #include #include #include +#include #define DBG(fmt...) @@ -116,6 +117,8 @@ notrace void __init machine_init(u64 dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); + patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP); + /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 2ef50c629470..da5847d09718 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -128,6 +128,10 @@ _GLOBAL(memset) * the destination area is cacheable. * We only use this version if the source and dest don't overlap. * -- paulus. + * + * During early init, cache might not be active yet, so dcbz cannot be used. + * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is + * replaced by a nop once cache is active. 
This is done in machine_init() */ _GLOBAL(memmove) cmplw 0,r3,r4 @@ -135,6 +139,7 @@ _GLOBAL(memmove) /* fall through */ _GLOBAL(memcpy) + b generic_memcpy add r7,r3,r5 /* test if the src & dst overlap */ add r8,r4,r5 cmplw 0,r4,r7 -- cgit v1.2.3-59-g8ed1b From 400c47d81ca383fc87d5a3937b234e23e26909fb Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 16 Sep 2015 12:04:53 +0200 Subject: powerpc32: memset: only use dcbz once cache is enabled memset() uses instruction dcbz to speed up clearing by not wasting time loading cache line with data that will be overwritten. Some platforms like mpc52xx do not have the cache active at startup and therefore cannot use memset(). Although no part of the code explicitly uses memset(), GCC may make calls to it. This patch modifies memset() such that at startup, memset() unconditionally skips the optimised block that uses the dcbz instruction. Once the initial MMU is set up, in machine_init() we patch memset() by replacing this unconditional jump with a NOP. Tested-by: Thomas Gleixner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_32.c | 3 +++ arch/powerpc/lib/copy_32.S | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index b316ab787a3b..ad8c9db61237 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -110,6 +110,8 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * This is called very early on the boot process, after a minimal * MMU environment has been set up but before MMU_init is called. 
*/ +extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ + notrace void __init machine_init(u64 dt_ptr) { lockdep_init(); @@ -118,6 +120,7 @@ notrace void __init machine_init(u64 dt_ptr) udbg_early_init(); patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP); + patch_instruction(&memset_nocache_branch, PPC_INST_NOP); /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index da5847d09718..c44df2dbedd5 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -73,6 +73,10 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1) * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination * area is cacheable. -- paulus + * + * During early init, cache might not be active yet, so dcbz cannot be used. + * We therefore skip the optimised bloc that uses dcbz. This jump is + * replaced by a nop once cache is active. This is done in machine_init() */ _GLOBAL(memset) rlwimi r4,r4,8,16,23 @@ -88,6 +92,8 @@ _GLOBAL(memset) subf r6,r0,r6 cmplwi 0,r4,0 bne 2f /* Use normal procedure if r4 is not zero */ +_GLOBAL(memset_nocache_branch) + b 2f /* Skip optimised bloc until cache is enabled */ clrlwi r7,r6,32-LG_CACHELINE_BYTES add r8,r7,r5 -- cgit v1.2.3-59-g8ed1b