diff options
Diffstat (limited to 'arch/powerpc')
176 files changed, 2144 insertions, 900 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ad620637cbd1..757175ccf53c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -147,6 +147,7 @@ config PPC select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_EXTABLE_SORT @@ -213,7 +214,7 @@ config PPC select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) - select HAVE_HARDLOCKUP_DETECTOR_ARCH if (PPC64 && PPC_BOOK3S) + select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC64 && PPC_BOOK3S && SMP select HAVE_OPROFILE select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS @@ -722,7 +723,7 @@ config PPC_64K_PAGES config PPC_256K_PAGES bool "256k page size" - depends on 44x && !STDBINUTILS + depends on 44x && !STDBINUTILS && !PPC_47x help Make the page size 256k. @@ -1023,6 +1024,19 @@ config FSL_RIO Include support for RapidIO controller on Freescale embedded processors (MPC8548, MPC8641, etc). +config PPC_RTAS_FILTER + bool "Enable filtering of RTAS syscalls" + default y + depends on PPC_RTAS + help + The RTAS syscall API has security issues that could be used to + compromise system integrity. This option enforces restrictions on the + RTAS calls and arguments passed by userspace programs to mitigate + these issues. + + Say Y unless you know what you are doing and the filter is causing + problems for you. + endmenu config NONSTATIC_KERNEL diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index b915fe658979..2ca9114fcf00 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -352,6 +352,7 @@ config PPC_EARLY_DEBUG_CPM_ADDR config FAIL_IOMMU bool "Fault-injection capability for IOMMU" depends on FAULT_INJECTION + depends on PCI || IBMVIO help Provide fault-injection capability for IOMMU. Each device can be selectively enabled via the fail_iommu property. diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 37ac731a556b..b9d2fcf030d0 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -174,7 +174,7 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) endif -else +else ifdef CONFIG_PPC_BOOK3E_64 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif @@ -250,7 +250,6 @@ KBUILD_CFLAGS += $(call cc-option,-mno-string) cpu-as-$(CONFIG_4xx) += -Wa,-m405 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) -cpu-as-$(CONFIG_E200) += -Wa,-me200 cpu-as-$(CONFIG_E500) += -Wa,-me500 # When using '-many -mpower4' gas will first try and find a matching power4 diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 92608f34d312..1d83966f5ef6 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -44,9 +44,6 @@ p_end: .long _end p_pstack: .long _platform_stack_top #endif - .globl _zimage_start - /* Clang appears to require the .weak directive to be after the symbol - * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921 */ .weak _zimage_start _zimage_start: .globl _zimage_start_lib diff --git a/arch/powerpc/boot/devtree.c b/arch/powerpc/boot/devtree.c index 5d91036ad626..58fbcfcc98c9 100644 --- a/arch/powerpc/boot/devtree.c +++ b/arch/powerpc/boot/devtree.c @@ -13,6 +13,7 @@ #include "string.h" #include "stdio.h" #include "ops.h" +#include "of.h" void dt_fixup_memory(u64 start, u64 size) { @@ -23,21 +24,25 @@ void dt_fixup_memory(u64 start, u64 size) root = finddevice("/"); if (getprop(root, "#address-cells", &naddr, sizeof(naddr)) < 0) naddr = 2; + else + naddr = be32_to_cpu(naddr); if (naddr < 1 || naddr > 2) fatal("Can't cope with #address-cells == %d in /\n\r", naddr); if (getprop(root, "#size-cells", &nsize, sizeof(nsize)) < 0) nsize = 1; + else + nsize = be32_to_cpu(nsize); if (nsize < 1 || nsize > 2) fatal("Can't cope with #size-cells == %d in /\n\r", nsize); i = 0; if (naddr == 2) - memreg[i++] = start >> 32; - memreg[i++] = start & 0xffffffff; + memreg[i++] = cpu_to_be32(start >> 32); + memreg[i++] = cpu_to_be32(start & 0xffffffff); if (nsize == 2) - memreg[i++] = size >> 32; - memreg[i++] = size & 0xffffffff; + memreg[i++] = cpu_to_be32(size >> 32); + memreg[i++] = cpu_to_be32(size & 0xffffffff); memory = finddevice("/memory"); if (! memory) { @@ -45,9 +50,9 @@ void dt_fixup_memory(u64 start, u64 size) setprop_str(memory, "device_type", "memory"); } - printf("Memory <- <0x%x", memreg[0]); + printf("Memory <- <0x%x", be32_to_cpu(memreg[0])); for (i = 1; i < (naddr + nsize); i++) - printf(" 0x%x", memreg[i]); + printf(" 0x%x", be32_to_cpu(memreg[i])); printf("> (%ldMB)\n\r", (unsigned long)(size >> 20)); setprop(memory, "reg", memreg, (naddr + nsize)*sizeof(u32)); @@ -65,10 +70,10 @@ void dt_fixup_cpu_clocks(u32 cpu, u32 tb, u32 bus) printf("CPU bus-frequency <- 0x%x (%dMHz)\n\r", bus, MHZ(bus)); while ((devp = find_node_by_devtype(devp, "cpu"))) { - setprop_val(devp, "clock-frequency", cpu); - setprop_val(devp, "timebase-frequency", tb); + setprop_val(devp, "clock-frequency", cpu_to_be32(cpu)); + setprop_val(devp, "timebase-frequency", cpu_to_be32(tb)); if (bus > 0) - setprop_val(devp, "bus-frequency", bus); + setprop_val(devp, "bus-frequency", cpu_to_be32(bus)); } timebase_period_ns = 1000000000 / tb; @@ -80,7 +85,7 @@ void dt_fixup_clock(const char *path, u32 freq) if (devp) { printf("%s: clock-frequency <- %x (%dMHz)\n\r", path, freq, MHZ(freq)); - setprop_val(devp, "clock-frequency", freq); + setprop_val(devp, "clock-frequency", cpu_to_be32(freq)); } } @@ -133,8 +138,12 @@ void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize) { if (getprop(node, "#address-cells", naddr, 4) != 4) *naddr = 2; + else + *naddr = be32_to_cpu(*naddr); if (getprop(node, "#size-cells", nsize, 4) != 4) *nsize = 1; + else + *nsize = be32_to_cpu(*nsize); } static void copy_val(u32 *dest, u32 *src, int naddr) @@ -163,9 +172,9 @@ static int add_reg(u32 *reg, u32 *add, int naddr) int i, carry = 0; for (i = MAX_ADDR_CELLS - 1; i >= MAX_ADDR_CELLS - naddr; i--) { - u64 tmp = (u64)reg[i] + add[i] + carry; + u64 tmp = (u64)be32_to_cpu(reg[i]) + be32_to_cpu(add[i]) + carry; carry = tmp >> 32; - reg[i] = (u32)tmp; + reg[i] = cpu_to_be32((u32)tmp); } return !carry; @@ -180,18 +189,18 @@ static int compare_reg(u32 *reg, u32 *range, u32 *rangesize) u32 end; for (i = 0; i < MAX_ADDR_CELLS; i++) { - if (reg[i] < range[i]) + if (be32_to_cpu(reg[i]) < be32_to_cpu(range[i])) return 0; - if (reg[i] > range[i]) + if (be32_to_cpu(reg[i]) > be32_to_cpu(range[i])) break; } for (i = 0; i < MAX_ADDR_CELLS; i++) { - end = range[i] + rangesize[i]; + end = be32_to_cpu(range[i]) + be32_to_cpu(rangesize[i]); - if (reg[i] < end) + if (be32_to_cpu(reg[i]) < end) break; - if (reg[i] > end) + if (be32_to_cpu(reg[i]) > end) return 0; } @@ -240,7 +249,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, return 0; dt_get_reg_format(parent, &naddr, &nsize); - if (nsize > 2) return 0; @@ -252,10 +260,10 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, copy_val(last_addr, prop_buf + offset, naddr); - ret_size = prop_buf[offset + naddr]; + ret_size = be32_to_cpu(prop_buf[offset + naddr]); if (nsize == 2) { ret_size <<= 32; - ret_size |= prop_buf[offset + naddr + 1]; + ret_size |= be32_to_cpu(prop_buf[offset + naddr + 1]); } for (;;) { @@ -278,7 +286,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, offset = find_range(last_addr, prop_buf, prev_naddr, naddr, prev_nsize, buflen / 4); - if (offset < 0) return 0; @@ -296,8 +303,7 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, if (naddr > 2) return 0; - ret_addr = ((u64)last_addr[2] << 32) | last_addr[3]; - + ret_addr = ((u64)be32_to_cpu(last_addr[2]) << 32) | be32_to_cpu(last_addr[3]); if (sizeof(void *) == 4 && (ret_addr >= 0x100000000ULL || ret_size > 0x100000000ULL || ret_addr + ret_size > 0x100000000ULL)) @@ -350,11 +356,14 @@ int dt_is_compatible(void *node, const char *compat) int dt_get_virtual_reg(void *node, void **addr, int nres) { unsigned long xaddr; - int n; + int n, i; n = getprop(node, "virtual-reg", addr, nres * 4); - if (n > 0) + if (n > 0) { + for (i = 0; i < n/4; i ++) + ((u32 *)addr)[i] = be32_to_cpu(((u32 *)addr)[i]); return n / 4; + } for (n = 0; n < nres; n++) { if (!dt_xlate_reg(node, n, &xaddr, NULL)) diff --git a/arch/powerpc/boot/dts/charon.dts b/arch/powerpc/boot/dts/charon.dts index 408b486b13df..cd589539f313 100644 --- a/arch/powerpc/boot/dts/charon.dts +++ b/arch/powerpc/boot/dts/charon.dts @@ -35,7 +35,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts index 0e5e9d3acf79..19a14e62e65f 100644 --- a/arch/powerpc/boot/dts/digsy_mtc.dts +++ b/arch/powerpc/boot/dts/digsy_mtc.dts @@ -16,7 +16,7 @@ model = "intercontrol,digsy-mtc"; compatible = "intercontrol,digsy-mtc"; - memory { + memory@0 { reg = <0x00000000 0x02000000>; // 32MB }; diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi index 1b4aafc1f6a2..9716a0484ecf 100644 --- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi @@ -122,7 +122,15 @@ }; /include/ "pq3-i2c-0.dtsi" + i2c@3000 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-i2c-1.dtsi" + i2c@3100 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-duart-0.dtsi" /include/ "pq3-espi-0.dtsi" spi0: spi@7000 { diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index 872e4485dc3f..ddc018d42252 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -371,7 +371,23 @@ }; /include/ "qoriq-i2c-0.dtsi" + i2c@118000 { + fsl,i2c-erratum-a004447; + }; + + i2c@118100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-i2c-1.dtsi" + i2c@119000 { + fsl,i2c-erratum-a004447; + }; + + i2c@119100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-duart-0.dtsi" /include/ "qoriq-duart-1.dtsi" /include/ "qoriq-gpio-0.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi index c90702b04a53..48e5cd61599c 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi @@ -79,6 +79,7 @@ fman0: fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfc000 0x1000>; + fsl,erratum-a009885; }; xmdio0: mdio@fd000 { @@ -86,6 +87,7 @@ fman0: fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfd000 0x1000>; + fsl,erratum-a009885; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1023rdb.dts b/arch/powerpc/boot/dts/fsl/t1023rdb.dts index 5ba6fbfca274..f82f85c65964 100644 --- a/arch/powerpc/boot/dts/fsl/t1023rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts @@ -154,7 +154,7 @@ fm1mac3: ethernet@e4000 { phy-handle = <&sgmii_aqr_phy3>; - phy-connection-type = "sgmii-2500"; + phy-connection-type = "2500base-x"; sleep = <&rcpm 0x20000000>; }; diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 099a598c74c0..bfe1ed5be337 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -139,12 +139,12 @@ fman@400000 { ethernet@e6000 { phy-handle = <&phy_rgmii_0>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { phy-handle = <&phy_rgmii_1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; mdio0: mdio@fc000 { diff --git a/arch/powerpc/boot/dts/lite5200.dts b/arch/powerpc/boot/dts/lite5200.dts index cb2782dd6132..e7b194775d78 100644 --- a/arch/powerpc/boot/dts/lite5200.dts +++ b/arch/powerpc/boot/dts/lite5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/lite5200b.dts b/arch/powerpc/boot/dts/lite5200b.dts index 2b86c81f9048..547cbe726ff2 100644 --- a/arch/powerpc/boot/dts/lite5200b.dts +++ b/arch/powerpc/boot/dts/lite5200b.dts @@ -31,7 +31,7 @@ led4 { gpios = <&gpio_simple 2 1>; }; }; - memory { + memory@0 { reg = <0x00000000 0x10000000>; // 256MB }; diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts index 61cae9dcddef..f3188018face 100644 --- a/arch/powerpc/boot/dts/media5200.dts +++ b/arch/powerpc/boot/dts/media5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB RAM }; diff --git a/arch/powerpc/boot/dts/mpc5200b.dtsi b/arch/powerpc/boot/dts/mpc5200b.dtsi index 648fe31795f4..8b796f3b11da 100644 --- a/arch/powerpc/boot/dts/mpc5200b.dtsi +++ b/arch/powerpc/boot/dts/mpc5200b.dtsi @@ -33,7 +33,7 @@ }; }; - memory: memory { + memory: memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/o2d.dts b/arch/powerpc/boot/dts/o2d.dts index 24a46f65e529..e0a8d3034417 100644 --- a/arch/powerpc/boot/dts/o2d.dts +++ b/arch/powerpc/boot/dts/o2d.dts @@ -12,7 +12,7 @@ model = "ifm,o2d"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi index 6661955a2be4..b55a9e5bd828 100644 --- a/arch/powerpc/boot/dts/o2d.dtsi +++ b/arch/powerpc/boot/dts/o2d.dtsi @@ -19,7 +19,7 @@ model = "ifm,o2d"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/o2dnt2.dts b/arch/powerpc/boot/dts/o2dnt2.dts index eeba7f5507d5..c2eedbd1f5fc 100644 --- a/arch/powerpc/boot/dts/o2dnt2.dts +++ b/arch/powerpc/boot/dts/o2dnt2.dts @@ -12,7 +12,7 @@ model = "ifm,o2dnt2"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/o3dnt.dts b/arch/powerpc/boot/dts/o3dnt.dts index fd00396b0593..e4c1bdd41271 100644 --- a/arch/powerpc/boot/dts/o3dnt.dts +++ b/arch/powerpc/boot/dts/o3dnt.dts @@ -12,7 +12,7 @@ model = "ifm,o3dnt"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts index c259c6b3ac5a..5674f978b983 100644 --- a/arch/powerpc/boot/dts/pcm032.dts +++ b/arch/powerpc/boot/dts/pcm032.dts @@ -22,7 +22,7 @@ model = "phytec,pcm032"; compatible = "phytec,pcm032"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/tqm5200.dts b/arch/powerpc/boot/dts/tqm5200.dts index 9ed0bc78967e..5bb25a9e40a0 100644 --- a/arch/powerpc/boot/dts/tqm5200.dts +++ b/arch/powerpc/boot/dts/tqm5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/ns16550.c b/arch/powerpc/boot/ns16550.c index b0da4466d419..f16d2be1d0f3 100644 --- a/arch/powerpc/boot/ns16550.c +++ b/arch/powerpc/boot/ns16550.c @@ -15,6 +15,7 @@ #include "stdio.h" #include "io.h" #include "ops.h" +#include "of.h" #define UART_DLL 0 /* Out: Divisor Latch Low */ #define UART_DLM 1 /* Out: Divisor Latch High */ @@ -58,16 +59,20 @@ int ns16550_console_init(void *devp, struct serial_console_data *scdp) int n; u32 reg_offset; - if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) + if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) { + printf("virt reg parse fail...\r\n"); return -1; + } n = getprop(devp, "reg-offset", ®_offset, sizeof(reg_offset)); if (n == sizeof(reg_offset)) - reg_base += reg_offset; + reg_base += be32_to_cpu(reg_offset); n = getprop(devp, "reg-shift", ®_shift, sizeof(reg_shift)); if (n != sizeof(reg_shift)) reg_shift = 0; + else + reg_shift = be32_to_cpu(reg_shift); scdp->open = ns16550_open; scdp->putc = ns16550_putc; diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 285d506c5a76..2f5e06309f09 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -39,6 +39,7 @@ CONFIG_MTD_CFI_GEOMETRY=y # CONFIG_MTD_CFI_I2 is not set CONFIG_MTD_CFI_I4=y CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_PHYSMAP=y CONFIG_MTD_PHYSMAP_OF=y # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index a09595f00cab..f0f16b4fc5ea 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -6,27 +6,28 @@ #include <asm/machdep.h> -static inline int arch_get_random_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { - return 0; + return false; } -static inline int arch_get_random_int(unsigned int *v) +static inline bool arch_get_random_int(unsigned int *v) { - return 0; + return false; } -static inline int arch_get_random_seed_long(unsigned long *v) +static inline bool arch_get_random_seed_long(unsigned long *v) { if (ppc_md.get_random_seed) return ppc_md.get_random_seed(v); - return 0; + return false; } -static inline int arch_get_random_seed_int(unsigned int *v) + +static inline bool arch_get_random_seed_int(unsigned int *v) { unsigned long val; - int rc; + bool rc; rc = arch_get_random_seed_long(&val); if (rc) @@ -34,16 +35,6 @@ static inline int arch_get_random_seed_int(unsigned int *v) return rc; } - -static inline int arch_has_random(void) -{ - return 0; -} - -static inline int arch_has_random_seed(void) -{ - return !!ppc_md.get_random_seed; -} #endif /* CONFIG_ARCH_RANDOM */ #ifdef CONFIG_PPC_POWERNV diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index fbe8df433019..dc953d22e3c6 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -44,6 +44,8 @@ # define SMPWMB eieio #endif +/* clang defines this macro for a builtin, which will not work with runtime patching */ +#undef __lwsync #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") #define dma_rmb() __lwsync() #define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 603aed229af7..46338f236004 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -217,15 +217,34 @@ static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr) */ static __inline__ int fls(unsigned int x) { - return 32 - __builtin_clz(x); + int lz; + + if (__builtin_constant_p(x)) + return x ? 32 - __builtin_clz(x) : 0; + asm("cntlzw %0,%1" : "=r" (lz) : "r" (x)); + return 32 - lz; } #include <asm-generic/bitops/builtin-__fls.h> +/* + * 64-bit can do this using one cntlzd (count leading zeroes doubleword) + * instruction; for 32-bit we use the generic version, which does two + * 32-bit fls calls. + */ +#ifdef CONFIG_PPC64 static __inline__ int fls64(__u64 x) { - return 64 - __builtin_clzll(x); + int lz; + + if (__builtin_constant_p(x)) + return x ? 64 - __builtin_clzll(x) : 0; + asm("cntlzd %0,%1" : "=r" (lz) : "r" (x)); + return 64 - lz; } +#else +#include <asm-generic/bitops/fls64.h> +#endif #ifdef CONFIG_PPC64 unsigned int __arch_hweight8(unsigned int w); diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 0796533d37dd..5325bd9c9b47 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -37,8 +37,10 @@ static inline bool pte_user(pte_t pte) */ #ifdef CONFIG_PTE_64BIT #define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #else #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif /* @@ -555,9 +557,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_val(*ptep) & _PAGE_HASHPTE) flush_hash_entry(mm, ptep, addr); __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 3f9ae3585ab9..80c953414882 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -13,20 +13,19 @@ */ #define MAX_EA_BITS_PER_CONTEXT 46 -#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2) /* - * Our page table limit us to 64TB. Hence for the kernel mapping, - * each MAP area is limited to 16 TB. - * The four map areas are: linear mapping, vmap, IO and vmemmap + * Our page table limit us to 64TB. For 64TB physical memory, we only need 64GB + * of vmemmap space. To better support sparse memory layout, we use 61TB + * linear map range, 1TB of vmalloc, 1TB of I/O and 1TB of vmememmap. */ +#define REGION_SHIFT (40) #define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT) /* - * Define the address range of the kernel non-linear virtual area - * 16TB + * Define the address range of the kernel non-linear virtual area (61TB) */ -#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000) +#define H_KERN_VIRT_START ASM_CONST(0xc0003d0000000000) #ifndef __ASSEMBLY__ #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index c8d1076e0ebb..a29b64129a7d 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -11,13 +11,12 @@ #ifdef __ASSEMBLY__ -.macro kuap_restore_amr gpr #ifdef CONFIG_PPC_KUAP +.macro kuap_restore_amr gpr BEGIN_MMU_FTR_SECTION_NESTED(67) ld \gpr, STACK_REGS_KUAP(r1) mtspr SPRN_AMR, \gpr END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) -#endif .endm .macro kuap_check_amr gpr1, gpr2 @@ -31,6 +30,7 @@ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) #endif .endm +#endif .macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr #ifdef CONFIG_PPC_KUAP @@ -54,6 +54,10 @@ #else /* !__ASSEMBLY__ */ +#include <linux/jump_label.h> + +DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); + #ifdef CONFIG_PPC_KUAP #include <asm/reg.h> @@ -77,6 +81,18 @@ static inline void set_kuap(unsigned long value) isync(); } +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && + (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), + "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); +} +#else /* CONFIG_PPC_KUAP */ +static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } +static inline void set_kuap(unsigned long value) { } +#endif /* !CONFIG_PPC_KUAP */ + static __always_inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { @@ -94,17 +110,10 @@ static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); } -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ - return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && - (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), - "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); -} -#endif /* CONFIG_PPC_KUAP */ - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index a1c60d5b50af..5131ed787409 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -206,8 +206,10 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr, * from ptesync, it should probably go into update_mmu_cache, rather * than set_pte_at (which is used to set ptes unrelated to faults). * - * Spurious faults to vmalloc region are not tolerated, so there is - * a ptesync in flush_cache_vmap. + * Spurious faults from the kernel memory are not tolerated, so there + * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows + * the pte update sequence from ISA Book III 6.10 Translation Table + * Update Synchronization Requirements. */ } diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 898b54262881..96ef54bc7d96 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -22,6 +22,7 @@ #define BRANCH_ABSOLUTE 0x2 bool is_offset_in_branch_range(long offset); +bool is_offset_in_cond_branch_range(long offset); unsigned int create_branch(const unsigned int *addr, unsigned long target, int flags); unsigned int create_cond_branch(const unsigned int *addr, @@ -72,7 +73,7 @@ void __patch_exception(int exc, unsigned long addr); #endif #define OP_RT_RA_MASK 0xffff0000UL -#define LIS_R2 0x3c020000UL +#define LIS_R2 0x3c400000UL #define ADDIS_R2_R12 0x3c4c0000UL #define ADDI_R2_R2 0x38420000UL diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h index a116fe931789..3bdd74739cb8 100644 --- a/arch/powerpc/include/asm/cpm1.h +++ b/arch/powerpc/include/asm/cpm1.h @@ -68,6 +68,7 @@ extern void cpm_reset(void); #define PROFF_SPI ((uint)0x0180) #define PROFF_SCC3 ((uint)0x0200) #define PROFF_SMC1 ((uint)0x0280) +#define PROFF_DSP1 ((uint)0x02c0) #define PROFF_SCC4 ((uint)0x0300) #define PROFF_SMC2 ((uint)0x0380) diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 7897d16e0990..727d4b321937 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -7,7 +7,7 @@ #include <linux/bug.h> #include <asm/cputable.h> -static inline bool early_cpu_has_feature(unsigned long feature) +static __always_inline bool early_cpu_has_feature(unsigned long feature) { return !!((CPU_FTRS_ALWAYS & feature) || (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature)); @@ -46,7 +46,7 @@ static __always_inline bool cpu_has_feature(unsigned long feature) return static_branch_likely(&cpu_feature_keys[i]); } #else -static inline bool cpu_has_feature(unsigned long feature) +static __always_inline bool cpu_has_feature(unsigned long feature) { return early_cpu_has_feature(feature); } diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index cf00ff0d121d..235911fb0e24 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -367,7 +367,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_NOEXECUTE) #define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_MAYBE_CAN_NAP) + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NOEXECUTE) #define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_COMMON | CPU_FTR_NOEXECUTE) @@ -407,7 +407,6 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT) -#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ #define CPU_FTRS_PPC970 (CPU_FTR_LWSYNC | \ @@ -507,8 +506,6 @@ enum { CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_82XX | CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | CPU_FTRS_CLASSIC32 | -#else - CPU_FTRS_GENERIC_32 | #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX | @@ -585,8 +582,6 @@ enum { CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_82XX & CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & CPU_FTRS_CLASSIC32 & -#else - CPU_FTRS_GENERIC_32 & #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX & diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index deb99fd6e060..51f533b6ece0 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -99,6 +99,36 @@ static inline int cpu_last_thread_sibling(int cpu) return cpu | (threads_per_core - 1); } +/* + * tlb_thread_siblings are siblings which share a TLB. This is not + * architected, is not something a hypervisor could emulate and a future + * CPU may change behaviour even in compat mode, so this should only be + * used on PowerNV, and only with care. + */ +static inline int cpu_first_tlb_thread_sibling(int cpu) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return cpu & ~0x6; /* Big Core */ + else + return cpu_first_thread_sibling(cpu); +} + +static inline int cpu_last_tlb_thread_sibling(int cpu) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return cpu | 0x6; /* Big Core */ + else + return cpu_last_thread_sibling(cpu); +} + +static inline int cpu_tlb_thread_sibling_step(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return 2; /* Big Core */ + else + return 1; +} + static inline u32 get_tensr(void) { #ifdef CONFIG_BOOKE diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h index 7141ccea8c94..a92059964579 100644 --- a/arch/powerpc/include/asm/dcr-native.h +++ b/arch/powerpc/include/asm/dcr-native.h @@ -53,8 +53,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val) #define mfdcr(rn) \ ({unsigned int rval; \ if (__builtin_constant_p(rn) && rn < 1024) \ - asm volatile("mfdcr %0," __stringify(rn) \ - : "=r" (rval)); \ + asm volatile("mfdcr %0, %1" : "=r" (rval) \ + : "n" (rn)); \ else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \ rval = mfdcrx(rn); \ else \ @@ -64,8 +64,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val) #define mtdcr(rn, v) \ do { \ if (__builtin_constant_p(rn) && rn < 1024) \ - asm volatile("mtdcr " __stringify(rn) ",%0" \ - : : "r" (v)); \ + asm volatile("mtdcr %0, %1" \ + : : "n" (rn), "r" (v)); \ else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \ mtdcrx(rn, v); \ else \ diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 28c3d936fdf3..ee61542c6c3d 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -8,26 +8,39 @@ #ifndef _ASM_POWERPC_LMB_H #define _ASM_POWERPC_LMB_H +#include <linux/sched.h> + struct drmem_lmb { u64 base_addr; u32 drc_index; u32 aa_index; u32 flags; -#ifdef CONFIG_MEMORY_HOTPLUG - int nid; -#endif }; struct drmem_lmb_info { struct drmem_lmb *lmbs; int n_lmbs; - u32 lmb_size; + u64 lmb_size; }; extern struct drmem_lmb_info *drmem_info; +static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb, + const struct drmem_lmb *start) +{ + /* + * DLPAR code paths can take several milliseconds per element + * when interacting with firmware. Ensure that we don't + * unfairly monopolize the CPU. + */ + if (((++lmb - start) % 16) == 0) + cond_resched(); + + return lmb; +} + #define for_each_drmem_lmb_in_range(lmb, start, end) \ - for ((lmb) = (start); (lmb) < (end); (lmb)++) + for ((lmb) = (start); (lmb) < (end); lmb = drmem_lmb_next(lmb, start)) #define for_each_drmem_lmb(lmb) \ for_each_drmem_lmb_in_range((lmb), \ @@ -66,7 +79,7 @@ struct of_drconf_cell_v2 { #define DRCONF_MEM_AI_INVALID 0x00000040 #define DRCONF_MEM_RESERVED 0x00000080 -static inline u32 drmem_lmb_size(void) +static inline u64 drmem_lmb_size(void) { return drmem_info->lmb_size; } @@ -103,22 +116,4 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) lmb->aa_index = 0xffffffff; } -#ifdef CONFIG_MEMORY_HOTPLUG -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ - lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ - lmb->nid = -1; -} -#else -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ -} -#endif - #endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 33f4f72eb035..6d0795d7b89c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -61,11 +61,18 @@ nop; \ nop +#define ENTRY_FLUSH_SLOT \ + ENTRY_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; + /* * r10 must be free to use, r13 must be paca */ #define INTERRUPT_TO_KERNEL \ - STF_ENTRY_BARRIER_SLOT + STF_ENTRY_BARRIER_SLOT; \ + ENTRY_FLUSH_SLOT /* * Macros for annotating the expected destination of (h)rfid @@ -127,6 +134,9 @@ hrfid; \ b hrfi_flush_fallback +#else /* __ASSEMBLY__ */ +/* Prototype for function defined in exceptions-64s.S */ +void do_uaccess_flush(void); #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_EXCEPTION_H */ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index b0af97add751..fbd406cd6916 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,22 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define UACCESS_FLUSH_FIXUP_SECTION \ +959: \ + .pushsection __uaccess_flush_fixup,"a"; \ + .align 2; \ +960: \ + FTR_ENTRY_OFFSET 959b-960b; \ + .popsection; + +#define ENTRY_FLUSH_FIXUP_SECTION \ +957: \ + .pushsection __entry_flush_fixup,"a"; \ + .align 2; \ +958: \ + FTR_ENTRY_OFFSET 957b-958b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -237,8 +253,11 @@ label##3: \ #include <linux/types.h> extern long stf_barrier_fallback; +extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup; +extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index f54a08a2cd70..017336f2b086 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -96,7 +96,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name #endif /* PPC64_ELF_ABI_v1 */ #endif /* CONFIG_FTRACE_SYSCALLS */ -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER) #include <asm/paca.h> static inline void this_cpu_disable_ftrace(void) @@ -108,9 +108,12 @@ static inline void this_cpu_enable_ftrace(void) { get_paca()->ftrace_enabled = 1; } + +void ftrace_free_init_tramp(void); #else /* CONFIG_PPC64 */ static inline void this_cpu_disable_ftrace(void) { } static inline void this_cpu_enable_ftrace(void) { } +static inline void ftrace_free_init_tramp(void) { } #endif /* CONFIG_PPC64 */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index a63ec938636d..daba2d2a02a0 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -345,25 +345,37 @@ static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) */ static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stbcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stbcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr) { - __asm__ __volatile__("sthcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + sthcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stwcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stwcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stdcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stdcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } @@ -375,7 +387,10 @@ static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr) static inline u8 __raw_rm_readb(volatile void __iomem *paddr) { u8 ret; - __asm__ __volatile__("lbzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lbzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -383,7 +398,10 @@ static inline u8 __raw_rm_readb(volatile void __iomem *paddr) static inline u16 __raw_rm_readw(volatile void __iomem *paddr) { u16 ret; - __asm__ __volatile__("lhzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lhzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -391,7 +409,10 @@ static inline u16 __raw_rm_readw(volatile void __iomem *paddr) static inline u32 __raw_rm_readl(volatile void __iomem *paddr) { u32 ret; - __asm__ __volatile__("lwzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lwzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -399,7 +420,10 @@ static inline u32 __raw_rm_readl(volatile void __iomem *paddr) static inline u64 __raw_rm_readq(volatile void __iomem *paddr) { u64 ret; - __asm__ __volatile__("ldcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + ldcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 94f24928916a..ed4f5f536fc1 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -6,7 +6,7 @@ #define KUAP_WRITE 2 #define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/kup-radix.h> #endif #ifdef CONFIG_PPC_8xx @@ -24,9 +24,15 @@ .macro kuap_restore sp, current, gpr1, gpr2, gpr3 .endm +.macro kuap_restore_amr gpr +.endm + .macro kuap_check current, gpr .endm +.macro kuap_check_amr gpr1, gpr2 +.endm + #endif #else /* !__ASSEMBLY__ */ @@ -45,15 +51,26 @@ static inline void setup_kuep(bool disabled) { } void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } -static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } + static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return false; } + +static inline void kuap_check_amr(void) { } + +/* + * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush + * the L1D cache after user accesses. Only include the empty stubs for other + * platforms. + */ +#ifndef CONFIG_PPC_BOOK3S_64 +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { } +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { } +#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 7bcb64444a39..f71c361dc356 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -59,6 +59,9 @@ struct machdep_calls { int (*pcibios_root_bridge_prepare)(struct pci_host_bridge *bridge); + /* finds all the pci_controllers present at boot */ + void (*discover_phbs)(void); + /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 58efca934311..f132b418a8c7 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -216,7 +216,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm_irqs_off(prev, next, current); } /* We don't currently use enter_lazy_tlb() for anything */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 552b96eef0c8..3d32d7103ec8 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -148,8 +148,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) #define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #else #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif /* diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 7fed9dc0f147..3d2a78ab051a 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -199,9 +199,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, */ if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) { __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); return; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 6ba5adb96a3b..d92353a96f81 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ + pfn_valid(virt_to_pfn(_addr)); \ +}) /* * On Book-E parts we need __va to parse the device tree and we can't @@ -212,6 +216,9 @@ static inline bool pfn_valid(unsigned long pfn) #define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET) #else #ifdef CONFIG_PPC64 + +#define VIRTUAL_WARN_ON(x) WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && (x)) + /* * gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET * with -mcmodel=medium, so we use & and | instead of - and + on 64-bit. @@ -219,13 +226,13 @@ static inline bool pfn_valid(unsigned long pfn) */ #define __va(x) \ ({ \ - VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); \ + VIRTUAL_WARN_ON((unsigned long)(x) >= PAGE_OFFSET); \ (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); \ }) #define __pa(x) \ ({ \ - VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); \ + VIRTUAL_WARN_ON((unsigned long)(x) < PAGE_OFFSET); \ (unsigned long)(x) & 0x0fffffffffffffffUL; \ }) diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h index c6bbe9778d3c..3c09109e708e 100644 --- a/arch/powerpc/include/asm/pmc.h +++ b/arch/powerpc/include/asm/pmc.h @@ -34,6 +34,13 @@ static inline void ppc_set_pmu_inuse(int inuse) #endif } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +static inline int ppc_get_pmu_inuse(void) +{ + return get_paca()->pmcregs_in_use; +} +#endif + extern void power4_enable_pmcs(void); #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index c1df75edde44..a9af63bd430f 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -204,6 +204,7 @@ #define PPC_INST_ICBT 0x7c00002c #define PPC_INST_ICSWX 0x7c00032d #define PPC_INST_ICSWEPX 0x7c00076d +#define PPC_INST_DSSALL 0x7e00066c #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 @@ -439,6 +440,7 @@ __PPC_RA(a) | __PPC_RB(b)) #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ __PPC_RA(a) | __PPC_RB(b)) +#define PPC_DSSALL stringify_in_c(.long PPC_INST_DSSALL) #define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LQARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index cb89e4bf55ce..964063765662 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -71,6 +71,7 @@ struct ps3_dma_region_ops; * @bus_addr: The 'translated' bus address of the region. * @len: The length in bytes of the region. * @offset: The offset from the start of memory of the region. + * @dma_mask: Device dma_mask. * @ioid: The IOID of the device who owns this region * @chunk_list: Opaque variable used by the ioc page manager. * @region_ops: struct ps3_dma_region_ops - dma region operations @@ -85,6 +86,7 @@ struct ps3_dma_region { enum ps3_dma_region_type region_type; unsigned long len; unsigned long offset; + u64 dma_mask; /* driver variables (set by ps3_dma_region_create) */ unsigned long bus_addr; diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index c41220f4aad9..5a424f867c82 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -62,6 +62,9 @@ struct pt_regs }; #endif + +#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) + #ifdef __powerpc64__ /* diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b3cbb1136bce..34d08ff21b98 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -796,7 +796,7 @@ #define THRM1_TIN (1 << 31) #define THRM1_TIV (1 << 30) #define THRM1_THRES(x) ((x&0x7f)<<23) -#define THRM3_SITV(x) ((x&0x3fff)<<1) +#define THRM3_SITV(x) ((x & 0x1fff) << 1) #define THRM1_TID (1<<2) #define THRM1_TIE (1<<1) #define THRM1_V (1<<0) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 7c05e95a5c44..316a9c8d1929 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(u64 feature) return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations @@ -84,12 +89,19 @@ static inline bool security_ftr_enabled(u64 feature) // Software required to flush link stack on context switch #define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull +// The L1-D cache should be flushed when entering the kernel +#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull + +// The L1-D cache should be flushed after user accesses from the kernel +#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull // Features enabled by default #define SEC_FTR_DEFAULT \ (SEC_FTR_L1D_FLUSH_HV | \ SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ + SEC_FTR_L1D_FLUSH_ENTRY | \ + SEC_FTR_L1D_FLUSH_UACCESS | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 65676e2325b8..6f2f4497e13b 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,12 +52,16 @@ enum l1d_flush_type { }; void setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_entry_flush(bool enable); +void setup_uaccess_flush(bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_uaccess_flush_fixups(enum l1d_flush_type types); +void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 6047402b0a4d..5405cb2fe65a 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -22,6 +22,7 @@ static inline cycles_t get_cycles(void) return mftb(); } +#define get_cycles get_cycles #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_TIMEX_H */ diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 7f3a8b902325..02a1c18cdba3 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -67,19 +67,6 @@ static inline int mm_is_thread_local(struct mm_struct *mm) return false; return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)); } -static inline void mm_reset_thread_local(struct mm_struct *mm) -{ - WARN_ON(atomic_read(&mm->context.copros) > 0); - /* - * It's possible for mm_access to take a reference on mm_users to - * access the remote mm from another thread, but it's not allowed - * to set mm_cpumask, so mm_users may be > 1 here. - */ - WARN_ON(current->mm != mm); - atomic_set(&mm->context.active_cpus, 1); - cpumask_clear(mm_cpumask(mm)); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); -} #else /* CONFIG_PPC_BOOK3S_64 */ static inline int mm_is_thread_local(struct mm_struct *mm) { diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index cafad1960e76..a14a32f6c3cc 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -191,8 +191,11 @@ extern long __get_user_bad(void); */ #define __get_user_atomic_128_aligned(kaddr, uaddr, err) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine altivec\n" \ "1: lvx 0,0,%1 # get user\n" \ " stvx 0,0,%2 # put kernel\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ diff --git a/arch/powerpc/include/uapi/asm/errno.h b/arch/powerpc/include/uapi/asm/errno.h index cc79856896a1..4ba87de32be0 100644 --- a/arch/powerpc/include/uapi/asm/errno.h +++ b/arch/powerpc/include/uapi/asm/errno.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_ERRNO_H #define _ASM_POWERPC_ERRNO_H +#undef EDEADLOCK #include <asm-generic/errno.h> #undef EDEADLOCK diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 59260eb96291..5819a577d267 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -13,6 +13,7 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif +CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) @@ -181,6 +182,9 @@ KCOV_INSTRUMENT_cputable.o := n KCOV_INSTRUMENT_setup_64.o := n KCOV_INSTRUMENT_paca.o := n +CFLAGS_setup_64.o += -fno-stack-protector +CFLAGS_paca.o += -fno-stack-protector + extra-$(CONFIG_PPC_FPU) += fpu.o extra-$(CONFIG_ALTIVEC) += vector.o extra-$(CONFIG_PPC64) += entry_64.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 5c0a1e17219b..af399675248e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -285,7 +285,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 6dfceaa820e4..b0e0b3cd91ee 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -250,8 +250,10 @@ int __init btext_find_display(int allow_nonstdout) rc = btext_initialize(np); printk("result: %d\n", rc); } - if (rc == 0) + if (rc == 0) { + of_node_put(np); break; + } } return rc; } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index c35069294ecf..8b0e523b2abb 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -368,14 +368,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) pa = pte_pfn(*ptep); /* On radix we can do hugepage mappings for io, so handle that */ - if (hugepage_shift) { - pa <<= hugepage_shift; - pa |= token & ((1ul << hugepage_shift) - 1); - } else { - pa <<= PAGE_SHIFT; - pa |= token & (PAGE_SIZE - 1); - } + if (!hugepage_shift) + hugepage_shift = PAGE_SHIFT; + pa <<= PAGE_SHIFT; + pa |= token & ((1ul << hugepage_shift) - 1); return pa; } diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index cf11277ebd02..000ebb5a6fb3 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -272,8 +272,9 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) { struct pci_io_addr_range *piar; struct rb_node *n; + unsigned long flags; - spin_lock(&pci_io_addr_cache_root.piar_lock); + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) { piar = rb_entry(n, struct pci_io_addr_range, rb_node); @@ -281,7 +282,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev)); } - spin_unlock(&pci_io_addr_cache_root.piar_lock); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); return 0; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index f29bb176381f..c72894ff9d61 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -336,6 +336,9 @@ trace_syscall_entry_irq_off: .globl transfer_to_syscall transfer_to_syscall: +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_lock r11, r12 +#endif #ifdef CONFIG_TRACE_IRQFLAGS andi. r12,r9,MSR_EE beq- trace_syscall_entry_irq_off diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 70ac8a6ba0c1..88bba0a931d6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1150,7 +1150,7 @@ EXC_REAL_BEGIN(data_access, 0x300, 0x80) INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1 EXC_REAL_END(data_access, 0x300, 0x80) EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) - INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1 + INT_HANDLER data_access, 0x300, ool=1, virt=1, dar=1, dsisr=1 EXC_VIRT_END(data_access, 0x4300, 0x80) INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1 EXC_COMMON_BEGIN(data_access_common) @@ -1205,7 +1205,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(instruction_access, 0x400, 0x80) - INT_HANDLER instruction_access, 0x400, kvm=1 + INT_HANDLER instruction_access, 0x400, ool=1, kvm=1 EXC_REAL_END(instruction_access, 0x400, 0x80) EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) INT_HANDLER instruction_access, 0x400, virt=1 @@ -1225,7 +1225,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) - INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1 + INT_HANDLER instruction_access_slb, 0x480, ool=1, area=PACA_EXSLB, kvm=1 EXC_REAL_END(instruction_access_slb, 0x480, 0x80) EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB @@ -1365,17 +1365,17 @@ EXC_REAL_BEGIN(decrementer, 0x900, 0x80) INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1 EXC_REAL_END(decrementer, 0x900, 0x80) EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) - INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED + INT_HANDLER decrementer, 0x900, ool=1, virt=1, bitmask=IRQS_DISABLED EXC_VIRT_END(decrementer, 0x4900, 0x80) INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0 EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80) - INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1 + INT_HANDLER hdecrementer, 0x980, ool=1, hsrr=EXC_HV, kvm=1 EXC_REAL_END(hdecrementer, 0x980, 0x80) EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80) - INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1 + INT_HANDLER hdecrementer, 0x980, ool=1, virt=1, hsrr=EXC_HV, kvm=1 EXC_VIRT_END(hdecrementer, 0x4980, 0x80) INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0 EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) @@ -2046,15 +2046,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr -TRAMP_REAL_BEGIN(rfi_flush_fallback) - SET_SCRATCH0(r13); - GET_PACA(r13); - std r1,PACA_EXRFI+EX_R12(r13) - ld r1,PACAKSAVE(r13) - std r9,PACA_EXRFI+EX_R9(r13) - std r10,PACA_EXRFI+EX_R10(r13) - std r11,PACA_EXRFI+EX_R11(r13) - mfctr r9 +/* Clobbers r10, r11, ctr */ +.macro L1D_DISPLACEMENT_FLUSH ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) ld r11,PACA_L1D_FLUSH_SIZE(r13) srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ @@ -2065,7 +2058,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) sync /* - * The load adresses are at staggered offsets within cachelines, + * The load addresses are at staggered offsets within cachelines, * which suits some pipelines better (on others it should not * hurt). */ @@ -2080,7 +2073,30 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) ld r11,(0x80 + 8)*7(r10) addi r10,r10,0x80*8 bdnz 1b +.endm + +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + blr +TRAMP_REAL_BEGIN(rfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std r1,PACA_EXRFI+EX_R12(r13) + ld r1,PACAKSAVE(r13) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -2098,32 +2114,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -2132,6 +2123,19 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid +USE_TEXT_SECTION() + +_GLOBAL(do_uaccess_flush) + UACCESS_FLUSH_FIXUP_SECTION + nop + nop + nop + blr + L1D_DISPLACEMENT_FLUSH + blr +_ASM_NOKPROBE_SYMBOL(do_uaccess_flush) +EXPORT_SYMBOL(do_uaccess_flush) + /* * Real mode exceptions actually use this too, but alternate * instruction code patches (which end up in the common .text area) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 9b522152d8f0..69d64f406204 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -279,7 +279,7 @@ static void fadump_show_config(void) * that is required for a kernel to boot successfully. * */ -static inline u64 fadump_calculate_reserve_size(void) +static __init u64 fadump_calculate_reserve_size(void) { u64 base, size, bootmem_min; int ret; @@ -835,7 +835,6 @@ static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info) sizeof(struct fadump_memory_range)); return 0; } - static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, u64 base, u64 end) { @@ -854,7 +853,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, start = mem_ranges[mrange_info->mem_range_cnt - 1].base; size = mem_ranges[mrange_info->mem_range_cnt - 1].size; - if ((start + size) == base) + /* + * Boot memory area needs separate PT_LOAD segment(s) as it + * is moved to a different location at the time of crash. + * So, fold only if the region is not boot memory area. + */ + if ((start + size) == base && start >= fw_dump.boot_mem_top) is_adjacent = true; } if (!is_adjacent) { diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 4a24f8f026c7..edaab1142498 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -418,14 +418,11 @@ InstructionTLBMiss: cmplw 0,r1,r3 #endif mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r1,_PAGE_PRESENT | _PAGE_EXEC -#endif + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) bge- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -484,13 +481,10 @@ DataLoadTLBMiss: lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_PRESENT -#endif + li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER bge- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1, _PAGE_PRESENT | _PAGE_ACCESSED addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ @@ -564,13 +558,10 @@ DataStoreTLBMiss: lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT -#endif + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER bge- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ @@ -843,7 +834,7 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr -load_segment_registers: +_GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 780f527eabd2..9019f1395d39 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -420,6 +420,10 @@ generic_secondary_common_init: /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) ld r23,0(r23) @@ -448,10 +452,6 @@ generic_secondary_common_init: sync /* order paca.run and cur_cpu_spec */ isync /* In case code patching happened */ - /* Create a temp kernel stack for use before relocation is on. */ - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD - b __secondary_start #endif /* SMP */ @@ -992,7 +992,7 @@ start_here_common: bl start_kernel /* Not reached */ - trap +0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 /* diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 98d8b6832fcb..6f3e417f55a3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -191,7 +191,7 @@ SystemCall: /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. */ - EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD) + EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) /* Called from DataStoreTLBMiss when perf TLB misses events are activated */ #ifdef CONFIG_PERF_EVENTS @@ -229,9 +229,7 @@ SystemCall: InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mtspr SPRN_SPRG_SCRATCH1, r11 -#endif /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -278,11 +276,9 @@ InstructionTLBMiss: #ifdef ITLB_MISS_KERNEL mtcr r11 #endif -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. * Software indicator bits 22, 24, 25, 26, and 27 must be @@ -296,9 +292,7 @@ InstructionTLBMiss: /* Restore registers */ 0: mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi patch_site 0b, patch__itlbmiss_exit_1 @@ -308,9 +302,7 @@ InstructionTLBMiss: addi r10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi #endif @@ -394,11 +386,9 @@ DataStoreTLBMiss: * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); * r10 = (r10 & ~PRESENT) | r11; */ -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index a36fd053c3db..0615ba86baef 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -37,7 +37,7 @@ static int __init powersave_off(char *arg) { ppc_md.power_save = NULL; cpuidle_disable = IDLE_POWERSAVE_OFF; - return 0; + return 1; } __setup("powersave=off", powersave_off); diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 0ffdd18b9f26..acb8215c5a01 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -129,7 +129,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) mtspr SPRN_HID0,r4 BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index d32751994a62..c3f62c9ce740 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -50,28 +50,32 @@ _GLOBAL(isa300_idle_stop_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) /* 168 bytes */ PPC_STOP b . /* catch bugs */ @@ -87,8 +91,8 @@ _GLOBAL(isa300_idle_stop_mayloss) */ _GLOBAL(idle_return_gpr_loss) ld r1,PACAR1(r13) - ld r4,-8*19(r1) - ld r5,-8*20(r1) + ld r4,-8*20(r1) + ld r5,-8*21(r1) mtlr r4 mtcr r5 /* @@ -96,38 +100,40 @@ _GLOBAL(idle_return_gpr_loss) * from PACATOC. This could be avoided for that less common case * if KVM saved its r2. */ - ld r2,-8*0(r1) - ld r14,-8*1(r1) - ld r15,-8*2(r1) - ld r16,-8*3(r1) - ld r17,-8*4(r1) - ld r18,-8*5(r1) - ld r19,-8*6(r1) - ld r20,-8*7(r1) - ld r21,-8*8(r1) - ld r22,-8*9(r1) - ld r23,-8*10(r1) - ld r24,-8*11(r1) - ld r25,-8*12(r1) - ld r26,-8*13(r1) - ld r27,-8*14(r1) - ld r28,-8*15(r1) - ld r29,-8*16(r1) - ld r30,-8*17(r1) - ld r31,-8*18(r1) + ld r2,-8*1(r1) + ld r14,-8*2(r1) + ld r15,-8*3(r1) + ld r16,-8*4(r1) + ld r17,-8*5(r1) + ld r18,-8*6(r1) + ld r19,-8*7(r1) + ld r20,-8*8(r1) + ld r21,-8*9(r1) + ld r22,-8*10(r1) + ld r23,-8*11(r1) + ld r24,-8*12(r1) + ld r25,-8*13(r1) + ld r26,-8*14(r1) + ld r27,-8*15(r1) + ld r28,-8*16(r1) + ld r29,-8*17(r1) + ld r30,-8*18(r1) + ld r31,-8*19(r1) blr /* * This is the sequence required to execute idle instructions, as * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0. - * - * The 0(r1) slot is used to save r2 in isa206, so use that here. + * We have to store a GPR somewhere, ptesync, then reload it, and create + * a false dependency on the result of the load. It doesn't matter which + * GPR we store, or where we store it. We have already stored r2 to the + * stack at -8(r1) in isa206_idle_insn_mayloss, so use that. */ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r2,0(r1); \ + std r2,-8(r1); \ ptesync; \ - ld r2,0(r1); \ + ld r2,-8(r1); \ 236: cmpd cr0,r2,r2; \ bne 236b; \ IDLE_INST; \ @@ -152,28 +158,32 @@ _GLOBAL(isa206_idle_insn_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) cmpwi r3,PNV_THREAD_NAP bne 1f IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 9704f3f76e63..d7d42bd448c4 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1057,7 +1057,7 @@ int iommu_take_ownership(struct iommu_table *tbl) spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) - spin_lock(&tbl->pools[i].lock); + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); iommu_table_release_pages(tbl); @@ -1085,7 +1085,7 @@ void iommu_release_ownership(struct iommu_table *tbl) spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) - spin_lock(&tbl->pools[i].lock); + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); memset(tbl->it_map, 0, sz); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 9b340af02c38..dd01a4abc258 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -264,7 +264,8 @@ int kprobe_handler(struct pt_regs *regs) if (user_mode(regs)) return 0; - if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)) + if (!IS_ENABLED(CONFIG_BOOKE) && + (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR))) return 0; /* diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 617eba82531c..d89cf802d9aa 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -669,7 +669,7 @@ static void __init kvm_use_magic_page(void) on_each_cpu(kvm_map_magic_page, &features, 1); /* Quick self-test to see if the mapping works */ - if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { + if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { kvm_patching_worked = false; return; } diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 2020d255585f..7684f644e93e 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR) /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -293,7 +293,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) isync /* Stop DST streams */ - DSSALL + PPC_DSSALL sync /* Get the current enable bit of the L3CR into r4 */ @@ -402,7 +402,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR) _GLOBAL(__flush_disable_L1) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 7a1c11a7cba5..716f8bb17461 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -146,11 +146,18 @@ void __init reserve_crashkernel(void) if (!crashk_res.start) { #ifdef CONFIG_PPC64 /* - * On 64bit we split the RMO in half but cap it at half of - * a small SLB (128MB) since the crash kernel needs to place - * itself and some stacks to be in the first segment. + * On the LPAR platform place the crash kernel to mid of + * RMA size (512MB or more) to ensure the crash kernel + * gets enough space to place itself and some stack to be + * in the first segment. At the same time normal kernel + * also get enough space to allocate memory for essential + * system resource in the first segment. Keep the crash + * kernel starts at 128MB offset on other platforms. */ - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); + if (firmware_has_feature(FW_FEATURE_LPAR)) + crashk_res.start = ppc64_rma_size / 2; + else + crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); #else crashk_res.start = KDUMP_KERNELBASE; #endif diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 4ea0cca52e16..c786adfb9413 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -176,7 +176,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) struct paca_struct **paca_ptrs __read_mostly; EXPORT_SYMBOL(paca_ptrs); -void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu) +void __init initialise_paca(struct paca_struct *new_paca, int cpu) { #ifdef CONFIG_PPC_PSERIES new_paca->lppaca_ptr = NULL; @@ -205,7 +205,7 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int } /* Put the paca pointer into r13 and SPRG_PACA */ -void __nostackprotector setup_paca(struct paca_struct *new_paca) +void setup_paca(struct paca_struct *new_paca) { /* Setup r13 */ local_paca = new_paca; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 1c448cf25506..a2c258a8d736 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1669,3 +1669,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); + + +static int __init discover_phbs(void) +{ + if (ppc_md.discover_phbs) + ppc_md.discover_phbs(); + + return 0; +} +core_initcall(discover_phbs); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bd0c258a1d5d..cf87573e6e78 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1719,7 +1719,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) tm_reclaim_current(0); #endif - memset(regs->gpr, 0, sizeof(regs->gpr)); + memset(®s->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0])); regs->ctr = 0; regs->link = 0; regs->xer = 0; @@ -2001,12 +2001,12 @@ static unsigned long __get_wchan(struct task_struct *p) return 0; do { - sp = *(unsigned long *)sp; + sp = READ_ONCE_NOCHECK(*(unsigned long *)sp); if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) || p->state == TASK_RUNNING) return 0; if (count > 0) { - ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE]; + ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]); if (!in_sched_functions(ip)) return ip; } @@ -2081,7 +2081,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) * See if this is an exception frame. * We look for the "regshere" marker in the current frame. */ - if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE) + if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index e13e96e665e0..537142b877b8 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -266,7 +266,7 @@ static struct feature_property { }; #if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU) -static inline void identical_pvr_fixup(unsigned long node) +static __init void identical_pvr_fixup(unsigned long node) { unsigned int pvr; const char *model = of_get_flat_dt_prop(node, "model", NULL); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 689664cd4e79..7f4e2c031a9a 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1305,14 +1305,10 @@ static void __init prom_check_platform_support(void) if (prop_len > sizeof(vec)) prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n", prop_len); - prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", - &vec, sizeof(vec)); - for (i = 0; i < sizeof(vec); i += 2) { - prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 - , vec[i] - , vec[i + 1]); - prom_parse_platform_support(vec[i], vec[i + 1], - &supported); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2, vec[i], vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], &supported); } } @@ -2923,7 +2919,7 @@ static void __init fixup_device_tree_efika_add_phy(void) /* Check if the phy-handle property exists - bail if it does */ rv = prom_getprop(node, "phy-handle", prop, sizeof(prop)); - if (!rv) + if (rv <= 0) return; /* diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 8c92febf5f44..63bfc5250b67 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -3014,8 +3014,13 @@ long arch_ptrace(struct task_struct *child, long request, flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&tmp, &child->thread.TS_FPR(fpidx), - sizeof(long)); + if (IS_ENABLED(CONFIG_PPC32)) { + // On 32-bit the index we are passed refers to 32-bit words + tmp = ((u32 *)child->thread.fp_state.fpr)[fpidx]; + } else { + memcpy(&tmp, &child->thread.TS_FPR(fpidx), + sizeof(long)); + } else tmp = child->thread.fp_state.fpscr; } @@ -3047,8 +3052,13 @@ long arch_ptrace(struct task_struct *child, long request, flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&child->thread.TS_FPR(fpidx), &data, - sizeof(long)); + if (IS_ENABLED(CONFIG_PPC32)) { + // On 32-bit the index we are passed refers to 32-bit words + ((u32 *)child->thread.fp_state.fpr)[fpidx] = data; + } else { + memcpy(&child->thread.TS_FPR(fpidx), &data, + sizeof(long)); + } else child->thread.fp_state.fpscr = data; ret = 0; @@ -3398,4 +3408,7 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, result)); BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs)); + + // ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX)); } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 01210593d60c..35e246e39705 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -940,6 +940,156 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, return NULL; } +#ifdef CONFIG_PPC_RTAS_FILTER + +/* + * The sys_rtas syscall, as originally designed, allows root to pass + * arbitrary physical addresses to RTAS calls. A number of RTAS calls + * can be abused to write to arbitrary memory and do other things that + * are potentially harmful to system integrity, and thus should only + * be used inside the kernel and not exposed to userspace. + * + * All known legitimate users of the sys_rtas syscall will only ever + * pass addresses that fall within the RMO buffer, and use a known + * subset of RTAS calls. + * + * Accordingly, we filter RTAS requests to check that the call is + * permitted, and that provided pointers fall within the RMO buffer. + * The rtas_filters list contains an entry for each permitted call, + * with the indexes of the parameters which are expected to contain + * addresses and sizes of buffers allocated inside the RMO buffer. + */ +struct rtas_filter { + const char *name; + int token; + /* Indexes into the args buffer, -1 if not used */ + int buf_idx1; + int size_idx1; + int buf_idx2; + int size_idx2; + + int fixed_size; +}; + +static struct rtas_filter rtas_filters[] __ro_after_init = { + { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, + { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ + { "display-character", -1, -1, -1, -1, -1 }, + { "ibm,display-message", -1, 0, -1, -1, -1 }, + { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, + { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinjct", -1, -1, -1, -1, -1 }, + { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, + { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, + { "ibm,get-indices", -1, 2, 3, -1, -1 }, + { "get-power-level", -1, -1, -1, -1, -1 }, + { "get-sensor-state", -1, -1, -1, -1, -1 }, + { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, + { "get-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,get-vpd", -1, 0, -1, 1, 2 }, + { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */ + { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, + { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, + { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, + { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, + { "set-indicator", -1, -1, -1, -1, -1 }, + { "set-power-level", -1, -1, -1, -1, -1 }, + { "set-time-for-power-on", -1, -1, -1, -1, -1 }, + { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, + { "set-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,suspend-me", -1, -1, -1, -1, -1 }, + { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, + { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, + { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, +}; + +static bool in_rmo_buf(u32 base, u32 end) +{ + return base >= rtas_rmo_buf && + base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) && + base <= end && + end >= rtas_rmo_buf && + end < (rtas_rmo_buf + RTAS_RMOBUF_MAX); +} + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + struct rtas_filter *f = &rtas_filters[i]; + u32 base, size, end; + + if (token != f->token) + continue; + + if (f->buf_idx1 != -1) { + base = be32_to_cpu(args->args[f->buf_idx1]); + if (f->size_idx1 != -1) + size = be32_to_cpu(args->args[f->size_idx1]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + + end = base + size - 1; + + /* + * Special case for ibm,platform-dump - NULL buffer + * address is used to indicate end of dump processing + */ + if (!strcmp(f->name, "ibm,platform-dump") && + base == 0) + return false; + + if (!in_rmo_buf(base, end)) + goto err; + } + + if (f->buf_idx2 != -1) { + base = be32_to_cpu(args->args[f->buf_idx2]); + if (f->size_idx2 != -1) + size = be32_to_cpu(args->args[f->size_idx2]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + end = base + size - 1; + + /* + * Special case for ibm,configure-connector where the + * address can be 0 + */ + if (!strcmp(f->name, "ibm,configure-connector") && + base == 0) + return false; + + if (!in_rmo_buf(base, end)) + goto err; + } + + return false; + } + +err: + pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); + pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", + token, nargs, current->comm); + return true; +} + +#else + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + return false; +} + +#endif /* CONFIG_PPC_RTAS_FILTER */ + /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { @@ -977,6 +1127,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); + if (block_rtas_call(token, nargs, &args)) + return -EINVAL; + /* Need to handle ibm,suspend_me call specially */ if (token == ibm_suspend_me_token) { @@ -1038,6 +1191,9 @@ void __init rtas_initialize(void) unsigned long rtas_region = RTAS_INSTANTIATE_MAX; u32 base, size, entry; int no_base, no_size, no_entry; +#ifdef CONFIG_PPC_RTAS_FILTER + int i; +#endif /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. @@ -1077,6 +1233,12 @@ void __init rtas_initialize(void) #ifdef CONFIG_RTAS_ERROR_LOGGING rtas_last_error_token = rtas_token("rtas-last-error"); #endif + +#ifdef CONFIG_PPC_RTAS_FILTER + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + rtas_filters[i].token = rtas_token(rtas_filters[i].name); + } +#endif } int __init early_init_dt_scan_rtas(unsigned long node, @@ -1091,6 +1253,12 @@ int __init early_init_dt_scan_rtas(unsigned long node, entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); sizep = of_get_flat_dt_prop(node, "rtas-size", NULL); +#ifdef CONFIG_PPC64 + /* need this feature to decide the crashkernel offset */ + if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL)) + powerpc_firmware_features |= FW_FEATURE_LPAR; +#endif + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 1740a66cea84..ff022e725693 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -256,6 +256,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 25aaa3903000..f281d011f4b9 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -903,8 +903,6 @@ void __init setup_arch(char **cmdline_p) /* On BookE, setup per-core TLB data structures. */ setup_tlb_core_data(); - - smp_release_cpus(); #endif /* Print various info about the machine that has been gathered so far. */ @@ -925,6 +923,8 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); + smp_release_cpus(); + initmem_init(); early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 1b02d338a5f5..c82577c4b15d 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -8,12 +8,6 @@ #ifndef __ARCH_POWERPC_KERNEL_SETUP_H #define __ARCH_POWERPC_KERNEL_SETUP_H -#ifdef CONFIG_CC_IS_CLANG -#define __nostackprotector -#else -#define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) -#endif - void initialize_cache_info(void); void irqstack_early_init(void); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e50fbed36651..5bc7e753df4d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -284,7 +284,7 @@ void __init record_spr_defaults(void) * device-tree is not accessible via normal means at this point. */ -void __init __nostackprotector early_setup(unsigned long dt_ptr) +void __init early_setup(unsigned long dt_ptr) { static __initdata struct paca_struct boot_paca; @@ -859,7 +859,13 @@ early_initcall(disable_hardlockup_detector); static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static bool no_rfi_flush; +static bool no_entry_flush; +static bool no_uaccess_flush; bool rfi_flush; +bool entry_flush; +bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key); static int __init handle_no_rfi_flush(char *p) { @@ -869,6 +875,22 @@ static int __init handle_no_rfi_flush(char *p) } early_param("no_rfi_flush", handle_no_rfi_flush); +static int __init handle_no_entry_flush(char *p) +{ + pr_info("entry-flush: disabled on command line."); + no_entry_flush = true; + return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + +static int __init handle_no_uaccess_flush(char *p) +{ + pr_info("uaccess-flush: disabled on command line."); + no_uaccess_flush = true; + return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. @@ -900,6 +922,32 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } +void entry_flush_enable(bool enable) +{ + if (enable) { + do_entry_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else { + do_entry_flush_fixups(L1D_FLUSH_NONE); + } + + entry_flush = enable; +} + +void uaccess_flush_enable(bool enable) +{ + if (enable) { + do_uaccess_flush_fixups(enabled_flush_types); + static_branch_enable(&uaccess_flush_key); + on_each_cpu(do_nothing, NULL, 1); + } else { + static_branch_disable(&uaccess_flush_key); + do_uaccess_flush_fixups(L1D_FLUSH_NONE); + } + + uaccess_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -958,10 +1006,28 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush && !cpu_mitigations_off()) + if (!cpu_mitigations_off() && !no_rfi_flush) rfi_flush_enable(enable); } +void setup_entry_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_entry_flush) + entry_flush_enable(enable); +} + +void setup_uaccess_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_uaccess_flush) + uaccess_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -989,9 +1055,63 @@ static int rfi_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); +static int entry_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != entry_flush) + entry_flush_enable(enable); + + return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ + *val = entry_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + +static int uaccess_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != uaccess_flush) + uaccess_flush_enable(enable); + + return 0; +} + +static int uaccess_flush_get(void *data, u64 *val) +{ + *val = uaccess_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ea6adbf6a221..4de63ec2e155 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -583,11 +583,43 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) #endif #ifdef CONFIG_NMI_IPI +static void crash_stop_this_cpu(struct pt_regs *regs) +#else +static void crash_stop_this_cpu(void *dummy) +#endif +{ + /* + * Just busy wait here and avoid marking CPU as offline to ensure + * register data is captured appropriately. + */ + while (1) + cpu_relax(); +} + +void crash_smp_send_stop(void) +{ + static bool stopped = false; + + if (stopped) + return; + + stopped = true; + +#ifdef CONFIG_NMI_IPI + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); +#else + smp_call_function(crash_stop_this_cpu, NULL, 0); +#endif /* CONFIG_NMI_IPI */ +} + +#ifdef CONFIG_NMI_IPI static void nmi_stop_this_cpu(struct pt_regs *regs) { /* * IRQs are already hard disabled by the smp_handle_nmi_ipi. */ + set_cpu_online(smp_processor_id(), false); + spin_begin(); while (1) spin_cpu_relax(); @@ -603,6 +635,15 @@ void smp_send_stop(void) static void stop_this_cpu(void *dummy) { hard_irq_disable(); + + /* + * Offlining CPUs in stop_this_cpu can result in scheduler warnings, + * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants + * to know other CPUs are offline before it breaks locks to flush + * printk buffers, in case we panic()ed while holding the lock. + */ + set_cpu_online(smp_processor_id(), false); + spin_begin(); while (1) spin_cpu_relax(); @@ -1254,6 +1295,9 @@ void start_secondary(void *unused) vdso_getcpu_init(); #endif + set_numa_node(numa_cpu_lookup_table[cpu]); + set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); + /* Update topology CPU masks */ add_cpu_to_masks(cpu); @@ -1266,9 +1310,6 @@ void start_secondary(void *unused) if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) shared_caches = true; - set_numa_node(numa_cpu_lookup_table[cpu]); - set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); - smp_wmb(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); @@ -1285,10 +1326,12 @@ void start_secondary(void *unused) BUG(); } +#ifdef CONFIG_PROFILING int setup_profiling_timer(unsigned int multiplier) { return 0; } +#endif #ifdef CONFIG_SCHED_SMT /* cpumask of CPUs with asymetric SMT dependancy */ diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index e2a46cfed5fd..890f95151fb4 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -8,6 +8,7 @@ * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp. */ +#include <linux/delay.h> #include <linux/export.h> #include <linux/kallsyms.h> #include <linux/module.h> @@ -19,6 +20,7 @@ #include <asm/ptrace.h> #include <asm/processor.h> #include <linux/ftrace.h> +#include <linux/delay.h> #include <asm/kprobes.h> #include <asm/paca.h> @@ -230,17 +232,31 @@ static void handle_backtrace_ipi(struct pt_regs *regs) static void raise_backtrace_ipi(cpumask_t *mask) { + struct paca_struct *p; unsigned int cpu; + u64 delay_us; for_each_cpu(cpu, mask) { - if (cpu == smp_processor_id()) + if (cpu == smp_processor_id()) { handle_backtrace_ipi(NULL); - else - smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC); - } + continue; + } - for_each_cpu(cpu, mask) { - struct paca_struct *p = paca_ptrs[cpu]; + delay_us = 5 * USEC_PER_SEC; + + if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) { + // Now wait up to 5s for the other CPU to do its backtrace + while (cpumask_test_cpu(cpu, mask) && delay_us) { + udelay(1); + delay_us--; + } + + // Other CPU cleared itself from the mask + if (delay_us) + continue; + } + + p = paca_ptrs[cpu]; cpumask_clear_cpu(cpu, mask); diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index cbdf86228eaa..54c44aea338c 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume) #ifdef CONFIG_ALTIVEC /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif sync diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 6d3189830dd3..068a268a8013 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -142,7 +142,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) _GLOBAL(swsusp_arch_resume) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 80a676da11cb..f08ca604a394 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -31,29 +31,27 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); -/* - * SMT snooze delay stuff, 64-bit only for now - */ - #ifdef CONFIG_PPC64 -/* Time in microseconds we delay before sleeping in the idle loop */ -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; +/* + * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle: + * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in + * 2014: + * + * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean + * up the kernel code." + * + * powerpc-utils stopped using it as of 1.3.8. At some point in the future this + * code should be removed. + */ static ssize_t store_smt_snooze_delay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - ssize_t ret; - long snooze; - - ret = sscanf(buf, "%ld", &snooze); - if (ret != 1) - return -EINVAL; - - per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; + pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n", + current->comm, current->pid); return count; } @@ -61,9 +59,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev, struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); + pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n", + current->comm, current->pid); + return sprintf(buf, "100\n"); } static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, @@ -71,16 +69,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, static int __init setup_smt_snooze_delay(char *str) { - unsigned int cpu; - long snooze; - if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - snooze = simple_strtol(str, NULL, 10); - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - + pr_warn("smt-snooze-delay command line option has no effect\n"); return 1; } __setup("smt-snooze-delay=", setup_smt_snooze_delay); diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index e2ab8a111b69..0b4694b8d248 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -13,13 +13,14 @@ */ #include <linux/errno.h> -#include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/param.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/init.h> +#include <linux/delay.h> +#include <linux/workqueue.h> #include <asm/io.h> #include <asm/reg.h> @@ -39,9 +40,7 @@ static struct tau_temp unsigned char grew; } tau[NR_CPUS]; -struct timer_list tau_timer; - -#undef DEBUG +static bool tau_int_enable; /* TODO: put these in a /proc interface, with some sanity checks, and maybe * dynamic adjustment to minimize # of interrupts */ @@ -50,72 +49,49 @@ struct timer_list tau_timer; #define step_size 2 /* step size when temp goes out of range */ #define window_expand 1 /* expand the window by this much */ /* configurable values for shrinking the window */ -#define shrink_timer 2*HZ /* period between shrinking the window */ +#define shrink_timer 2000 /* period between shrinking the window */ #define min_window 2 /* minimum window size, degrees C */ static void set_thresholds(unsigned long cpu) { -#ifdef CONFIG_TAU_INT - /* - * setup THRM1, - * threshold, valid bit, enable interrupts, interrupt when below threshold - */ - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); + u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0; - /* setup THRM2, - * threshold, valid bit, enable interrupts, interrupt when above threshold - */ - mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); -#else - /* same thing but don't enable interrupts */ - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID); - mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V); -#endif + /* setup THRM1, threshold, valid bit, interrupt when below threshold */ + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID); + + /* setup THRM2, threshold, valid bit, interrupt when above threshold */ + mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie); } static void TAUupdate(int cpu) { - unsigned thrm; - -#ifdef DEBUG - printk("TAUupdate "); -#endif + u32 thrm; + u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V; /* if both thresholds are crossed, the step_sizes cancel out * and the window winds up getting expanded twice. */ - if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */ - if(thrm & THRM1_TIN){ /* crossed low threshold */ - if (tau[cpu].low >= step_size){ - tau[cpu].low -= step_size; - tau[cpu].high -= (step_size - window_expand); - } - tau[cpu].grew = 1; -#ifdef DEBUG - printk("low threshold crossed "); -#endif + thrm = mfspr(SPRN_THRM1); + if ((thrm & bits) == bits) { + mtspr(SPRN_THRM1, 0); + + if (tau[cpu].low >= step_size) { + tau[cpu].low -= step_size; + tau[cpu].high -= (step_size - window_expand); } + tau[cpu].grew = 1; + pr_debug("%s: low threshold crossed\n", __func__); } - if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */ - if(thrm & THRM1_TIN){ /* crossed high threshold */ - if (tau[cpu].high <= 127-step_size){ - tau[cpu].low += (step_size - window_expand); - tau[cpu].high += step_size; - } - tau[cpu].grew = 1; -#ifdef DEBUG - printk("high threshold crossed "); -#endif + thrm = mfspr(SPRN_THRM2); + if ((thrm & bits) == bits) { + mtspr(SPRN_THRM2, 0); + + if (tau[cpu].high <= 127 - step_size) { + tau[cpu].low += (step_size - window_expand); + tau[cpu].high += step_size; } + tau[cpu].grew = 1; + pr_debug("%s: high threshold crossed\n", __func__); } - -#ifdef DEBUG - printk("grew = %d\n", tau[cpu].grew); -#endif - -#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */ - set_thresholds(cpu); -#endif - } #ifdef CONFIG_TAU_INT @@ -140,17 +116,16 @@ void TAUException(struct pt_regs * regs) static void tau_timeout(void * info) { int cpu; - unsigned long flags; int size; int shrink; - /* disabling interrupts *should* be okay */ - local_irq_save(flags); cpu = smp_processor_id(); -#ifndef CONFIG_TAU_INT - TAUupdate(cpu); -#endif + if (!tau_int_enable) + TAUupdate(cpu); + + /* Stop thermal sensor comparisons and interrupts */ + mtspr(SPRN_THRM3, 0); size = tau[cpu].high - tau[cpu].low; if (size > min_window && ! tau[cpu].grew) { @@ -173,32 +148,26 @@ static void tau_timeout(void * info) set_thresholds(cpu); - /* - * Do the enable every time, since otherwise a bunch of (relatively) - * complex sleep code needs to be added. One mtspr every time - * tau_timeout is called is probably not a big deal. - * - * Enable thermal sensor and set up sample interval timer - * need 20 us to do the compare.. until a nice 'cpu_speed' function - * call is implemented, just assume a 500 mhz clock. It doesn't really - * matter if we take too long for a compare since it's all interrupt - * driven anyway. - * - * use a extra long time.. (60 us @ 500 mhz) + /* Restart thermal sensor comparisons and interrupts. + * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet" + * recommends that "the maximum value be set in THRM3 under all + * conditions." */ - mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); - - local_irq_restore(flags); + mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E); } -static void tau_timeout_smp(struct timer_list *unused) -{ +static struct workqueue_struct *tau_workq; - /* schedule ourselves to be run again */ - mod_timer(&tau_timer, jiffies + shrink_timer) ; +static void tau_work_func(struct work_struct *work) +{ + msleep(shrink_timer); on_each_cpu(tau_timeout, NULL, 0); + /* schedule ourselves to be run again */ + queue_work(tau_workq, work); } +DECLARE_WORK(tau_work, tau_work_func); + /* * setup the TAU * @@ -231,21 +200,19 @@ static int __init TAU_init(void) return 1; } + tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && + !strcmp(cur_cpu_spec->platform, "ppc750"); - /* first, set up the window shrinking timer */ - timer_setup(&tau_timer, tau_timeout_smp, 0); - tau_timer.expires = jiffies + shrink_timer; - add_timer(&tau_timer); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); + if (!tau_workq) + return -ENOMEM; on_each_cpu(TAU_init_smp, NULL, 0); - printk("Thermal assist unit "); -#ifdef CONFIG_TAU_INT - printk("using interrupts, "); -#else - printk("using timers, "); -#endif - printk("shrink_timer: %d jiffies\n", shrink_timer); + queue_work(tau_workq, &tau_work); + + pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n", + tau_int_enable ? "interrupts" : "workqueue", shrink_timer); tau_initialized = 1; return 0; diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 7ea0ca044b65..d816e714f2f4 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -328,9 +328,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) /* Is this a known long jump tramp? */ for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (!ftrace_tramps[i]) - break; - else if (ftrace_tramps[i] == tramp) + if (ftrace_tramps[i] == tramp) return 0; /* Is this a known plt tramp? */ @@ -868,6 +866,17 @@ void arch_ftrace_update_code(int command) extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; +void ftrace_free_init_tramp(void) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++) + if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) { + ftrace_tramps[i] = 0; + return; + } +} + int __init ftrace_dyn_arch_init(void) { int i; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 9432fc6af28a..ecfa460f66d1 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -513,8 +513,11 @@ out: die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (!(regs->msr & MSR_RI)) { + /* For the reason explained in die_mce, nmi_exit before die */ + nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); + } if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); @@ -877,7 +880,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) { unsigned int ra, rb, t, i, sel, instr, rc; const void __user *addr; - u8 vbuf[16], *vdst; + u8 vbuf[16] __aligned(16), *vdst; unsigned long ea, msr, msr_mask; bool swap; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 060a1acd7c6d..3ea360cad337 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -98,10 +98,11 @@ SECTIONS ALIGN_FUNCTION(); #endif /* careful! __ftr_alt_* sections need to be close to .text */ - *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text); + *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text); #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif + NOINSTR_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT @@ -144,6 +145,20 @@ SECTIONS } . = ALIGN(8); + __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) { + __start___uaccess_flush_fixup = .; + *(__uaccess_flush_fixup) + __stop___uaccess_flush_fixup = .; + } + + . = ALIGN(8); + __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { + __start___entry_flush_fixup = .; + *(__entry_flush_fixup) + __stop___entry_flush_fixup = .; + } + + . = ALIGN(8); __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { __start___stf_exit_barrier_fixup = .; *(__stf_exit_barrier_fixup) @@ -196,6 +211,12 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT + + /* + *.init.text might be RO so we must ensure this section ends on + * a page boundary. + */ + . = ALIGN(PAGE_SIZE); _einittext = .; #ifdef CONFIG_PPC64 *(.tramp.ftrace.init); @@ -209,21 +230,9 @@ SECTIONS EXIT_TEXT } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - INIT_SETUP(16) - } - - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - INIT_CALLS - } + . = ALIGN(PAGE_SIZE); - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - CON_INITCALL - } + INIT_DATA_SECTION(16) . = ALIGN(8); __ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) { @@ -251,9 +260,6 @@ SECTIONS __stop___fw_ftr_fixup = .; } #endif - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - INIT_RAM_FS - } PERCPU_SECTION(L1_CACHE_BYTES) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index af3c15a1d41e..75b2a6c4db5a 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -132,6 +132,10 @@ static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) { cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); + /* + * See wd_smp_clear_cpu_pending() + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; cpumask_andnot(&wd_smp_cpus_pending, @@ -217,13 +221,44 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb) cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); wd_smp_unlock(&flags); + } else { + /* + * The last CPU to clear pending should have reset the + * watchdog so we generally should not find it empty + * here if our CPU was clear. However it could happen + * due to a rare race with another CPU taking the + * last CPU out of the mask concurrently. + * + * We can't add a warning for it. But just in case + * there is a problem with the watchdog that is causing + * the mask to not be reset, try to kick it along here. + */ + if (unlikely(cpumask_empty(&wd_smp_cpus_pending))) + goto none_pending; } return; } + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + + /* + * Order the store to clear pending with the load(s) to check all + * words in the pending mask to check they are all empty. This orders + * with the same barrier on another CPU. This prevents two CPUs + * clearing the last 2 pending bits, but neither seeing the other's + * store when checking if the mask is empty, and missing an empty + * mask, which ends with a false positive. + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { unsigned long flags; +none_pending: + /* + * Double check under lock because more than one CPU could see + * a clear mask with the lockless check after clearing their + * pending bits. + */ wd_smp_lock(&flags); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; @@ -314,8 +349,12 @@ void arch_touch_nmi_watchdog(void) { unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; int cpu = smp_processor_id(); - u64 tb = get_tb(); + u64 tb; + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return; + + tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) { per_cpu(wd_timer_tb, cpu) = tb; wd_smp_clear_cpu_pending(cpu, tb); diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 03b947429e4d..4518a0f2d6c6 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -420,13 +420,19 @@ static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, tbl[idx % TCES_PER_PAGE] = tce; } -static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, - unsigned long entry) +static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt, + struct iommu_table *tbl, unsigned long entry) { - unsigned long hpa = 0; - enum dma_data_direction dir = DMA_NONE; + unsigned long i; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift); + + for (i = 0; i < subpages; ++i) { + unsigned long hpa = 0; + enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir); + } } static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, @@ -485,6 +491,8 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm, break; } + iommu_tce_kill(tbl, io_entry, subpages); + return ret; } @@ -544,6 +552,8 @@ static long kvmppc_tce_iommu_map(struct kvm *kvm, break; } + iommu_tce_kill(tbl, io_entry, subpages); + return ret; } @@ -590,10 +600,9 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); - iommu_tce_kill(stit->tbl, entry, 1); if (ret != H_SUCCESS) { - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry); goto unlock_exit; } } @@ -669,13 +678,13 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, */ if (get_user(tce, tces + i)) { ret = H_TOO_HARD; - goto invalidate_exit; + goto unlock_exit; } tce = be64_to_cpu(tce); if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; - goto invalidate_exit; + goto unlock_exit; } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -684,19 +693,15 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, iommu_tce_direction(tce)); if (ret != H_SUCCESS) { - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, - entry); - goto invalidate_exit; + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, + entry + i); + goto unlock_exit; } } kvmppc_tce_put(stt, entry + i, tce); } -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill(stit->tbl, entry, npages); - unlock_exit: srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -735,20 +740,16 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - goto invalidate_exit; + return ret; WARN_ON_ONCE(1); - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i); } } for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages); - return ret; } EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index ab6eeb8e753e..abb49d863329 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -177,10 +177,13 @@ static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt, idx -= stt->offset; page = stt->pages[idx / TCES_PER_PAGE]; /* - * page must not be NULL in real mode, - * kvmppc_rm_ioba_validate() must have taken care of this. + * kvmppc_rm_ioba_validate() allows pages not be allocated if TCE is + * being cleared, otherwise it returns H_TOO_HARD and we skip this. */ - WARN_ON_ONCE_RM(!page); + if (!page) { + WARN_ON_ONCE_RM(tce != 0); + return; + } tbl = kvmppc_page_address(page); tbl[idx % TCES_PER_PAGE] = tce; @@ -248,13 +251,19 @@ extern void iommu_tce_kill_rm(struct iommu_table *tbl, tbl->it_ops->tce_kill(tbl, entry, pages, true); } -static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl, - unsigned long entry) +static void kvmppc_rm_clear_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt, + struct iommu_table *tbl, unsigned long entry) { - unsigned long hpa = 0; - enum dma_data_direction dir = DMA_NONE; + unsigned long i; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift); - iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); + for (i = 0; i < subpages; ++i) { + unsigned long hpa = 0; + enum dma_data_direction dir = DMA_NONE; + + iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, io_entry + i, &hpa, &dir); + } } static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, @@ -317,6 +326,8 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, break; } + iommu_tce_kill_rm(tbl, io_entry, subpages); + return ret; } @@ -380,6 +391,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, break; } + iommu_tce_kill_rm(tbl, io_entry, subpages); + return ret; } @@ -425,10 +438,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); - iommu_tce_kill_rm(stit->tbl, entry, 1); - if (ret != H_SUCCESS) { - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry); return ret; } } @@ -568,7 +579,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ua = 0; if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { ret = H_PARAMETER; - goto invalidate_exit; + goto unlock_exit; } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -577,19 +588,15 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, iommu_tce_direction(tce)); if (ret != H_SUCCESS) { - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, - entry); - goto invalidate_exit; + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, + entry + i); + goto unlock_exit; } } kvmppc_rm_tce_put(stt, entry + i, tce); } -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill_rm(stit->tbl, entry, npages); - unlock_exit: if (rmap) unlock_rmap(rmap); @@ -632,20 +639,16 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - goto invalidate_exit; + return ret; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry + i); } } for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value); -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages); - return ret; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e2183fed947d..6c99ccc3bfcb 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -58,6 +58,7 @@ #include <asm/kvm_book3s.h> #include <asm/mmu_context.h> #include <asm/lppaca.h> +#include <asm/pmc.h> #include <asm/processor.h> #include <asm/cputhreads.h> #include <asm/page.h> @@ -2306,8 +2307,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP; if (cpu_has_feature(CPU_FTR_HVMODE)) { vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) vcpu->arch.hfscr |= HFSCR_TM; +#endif } if (cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr |= HFSCR_TM; @@ -2536,7 +2539,7 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) cpumask_t *cpu_in_guest; int i; - cpu = cpu_first_thread_sibling(cpu); + cpu = cpu_first_tlb_thread_sibling(cpu); if (nested) { cpumask_set_cpu(cpu, &nested->need_tlb_flush); cpu_in_guest = &nested->cpu_in_guest; @@ -2550,9 +2553,10 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) * the other side is the first smp_mb() in kvmppc_run_core(). */ smp_mb(); - for (i = 0; i < threads_per_core; ++i) - if (cpumask_test_cpu(cpu + i, cpu_in_guest)) - smp_call_function_single(cpu + i, do_nothing, NULL, 1); + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); + i += cpu_tlb_thread_sibling_step()) + if (cpumask_test_cpu(i, cpu_in_guest)) + smp_call_function_single(i, do_nothing, NULL, 1); } static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) @@ -2583,8 +2587,8 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) */ if (prev_cpu != pcpu) { if (prev_cpu >= 0 && - cpu_first_thread_sibling(prev_cpu) != - cpu_first_thread_sibling(pcpu)) + cpu_first_tlb_thread_sibling(prev_cpu) != + cpu_first_tlb_thread_sibling(pcpu)) radix_flush_cpu(kvm, prev_cpu, vcpu); if (nested) nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu; @@ -3556,6 +3560,18 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + if (vcpu->arch.vpa.pinned_addr) { + struct lppaca *lp = vcpu->arch.vpa.pinned_addr; + get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use; + } else { + get_lppaca()->pmcregs_in_use = 1; + } + barrier(); + } +#endif kvmhv_load_guest_pmu(vcpu); msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); @@ -3638,7 +3654,10 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.dec_expires = dec + tb; vcpu->cpu = -1; vcpu->arch.thread_cpu = -1; + /* Save guest CTRL register, set runlatch to 1 */ vcpu->arch.ctrl = mfspr(SPRN_CTRLF); + if (!(vcpu->arch.ctrl & 1)) + mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1); vcpu->arch.iamr = mfspr(SPRN_IAMR); vcpu->arch.pspb = mfspr(SPRN_PSPB); @@ -3687,6 +3706,13 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, save_pmu |= nesting_enabled(vcpu->kvm); kvmhv_save_guest_pmu(vcpu, save_pmu); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); + barrier(); + } +#endif vc->entry_exit_map = 0x101; vc->in_guest = 0; @@ -5191,6 +5217,12 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, case KVM_PPC_ALLOCATE_HTAB: { u32 htab_order; + /* If we're a nested hypervisor, we currently only support radix */ + if (kvmhv_on_pseries()) { + r = -EOPNOTSUPP; + break; + } + r = -EFAULT; if (get_user(htab_order, (u32 __user *)argp)) break; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7c1909657b55..6d34b6972985 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -821,6 +821,7 @@ static void flush_guest_tlb(struct kvm *kvm) "r" (0) : "memory"); } asm volatile("ptesync": : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); } else { for (set = 0; set < kvm->arch.tlb_sets; ++set) { @@ -831,7 +832,9 @@ static void flush_guest_tlb(struct kvm *kvm) rb += PPC_BIT(51); /* increment set number */ } asm volatile("ptesync": : :"memory"); - asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. + if (cpu_has_feature(CPU_FTR_ARCH_300)) + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); } } @@ -847,7 +850,7 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, * Thus we make all 4 threads use the same bit. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) - pcpu = cpu_first_thread_sibling(pcpu); + pcpu = cpu_first_tlb_thread_sibling(pcpu); if (nested) need_tlb_flush = &nested->need_tlb_flush; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index cdf30c6eaf54..613d24b707ab 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -51,7 +51,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) hr->ppr = vcpu->arch.ppr; } -static void byteswap_pt_regs(struct pt_regs *regs) +/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */ +static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs) { unsigned long *addr = (unsigned long *) regs; @@ -231,6 +232,9 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.l1_ptcr == 0) return H_NOT_AVAILABLE; + if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) + return H_BAD_MODE; + /* copy parameters in */ hv_ptr = kvmppc_get_gpr(vcpu, 4); err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv, @@ -252,6 +256,23 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (l2_hv.vcpu_token >= NR_CPUS) return H_PARAMETER; + /* + * L1 must have set up a suspended state to enter the L2 in a + * transactional state, and only in that case. These have to be + * filtered out here to prevent causing a TM Bad Thing in the + * host HRFID. We could synthesize a TM Bad Thing back to the L1 + * here but there doesn't seem like much point. + */ + if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) { + if (!MSR_TM_ACTIVE(l2_regs.msr)) + return H_BAD_MODE; + } else { + if (l2_regs.msr & MSR_TS_MASK) + return H_BAD_MODE; + if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK)) + return H_BAD_MODE; + } + /* translate lpid */ l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true); if (!l2) @@ -489,7 +510,7 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) if (eaddr & (0xFFFUL << 52)) return H_PARAMETER; - buf = kzalloc(n, GFP_KERNEL); + buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN); if (!buf) return H_NO_MEM; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 220305454c23..9bf3be438ac5 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -67,7 +67,7 @@ static int global_invalidates(struct kvm *kvm) * so use the bit for the first thread to represent the core. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) - cpu = cpu_first_thread_sibling(cpu); + cpu = cpu_first_tlb_thread_sibling(cpu); cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c6fbbd29bd87..c9c6619564ff 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -292,13 +292,16 @@ kvm_novcpu_exit: * r3 contains the SRR1 wakeup value, SRR1 is trashed. */ _GLOBAL(idle_kvm_start_guest) - ld r4,PACAEMERGSP(r13) mfcr r5 mflr r0 - std r1,0(r4) - std r5,8(r4) - std r0,16(r4) - subi r1,r4,STACK_FRAME_OVERHEAD + std r5, 8(r1) // Save CR in caller's frame + std r0, 16(r1) // Save LR in caller's frame + // Create frame on emergency stack + ld r4, PACAEMERGSP(r13) + stdu r1, -SWITCH_FRAME_SIZE(r4) + // Switch to new frame on emergency stack + mr r1, r4 + std r3, 32(r1) // Save SRR1 wakeup value SAVE_NVGPRS(r1) /* @@ -350,6 +353,10 @@ kvm_unsplit_wakeup: kvm_secondary_got_guest: + // About to go to guest, clear saved SRR1 + li r0, 0 + std r0, 32(r1) + /* Set HSTATE_DSCR(r13) to something sensible */ ld r6, PACA_DSCR_DEFAULT(r13) std r6, HSTATE_DSCR(r13) @@ -441,13 +448,12 @@ kvm_no_guest: mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 - /* set up r3 for return */ - mfspr r3,SPRN_SRR1 + // Return SRR1 wakeup value, or 0 if we went into the guest + ld r3, 32(r1) REST_NVGPRS(r1) - addi r1, r1, STACK_FRAME_OVERHEAD - ld r0, 16(r1) - ld r5, 8(r1) - ld r1, 0(r1) + ld r1, 0(r1) // Switch back to caller stack + ld r0, 16(r1) // Reload LR + ld r5, 8(r1) // Reload CR mtlr r0 mtcr r5 blr @@ -2529,7 +2535,7 @@ hcall_real_table: .globl hcall_real_table_end hcall_real_table_end: -_GLOBAL(kvmppc_h_set_xdabr) +_GLOBAL_TOC(kvmppc_h_set_xdabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) andi. r0, r5, DABRX_USER | DABRX_KERNEL beq 6f @@ -2539,7 +2545,7 @@ EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) 6: li r3, H_PARAMETER blr -_GLOBAL(kvmppc_h_set_dabr) +_GLOBAL_TOC(kvmppc_h_set_dabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr) li r5, DABRX_USER | DABRX_KERNEL 3: @@ -3137,7 +3143,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) /* The following code handles the fake_suspend = 1 case */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -PPC_MIN_STKFRM(r1) + stdu r1, -TM_FRAME_SIZE(r1) /* Turn on TM. */ mfmsr r8 @@ -3152,10 +3158,42 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) nop + /* + * It's possible that treclaim. may modify registers, if we have lost + * track of fake-suspend state in the guest due to it using rfscv. + * Save and restore registers in case this occurs. + */ + mfspr r3, SPRN_DSCR + mfspr r4, SPRN_XER + mfspr r5, SPRN_AMR + /* SPRN_TAR would need to be saved here if the kernel ever used it */ + mfcr r12 + SAVE_NVGPRS(r1) + SAVE_GPR(2, r1) + SAVE_GPR(3, r1) + SAVE_GPR(4, r1) + SAVE_GPR(5, r1) + stw r12, 8(r1) + std r1, HSTATE_HOST_R1(r13) + /* We have to treclaim here because that's the only way to do S->N */ li r3, TM_CAUSE_KVM_RESCHED TRECLAIM(R3) + GET_PACA(r13) + ld r1, HSTATE_HOST_R1(r13) + REST_GPR(2, r1) + REST_GPR(3, r1) + REST_GPR(4, r1) + REST_GPR(5, r1) + lwz r12, 8(r1) + REST_NVGPRS(r1) + mtspr SPRN_DSCR, r3 + mtspr SPRN_XER, r4 + mtspr SPRN_AMR, r5 + mtcr r12 + HMT_MEDIUM + /* * We were in fake suspend, so we are not going to save the * register state as the guest checkpointed state (since @@ -3183,7 +3221,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) std r5, VCPU_TFHAR(r9) std r6, VCPU_TFIAR(r9) - addi r1, r1, PPC_MIN_STKFRM + addi r1, r1, TM_FRAME_SIZE ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 26b25994c969..41137ec72762 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -240,6 +240,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) * value so we can restore it on the way out. */ orig_rets = args.rets; + if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) { + /* + * Don't overflow our args array: ensure there is room for + * at least rets[0] (even if the call specifies 0 nret). + * + * Each handler must then check for the correct nargs and nret + * values, but they may always return failure in rets[0]. + */ + rc = -EINVAL; + goto fail; + } args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->arch.rtas_token_lock); @@ -267,9 +278,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) fail: /* * We only get here if the guest has called RTAS with a bogus - * args pointer. That means we can't get to the args, and so we - * can't fail the RTAS call. So fail right out to userspace, - * which should kill the guest. + * args pointer or nargs/nret values that would overflow the + * array. That means we can't get to the args, and so we can't + * fail the RTAS call. So fail right out to userspace, which + * should kill the guest. + * + * SLOF should actually pass the hcall return value from the + * rtas handler call in r3, so enter_rtas could be modified to + * return a failure indication in r3 and we could return such + * errors to the guest rather than failing to host userspace. + * However old guests that don't test for failure could then + * continue silently after errors, so for now we won't do this. */ return rc; } diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 235d57d6c205..d78d8487c1d6 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -252,6 +252,13 @@ static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf) } state = &sb->irq_state[src]; + + /* Some sanity checking */ + if (!state->valid) { + pr_devel("%s: source %lx invalid !\n", __func__, irq); + return VM_FAULT_SIGBUS; + } + kvmppc_xive_select_irq(state, &hw_num, &xd); arch_spin_lock(&sb->lock); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3a77bb643452..eb8c72846b7f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1495,7 +1495,7 @@ int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, { enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; while (vcpu->arch.mmio_vmx_copy_nums) { @@ -1513,7 +1513,7 @@ int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1531,7 +1531,7 @@ int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1549,7 +1549,7 @@ int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1567,7 +1567,7 @@ int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1592,7 +1592,7 @@ int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int index = rs & KVM_MMIO_REG_MASK; enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; vcpu->arch.io_gpr = rs; @@ -2035,9 +2035,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, { struct kvm_enable_cap cap; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&cap, argp, sizeof(cap))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); vcpu_put(vcpu); break; @@ -2061,9 +2061,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&dirty, argp, sizeof(dirty))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); vcpu_put(vcpu); break; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index b8de3be10eb4..8656b8d2ce55 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -16,6 +16,9 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif +CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + obj-y += alloc.o code-patching.o feature-fixups.o pmem.o ifndef CONFIG_KASAN diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 3345f039a876..a05f289e613e 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -221,6 +221,11 @@ bool is_offset_in_branch_range(long offset) return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3)); } +bool is_offset_in_cond_branch_range(long offset) +{ + return offset >= -0x8000 && offset <= 0x7fff && !(offset & 0x3); +} + /* * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() @@ -274,7 +279,7 @@ unsigned int create_cond_branch(const unsigned int *addr, offset = offset - (unsigned long)addr; /* Check we can represent the target in the instruction format */ - if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3) + if (!is_offset_in_cond_branch_range(offset)) return 0; /* Mask out the flags and target, so they don't step on each other. */ diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4ba634b89ce5..c8e260e29f2c 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -14,6 +14,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/sched/mm.h> +#include <linux/stop_machine.h> #include <asm/cputable.h> #include <asm/code-patching.h> #include <asm/page.h> @@ -221,11 +222,143 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) : "unknown"); } +static int __do_stf_barrier_fixups(void *data) +{ + enum stf_barrier_type *types = data; + + do_stf_entry_barrier_fixups(*types); + do_stf_exit_barrier_fixups(*types); + + return 0; +} void do_stf_barrier_fixups(enum stf_barrier_type types) { - do_stf_entry_barrier_fixups(types); - do_stf_exit_barrier_fixups(types); + /* + * The call to the fallback entry flush, and the fallback/sync-ori exit + * flush can not be safely patched in/out while other CPUs are executing + * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs + * spin in the stop machine core with interrupts hard disabled. + */ + stop_machine(__do_stf_barrier_fixups, &types, NULL); +} + +void do_uaccess_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[4], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___uaccess_flush_fixup); + end = PTRRELOC(&__stop___uaccess_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x4e800020; /* blr */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[3] = 0x60000000; /* nop */ + /* fallthrough to fallback flush */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + patch_instruction((dest + 1), instrs[1]); + patch_instruction((dest + 2), instrs[2]); + patch_instruction((dest + 3), instrs[3]); + } + + printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + +static int __do_entry_flush_fixups(void *data) +{ + enum l1d_flush_type types = *(enum l1d_flush_type *)data; + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + if (types == L1D_FLUSH_FALLBACK) + patch_branch((dest + 1), (unsigned long)&entry_flush_fallback, + BRANCH_SET_LINK); + else + patch_instruction((dest + 1), instrs[1]); + + patch_instruction((dest + 2), instrs[2]); + } + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); + + return 0; +} + +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + /* + * The call to the fallback flush can not be safely patched in/out while + * other CPUs are executing it. So call __do_entry_flush_fixups() on one + * CPU while all other CPUs spin in the stop machine core with interrupts + * hard disabled. + */ + stop_machine(__do_entry_flush_fixups, &types, NULL); } void do_rfi_flush_fixups(enum l1d_flush_type types) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c077acb983a1..27650cd5857f 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -906,7 +906,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ @@ -919,7 +922,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -2787,12 +2793,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) case BARRIER_EIEIO: eieio(); break; +#ifdef CONFIG_PPC64 case BARRIER_LWSYNC: asm volatile("lwsync" : : : "memory"); break; case BARRIER_PTESYNC: asm volatile("ptesync" : : : "memory"); break; +#endif } break; @@ -2910,7 +2918,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op->val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "sthcx.", cr); break; #endif case 4: diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 770542ccdb46..bdcb07a98cd3 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -97,7 +97,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } @@ -155,7 +155,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 67af871190c6..b0f240afffa2 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -639,19 +639,29 @@ static void do_exit_flush_lazy_tlb(void *arg) struct mm_struct *mm = arg; unsigned long pid = mm->context.id; + /* + * A kthread could have done a mmget_not_zero() after the flushing CPU + * checked mm_is_singlethreaded, and be in the process of + * kthread_use_mm when interrupted here. In that case, current->mm will + * be set to mm, because kthread_use_mm() setting ->mm and switching to + * the mm is done with interrupts off. + */ if (current->mm == mm) - return; /* Local CPU */ + goto out_flush; if (current->active_mm == mm) { - /* - * Must be a kernel thread because sender is single-threaded. - */ - BUG_ON(current->mm); + WARN_ON_ONCE(current->mm != NULL); + /* Is a kernel thread and is using mm as the lazy tlb */ mmgrab(&init_mm); - switch_mm(mm, &init_mm, current); current->active_mm = &init_mm; + switch_mm_irqs_off(mm, &init_mm, current); mmdrop(mm); } + + atomic_dec(&mm->context.active_cpus); + cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm)); + +out_flush: _tlbiel_pid(pid, RIC_FLUSH_ALL); } @@ -666,7 +676,6 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm) */ smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, (void *)mm, 1); - mm_reset_thread_local(mm); } void radix__flush_tlb_mm(struct mm_struct *mm) diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 59327cefbc6a..873fcfc7b875 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -362,10 +362,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) if (!drmem_info->lmbs) return; - for_each_drmem_lmb(lmb) { + for_each_drmem_lmb(lmb) read_drconf_v1_cell(lmb, &prop); - lmb_set_nid(lmb); - } } static void __init init_drmem_v2_lmbs(const __be32 *prop) @@ -410,8 +408,6 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) lmb->aa_index = dr_cell.aa_index; lmb->flags = dr_cell.flags; - - lmb_set_nid(lmb); } } } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 187047592d53..9f4a78e3cde9 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -204,9 +204,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, { int is_exec = TRAP(regs) == 0x400; - /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ - if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | - DSISR_PROTFAULT))) { + if (is_exec) { pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n", address >= TASK_SIZE ? "exec-protected" : "user", address, @@ -349,7 +347,6 @@ static inline void cmo_account_page_fault(void) static inline void cmo_account_page_fault(void) { } #endif /* CONFIG_PPC_SMLPAR */ -#ifdef CONFIG_PPC_BOOK3S static void sanity_check_fault(bool is_write, bool is_user, unsigned long error_code, unsigned long address) { @@ -366,6 +363,9 @@ static void sanity_check_fault(bool is_write, bool is_user, return; } + if (!IS_ENABLED(CONFIG_PPC_BOOK3S)) + return; + /* * For hash translation mode, we should never get a * PROTFAULT. Any update to pte to reduce access will result in us @@ -400,10 +400,6 @@ static void sanity_check_fault(bool is_write, bool is_user, WARN_ON_ONCE(error_code & DSISR_PROTFAULT); } -#else -static void sanity_check_fault(bool is_write, bool is_user, - unsigned long error_code, unsigned long address) { } -#endif /* CONFIG_PPC_BOOK3S */ /* * Define the correct "is_write" bit in error_code based diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 1cfe57b51d7e..3f78007a7282 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -121,7 +121,7 @@ static void __init kasan_remap_early_shadow_ro(void) pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) + if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page))) continue; __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 96ca90ce0264..d427f70556ea 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -48,6 +48,7 @@ #include <asm/fixmap.h> #include <asm/swiotlb.h> #include <asm/rtas.h> +#include <asm/ftrace.h> #include <mm/mmu_decl.h> @@ -346,6 +347,7 @@ void free_initmem(void) mark_initmem_nx(); init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); + ftrace_free_init_tramp(); } /** @@ -530,7 +532,7 @@ void __flush_dcache_icache(void *p) * space occurs, before returning to user space. */ - if (cpu_has_feature(MMU_FTR_TYPE_44x)) + if (mmu_has_feature(MMU_FTR_TYPE_44x)) return; invalidate_icache_range(addr, addr + PAGE_SIZE); diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 18f20da0d348..64290d343b55 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -79,7 +79,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * context */ if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile ("dssall"); + asm volatile (PPC_DSSALL); if (new_on_cpu) radix_kvm_prefetch_workaround(next); diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 55d4377ccfae..6d2a268528df 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -11,6 +11,7 @@ #ifndef __ASSEMBLY__ #include <asm/types.h> +#include <asm/code-patching.h> #ifdef PPC64_ELF_ABI_v1 #define FUNCTION_DESCR_SIZE 24 @@ -180,13 +181,26 @@ #define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a)) /* Long jump; (unconditional 'branch') */ -#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ - (((dest) - (ctx->idx * 4)) & 0x03fffffc)) +#define PPC_JMP(dest) \ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_branch_range(offset)) { \ + pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \ + } while (0) /* "cond" here covers BO:BI fields. */ -#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ - (((cond) & 0x3ff) << 16) | \ - (((dest) - (ctx->idx * 4)) & \ - 0xfffc)) +#define PPC_BCC_SHORT(cond, dest) \ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_cond_branch_range(offset)) { \ + pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ + } while (0) + /* Sign-extended 32-bit immediate load */ #define PPC_LI32(d, i) do { \ if ((int)(uintptr_t)(i) >= -32768 && \ @@ -225,11 +239,6 @@ #define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif -static inline bool is_nearbranch(int offset) -{ - return (offset < 32768) && (offset >= -32768); -} - /* * The fly in the ointment of code size changing from pass to pass is * avoided by padding the short branch case with a NOP. If code size differs @@ -238,7 +247,7 @@ static inline bool is_nearbranch(int offset) * state. */ #define PPC_BCC(cond, dest) do { \ - if (is_nearbranch((dest) - (ctx->idx * 4))) { \ + if (is_offset_in_cond_branch_range((long)(dest) - (ctx->idx * 4))) { \ PPC_BCC_SHORT(cond, dest); \ PPC_NOP(); \ } else { \ diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index cf3a7e337f02..68ddf3502ab0 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -16,18 +16,18 @@ * with our redzone usage. * * [ prev sp ] <------------- - * [ nv gpr save area ] 6*8 | + * [ nv gpr save area ] 5*8 | * [ tail_call_cnt ] 8 | - * [ local_tmp_var ] 8 | + * [ local_tmp_var ] 16 | * fp (r31) --> [ ebpf stack space ] upto 512 | * [ frame header ] 32/112 | * sp (r1) ---> [ stack pointer ] -------------- */ /* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (6*8) +#define BPF_PPC_STACK_SAVE (5*8) /* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 16 +#define BPF_PPC_STACK_LOCALS 24 /* stack frame excluding BPF stack, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index be3517ef0574..687cd352648a 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -15,6 +15,7 @@ #include <linux/if_vlan.h> #include <asm/kprobes.h> #include <linux/bpf.h> +#include <asm/security_features.h> #include "bpf_jit64.h" @@ -56,9 +57,9 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ prev sp ] <------------- * [ ... ] | * sp (r1) ---> [ stack pointer ] -------------- - * [ nv gpr save area ] 6*8 + * [ nv gpr save area ] 5*8 * [ tail_call_cnt ] 8 - * [ local_tmp_var ] 8 + * [ local_tmp_var ] 16 * [ unused red zone ] 208 bytes protected */ static int bpf_jit_stack_local(struct codegen_context *ctx) @@ -66,12 +67,12 @@ static int bpf_jit_stack_local(struct codegen_context *ctx) if (bpf_has_stack_frame(ctx)) return STACK_FRAME_MIN_SIZE + ctx->stack_size; else - return -(BPF_PPC_STACK_SAVE + 16); + return -(BPF_PPC_STACK_SAVE + 24); } static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx) { - return bpf_jit_stack_local(ctx) + 8; + return bpf_jit_stack_local(ctx) + 16; } static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) @@ -224,7 +225,7 @@ static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, PPC_BLRL(); } -static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* * By now, the eBPF program has already setup parameters in r3, r4 and r5 @@ -285,14 +286,39 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 bpf_jit_emit_common_epilogue(image, ctx); PPC_BCTR(); + /* out: */ + return 0; } +/* + * We spill into the redzone always, even if the bpf program has its own stackframe. + * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local() + */ +void bpf_stf_barrier(void); + +asm ( +" .global bpf_stf_barrier ;" +" bpf_stf_barrier: ;" +" std 21,-64(1) ;" +" std 22,-56(1) ;" +" sync ;" +" ld 21,-64(1) ;" +" ld 22,-56(1) ;" +" ori 31,31,0 ;" +" .rept 14 ;" +" b 1f ;" +" 1: ;" +" .endr ;" +" blr ;" +); + /* Assemble the body code between the prologue & epilogue */ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, bool extra_pass) { + enum stf_barrier_type stf_barrier = stf_barrier_type_get(); const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; int i, ret; @@ -347,18 +373,25 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_SUB(dst_reg, dst_reg, src_reg); goto bpf_alu32_trunc; case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ - case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ + if (!imm) { + goto bpf_alu32_trunc; + } else if (imm >= -32768 && imm < 32768) { + PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); + } else { + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ - if (BPF_OP(code) == BPF_SUB) - imm = -imm; - if (imm) { - if (imm >= -32768 && imm < 32768) - PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); - else { - PPC_LI32(b2p[TMP_REG_1], imm); - PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); - } + if (!imm) { + goto bpf_alu32_trunc; + } else if (imm > -32768 && imm <= 32768) { + PPC_ADDI(dst_reg, dst_reg, IMM_L(-imm)); + } else { + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -408,8 +441,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ if (imm == 0) return -EINVAL; - else if (imm == 1) - goto bpf_alu32_trunc; + if (imm == 1) { + if (BPF_OP(code) == BPF_DIV) { + goto bpf_alu32_trunc; + } else { + PPC_LI(dst_reg, 0); + break; + } + } PPC_LI32(b2p[TMP_REG_1], imm); switch (BPF_CLASS(code)) { @@ -645,6 +684,36 @@ emit_clear: break; /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) || + (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) && + (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) || !cpu_has_feature(CPU_FTR_HVMODE)))) + break; + + switch (stf_barrier) { + case STF_BARRIER_EIEIO: + EMIT(0x7c0006ac | 0x02000000); + break; + case STF_BARRIER_SYNC_ORI: + EMIT(PPC_INST_SYNC); + PPC_LD(b2p[TMP_REG_1], 13, 0); + PPC_ORI(31, 31, 0); + break; + case STF_BARRIER_FALLBACK: + EMIT(PPC_INST_MFLR | ___PPC_RT(b2p[TMP_REG_1])); + PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); + PPC_MTCTR(12); + EMIT(PPC_INST_BCTR | 0x1); + PPC_MTLR(b2p[TMP_REG_1]); + break; + case STF_BARRIER_NONE: + break; + } + break; + + /* * BPF_ST(X) */ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ @@ -989,7 +1058,9 @@ cond_branch: */ case BPF_JMP | BPF_TAIL_CALL: ctx->seen |= SEEN_TAILCALL; - bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + if (ret < 0) + return ret; break; default: diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index f582aa2d9807..6f013e418834 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -133,6 +133,9 @@ static void pmao_restore_workaround(bool ebb) { } bool is_sier_available(void) { + if (!ppmu) + return false; + if (ppmu->flags & PPMU_HAS_SIER) return true; @@ -2074,7 +2077,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* + * If address is not requested in the sample via + * PERF_SAMPLE_IP, just record that sample irrespective + * of SIAR valid check. + */ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x80000000LL) @@ -2087,6 +2100,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_event_update_userpage(event); /* + * Due to hardware limitation, sometimes SIAR could sample a kernel + * address even when freeze on supervisor state (kernel) is set in + * MMCR2. Check attr.exclude_kernel and address to drop the sample in + * these cases. + */ + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; + + /* * Finally record data if requested. */ if (record) { diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h index e608f9db12dd..8965b4463d43 100644 --- a/arch/powerpc/perf/hv-gpci-requests.h +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -95,7 +95,7 @@ REQUEST(__field(0, 8, partition_id) #define REQUEST_NAME system_performance_capabilities #define REQUEST_NUM 0x40 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__field(0, 1, perf_collect_privileged) __field(0x1, 1, capability_mask) @@ -223,7 +223,7 @@ REQUEST(__field(0, 2, partition_id) #define REQUEST_NAME system_hypervisor_times #define REQUEST_NUM 0xF0 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) __count(0x8, 8, time_spent_processing_virtual_processor_timers) @@ -234,7 +234,7 @@ REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) #define REQUEST_NAME system_tlbie_count_and_time #define REQUEST_NUM 0xF4 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__count(0, 8, tlbie_instructions_issued) /* diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 6884d16ec19b..732cfc53e260 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -164,7 +164,7 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index, */ count = 0; for (i = offset; i < offset + length; i++) - count |= arg->bytes[i] << (i - offset); + count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); *value = count; out: diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index eb82dda884e5..d76e800a1f33 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1441,7 +1441,11 @@ static int trace_imc_event_init(struct perf_event *event) event->hw.idx = -1; target = event->hw.target; - event->pmu->task_ctx_nr = perf_hw_context; + /* + * There can only be a single PMU for perf_hw_context events which is assigned to + * core PMU. Hence use "perf_sw_context" for trace_imc. + */ + event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; return 0; } diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4c86da5eb28a..25eda98f3b1b 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -269,6 +269,15 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) mask |= CNST_PMC_MASK(pmc); value |= CNST_PMC_VAL(pmc); + + /* + * PMC5 and PMC6 are used to count cycles and instructions and + * they do not support most of the constraint bits. Add a check + * to exclude PMC5/6 from most of the constraints except for + * EBB/BHRB. + */ + if (pmc >= 5) + goto ebb_bhrb; } if (pmc <= 4) { @@ -317,7 +326,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { mask |= CNST_THRESH_MASK; value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); - } + } else if (event_is_threshold(event)) + return -1; } else { /* * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, @@ -335,6 +345,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } } +ebb_bhrb: if (!pmc && ebb) /* EBB events must specify the PMC */ return -1; @@ -353,8 +364,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) * EBB events are pinned & exclusive, so this should never actually * hit, but we leave it as a fallback in case. */ - mask |= CNST_EBB_VAL(ebb); - value |= CNST_EBB_MASK; + mask |= CNST_EBB_MASK; + value |= CNST_EBB_VAL(ebb); *maskp = mask; *valp = value; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 08c3ef796198..1225f53609a4 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -131,11 +131,11 @@ int p9_dd22_bl_ev[] = { /* Table of alternatives, sorted by column 0 */ static const unsigned int power9_event_alternatives[][MAX_ALT] = { - { PM_INST_DISP, PM_INST_DISP_ALT }, - { PM_RUN_CYC_ALT, PM_RUN_CYC }, - { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, - { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT }, { PM_BR_2PATH, PM_BR_2PATH_ALT }, + { PM_INST_DISP, PM_INST_DISP_ALT }, + { PM_RUN_CYC_ALT, PM_RUN_CYC }, + { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT }, + { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, }; static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index b299e43f5ef9..823397c802de 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -208,6 +208,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) if (irq == NO_IRQ) { pr_err("device tree node %pOFn is missing a interrupt", np); + of_node_put(np); return; } @@ -215,6 +216,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) if (rc) { pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d", np, rc); + of_node_put(np); return; } } diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c index ae8b812c9202..2481e78c0423 100644 --- a/arch/powerpc/platforms/4xx/cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -327,6 +327,6 @@ late_initcall(cpm_init); static int __init cpm_powersave_off(char *arg) { cpm.powersave_off = 1; - return 0; + return 1; } __setup("powersave=off", cpm_powersave_off); diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 3a9969c429b3..054f927bfef9 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -181,7 +181,7 @@ sram_code: udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ mullw r12, r12, r11 mftb r13 /* start */ - addi r12, r13, r12 /* end */ + add r12, r13, r12 /* end */ 1: mftb r13 /* current */ cmp cr0, r13, r12 diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index 7c0133f558d0..ffa8a7a6a2db 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -94,9 +94,8 @@ int __init mpc85xx_setup_pmc(void) pr_err("Could not map guts node address\n"); return -ENOMEM; } + qoriq_pm_ops = &mpc85xx_pm_ops; } - qoriq_pm_ops = &mpc85xx_pm_ops; - return 0; } diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 0f65c51271db..ec6dc2d7a9db 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -292,6 +292,7 @@ cpm_setbrg(uint brg, uint rate) out_be32(bp, (((BRG_UART_CLK_DIV16 / rate) - 1) << 1) | CPM_BRG_EN | CPM_BRG_DIV16); } +EXPORT_SYMBOL(cpm_setbrg); struct cpm_ioport16 { __be16 dir, par, odr_sor, dat, intr; diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c index c80bd7afd6c5..b06c6d26dd72 100644 --- a/arch/powerpc/platforms/8xx/micropatch.c +++ b/arch/powerpc/platforms/8xx/micropatch.c @@ -361,6 +361,17 @@ void __init cpm_load_patch(cpm8xx_t *cp) if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) { smc_uart_t *smp; + if (IS_ENABLED(CONFIG_PPC_EARLY_DEBUG_CPM)) { + int i; + + for (i = 0; i < sizeof(*smp); i += 4) { + u32 __iomem *src = (u32 __iomem *)&cp->cp_dparam[PROFF_SMC1 + i]; + u32 __iomem *dst = (u32 __iomem *)&cp->cp_dparam[PROFF_DSP1 + i]; + + out_be32(dst, in_be32(src)); + } + } + smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1]; out_be16(&smp->smc_rpbase, 0x1ec0); smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2]; diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index e9617d35fd1f..209b12323aa4 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,6 +153,7 @@ int mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; + goto out; } ret = 0; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index d82e3664ffdf..18792a5b003a 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -219,12 +219,11 @@ config TAU temperature within 2-4 degrees Celsius. This option shows the current on-die temperature in /proc/cpuinfo if the cpu supports it. - Unfortunately, on some chip revisions, this sensor is very inaccurate - and in many cases, does not work at all, so don't assume the cpu - temp is actually what /proc/cpuinfo says it is. + Unfortunately, this sensor is very inaccurate when uncalibrated, so + don't assume the cpu temp is actually what /proc/cpuinfo says it is. config TAU_INT - bool "Interrupt driven TAU driver (DANGEROUS)" + bool "Interrupt driven TAU driver (EXPERIMENTAL)" depends on TAU ---help--- The TAU supports an interrupt driven mode which causes an interrupt @@ -232,12 +231,7 @@ config TAU_INT to get notified the temp has exceeded a range. With this option off, a timer is used to re-check the temperature periodically. - However, on some cpus it appears that the TAU interrupt hardware - is buggy and can cause a situation which would lead unexplained hard - lockups. - - Unless you are extending the TAU driver, or enjoy kernel/hardware - debugging, leave this option off. + If in doubt, say N here. config TAU_AVERAGE bool "Average high and low temp" diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index ca9ffc1c8685..a6a60e2b8f45 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -976,6 +976,7 @@ static int __init cell_iommu_fixed_mapping_init(void) if (hbase < dbase || (hend > (dbase + dsize))) { pr_debug("iommu: hash window doesn't fit in" "real DMA window\n"); + of_node_put(np); return -1; } } diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 6af3a6e600a7..80c9c4d71550 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -77,6 +77,7 @@ static int cbe_system_reset_exception(struct pt_regs *regs) switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: set_dec(1); + break; case SRR1_WAKEEE: /* * Handle these when interrupts get re-enabled and we take diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index a1b7f79a8a15..de10c13de15c 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -215,6 +215,7 @@ void hlwd_pic_probe(void) irq_set_chained_handler(cascade_virq, hlwd_pic_irq_cascade); hlwd_irq_host = host; + of_node_put(np); break; } } diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index da69e0fcb4f1..9b85b030cbeb 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -48,7 +48,7 @@ flush_disable_75x: /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -196,7 +196,7 @@ flush_disable_745x: isync /* Stop prefetch streams */ - DSSALL + PPC_DSSALL sync /* Disable L2 prefetching */ diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index bf4be4b53b44..210435a43bf9 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -582,6 +582,7 @@ static void __init kw_i2c_add(struct pmac_i2c_host_kw *host, bus->close = kw_i2c_close; bus->xfer = kw_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); if (controller == busnode) bus->flags = pmac_i2c_multibus; @@ -811,6 +812,7 @@ static void __init pmu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = pmu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = pmac_i2c_multibus; list_add(&bus->link, &pmac_i2c_busses); @@ -934,6 +936,7 @@ static void __init smu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = smu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = 0; list_add(&bus->link, &pmac_i2c_busses); diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index bd6085b470b7..935dcac4c02d 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -293,14 +293,7 @@ grackle_wake_up: * we do any r1 memory access as we are not sure they * are in a sane state above the first 256Mb region */ - li r0,16 /* load up segment register values */ - mtctr r0 /* for context 0 */ - lis r3,0x2000 /* Ku = 1, VSID = 0 */ - li r4,0 -3: mtsrin r3,r4 - addi r3,r3,0x111 /* increment VSID */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b + bl load_segment_registers sync isync diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index eb2e75dac369..b45ab455a18e 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -30,6 +30,7 @@ struct memtrace_entry { char name[16]; }; +static DEFINE_MUTEX(memtrace_mutex); static u64 memtrace_size; static struct memtrace_entry *memtrace_array; @@ -67,6 +68,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg) return 0; } +static void memtrace_clear_range(unsigned long start_pfn, + unsigned long nr_pages) +{ + unsigned long pfn; + + /* + * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM + * does not apply, avoid passing around "struct page" and use + * clear_page() instead directly. + */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { + if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) + cond_resched(); + clear_page(__va(PFN_PHYS(pfn))); + } +} + /* called with device_hotplug_lock held */ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) { @@ -112,6 +130,11 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { /* + * Clear the range while we still have a linear + * mapping. + */ + memtrace_clear_range(base_pfn, nr_pages); + /* * Remove memory in memory block size chunks so that * iomem resources are always split to the same size and * we never try to remove memory that spans two iomem @@ -268,6 +291,7 @@ static int memtrace_online(void) static int memtrace_enable_set(void *data, u64 val) { + int rc = -EAGAIN; u64 bytes; /* @@ -280,25 +304,31 @@ static int memtrace_enable_set(void *data, u64 val) return -EINVAL; } + mutex_lock(&memtrace_mutex); + /* Re-add/online previously removed/offlined memory */ if (memtrace_size) { if (memtrace_online()) - return -EAGAIN; + goto out_unlock; } - if (!val) - return 0; + if (!val) { + rc = 0; + goto out_unlock; + } /* Offline and remove memory */ if (memtrace_init_regions_runtime(val)) - return -EINVAL; + goto out_unlock; if (memtrace_init_debugfs()) - return -EINVAL; + goto out_unlock; memtrace_size = val; - - return 0; + rc = 0; +out_unlock: + mutex_unlock(&memtrace_mutex); + return rc; } static int memtrace_enable_get(void *data, u64 *val) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index b95b9e3c4c98..c640cb993209 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -384,7 +384,8 @@ static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group) for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->take_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->take_ownership) continue; pe->table_group.ops->take_ownership(&pe->table_group); } @@ -400,7 +401,8 @@ static void pnv_npu_peers_release_ownership( for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->release_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->release_ownership) continue; pe->table_group.ops->release_ownership(&pe->table_group); } @@ -560,6 +562,11 @@ int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n", @@ -607,6 +614,11 @@ int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n", diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 543c816fa99e..0e6693bacb7e 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -318,15 +318,14 @@ static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, return count; } -static struct dump_obj *create_dump_obj(uint32_t id, size_t size, - uint32_t type) +static void create_dump_obj(uint32_t id, size_t size, uint32_t type) { struct dump_obj *dump; int rc; dump = kzalloc(sizeof(*dump), GFP_KERNEL); if (!dump) - return NULL; + return; dump->kobj.kset = dump_kset; @@ -346,21 +345,39 @@ static struct dump_obj *create_dump_obj(uint32_t id, size_t size, rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id); if (rc) { kobject_put(&dump->kobj); - return NULL; + return; } + /* + * As soon as the sysfs file for this dump is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the dump before kobject_uevent() is called. If that + * happens then there is a potential race between + * dump_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&dump->kobj); rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr); - if (rc) { + if (rc == 0) { + kobject_uevent(&dump->kobj, KOBJ_ADD); + + pr_info("%s: New platform dump. ID = 0x%x Size %u\n", + __func__, dump->id, dump->size); + } else { + /* Drop reference count taken for bin file */ kobject_put(&dump->kobj); - return NULL; } - pr_info("%s: New platform dump. ID = 0x%x Size %u\n", - __func__, dump->id, dump->size); - - kobject_uevent(&dump->kobj, KOBJ_ADD); - - return dump; + /* Drop our reference */ + kobject_put(&dump->kobj); + return; } static irqreturn_t process_dump(int irq, void *data) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 62ef7ad995da..5e33b1fc67c2 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -179,14 +179,14 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, return count; } -static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) +static void create_elog_obj(uint64_t id, size_t size, uint64_t type) { struct elog_obj *elog; int rc; elog = kzalloc(sizeof(*elog), GFP_KERNEL); if (!elog) - return NULL; + return; elog->kobj.kset = elog_kset; @@ -219,18 +219,37 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); if (rc) { kobject_put(&elog->kobj); - return NULL; + return; } + /* + * As soon as the sysfs file for this elog is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the elog before kobject_uevent() is called. If that + * happens then there is a potential race between + * elog_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&elog->kobj); rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); - if (rc) { + if (rc == 0) { + kobject_uevent(&elog->kobj, KOBJ_ADD); + } else { + /* Drop the reference taken for the bin file */ kobject_put(&elog->kobj); - return NULL; } - kobject_uevent(&elog->kobj, KOBJ_ADD); + /* Drop our reference */ + kobject_put(&elog->kobj); - return elog; + return; } static irqreturn_t elog_event(int irq, void *data) diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index d361d37d975f..f5cea068f0bd 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -60,7 +60,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) addr = be64_to_cpu(addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = (void *)addr; - if (opal_fdm_active->registered_regions == 0) + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) return; ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr); @@ -95,17 +95,17 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf); static void opal_fadump_update_config(struct fw_dump *fadump_conf, const struct opal_fadump_mem_struct *fdm) { - pr_debug("Boot memory regions count: %d\n", fdm->region_cnt); + pr_debug("Boot memory regions count: %d\n", be16_to_cpu(fdm->region_cnt)); /* * The destination address of the first boot memory region is the * destination address of boot memory regions. */ - fadump_conf->boot_mem_dest_addr = fdm->rgn[0].dest; + fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest); pr_debug("Destination address of boot memory regions: %#016llx\n", fadump_conf->boot_mem_dest_addr); - fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr; + fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr); } /* @@ -126,9 +126,9 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, fadump_conf->boot_memory_size = 0; pr_debug("Boot memory regions:\n"); - for (i = 0; i < fdm->region_cnt; i++) { - base = fdm->rgn[i].src; - size = fdm->rgn[i].size; + for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) { + base = be64_to_cpu(fdm->rgn[i].src); + size = be64_to_cpu(fdm->rgn[i].size); pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size); fadump_conf->boot_mem_addr[i] = base; @@ -143,7 +143,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, * Start address of reserve dump area (permanent reservation) for * re-registering FADump after dump capture. */ - fadump_conf->reserve_dump_area_start = fdm->rgn[0].dest; + fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest); /* * Rarely, but it can so happen that system crashes before all @@ -155,13 +155,14 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, * Hope the memory that could not be preserved only has pages * that are usually filtered out while saving the vmcore. */ - if (fdm->region_cnt > fdm->registered_regions) { + if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) { pr_warn("Not all memory regions were saved!!!\n"); pr_warn(" Unsaved memory regions:\n"); - i = fdm->registered_regions; - while (i < fdm->region_cnt) { + i = be16_to_cpu(fdm->registered_regions); + while (i < be16_to_cpu(fdm->region_cnt)) { pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n", - i, fdm->rgn[i].src, fdm->rgn[i].size); + i, be64_to_cpu(fdm->rgn[i].src), + be64_to_cpu(fdm->rgn[i].size)); i++; } @@ -170,7 +171,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, } fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size); - fadump_conf->boot_mem_regs_cnt = fdm->region_cnt; + fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt); opal_fadump_update_config(fadump_conf, fdm); } @@ -178,35 +179,38 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm) { fdm->version = OPAL_FADUMP_VERSION; - fdm->region_cnt = 0; - fdm->registered_regions = 0; - fdm->fadumphdr_addr = 0; + fdm->region_cnt = cpu_to_be16(0); + fdm->registered_regions = cpu_to_be16(0); + fdm->fadumphdr_addr = cpu_to_be64(0); } static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf) { u64 addr = fadump_conf->reserve_dump_area_start; + u16 reg_cnt; int i; opal_fdm = __va(fadump_conf->kernel_metadata); opal_fadump_init_metadata(opal_fdm); /* Boot memory regions */ + reg_cnt = be16_to_cpu(opal_fdm->region_cnt); for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) { - opal_fdm->rgn[i].src = fadump_conf->boot_mem_addr[i]; - opal_fdm->rgn[i].dest = addr; - opal_fdm->rgn[i].size = fadump_conf->boot_mem_sz[i]; + opal_fdm->rgn[i].src = cpu_to_be64(fadump_conf->boot_mem_addr[i]); + opal_fdm->rgn[i].dest = cpu_to_be64(addr); + opal_fdm->rgn[i].size = cpu_to_be64(fadump_conf->boot_mem_sz[i]); - opal_fdm->region_cnt++; + reg_cnt++; addr += fadump_conf->boot_mem_sz[i]; } + opal_fdm->region_cnt = cpu_to_be16(reg_cnt); /* * Kernel metadata is passed to f/w and retrieved in capture kerenl. * So, use it to save fadump header address instead of calculating it. */ - opal_fdm->fadumphdr_addr = (opal_fdm->rgn[0].dest + - fadump_conf->boot_memory_size); + opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) + + fadump_conf->boot_memory_size); opal_fadump_update_config(fadump_conf, opal_fdm); @@ -269,18 +273,21 @@ static u64 opal_fadump_get_bootmem_min(void) static int opal_fadump_register(struct fw_dump *fadump_conf) { s64 rc = OPAL_PARAMETER; + u16 registered_regs; int i, err = -EIO; - for (i = 0; i < opal_fdm->region_cnt; i++) { + registered_regs = be16_to_cpu(opal_fdm->registered_regions); + for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) { rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE, - opal_fdm->rgn[i].src, - opal_fdm->rgn[i].dest, - opal_fdm->rgn[i].size); + be64_to_cpu(opal_fdm->rgn[i].src), + be64_to_cpu(opal_fdm->rgn[i].dest), + be64_to_cpu(opal_fdm->rgn[i].size)); if (rc != OPAL_SUCCESS) break; - opal_fdm->registered_regions++; + registered_regs++; } + opal_fdm->registered_regions = cpu_to_be16(registered_regs); switch (rc) { case OPAL_SUCCESS: @@ -291,7 +298,8 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) case OPAL_RESOURCE: /* If MAX regions limit in f/w is hit, warn and proceed. */ pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n", - (opal_fdm->region_cnt - opal_fdm->registered_regions)); + (be16_to_cpu(opal_fdm->region_cnt) - + be16_to_cpu(opal_fdm->registered_regions))); fadump_conf->dump_registered = 1; err = 0; break; @@ -312,7 +320,7 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) * If some regions were registered before OPAL_MPIPL_ADD_RANGE * OPAL call failed, unregister all regions. */ - if ((err < 0) && (opal_fdm->registered_regions > 0)) + if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0)) opal_fadump_unregister(fadump_conf); return err; @@ -328,7 +336,7 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf) return -EIO; } - opal_fdm->registered_regions = 0; + opal_fdm->registered_regions = cpu_to_be16(0); fadump_conf->dump_registered = 0; return 0; } @@ -563,19 +571,20 @@ static void opal_fadump_region_show(struct fw_dump *fadump_conf, else fdm_ptr = opal_fdm; - for (i = 0; i < fdm_ptr->region_cnt; i++) { + for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) { /* * Only regions that are registered for MPIPL * would have dump data. */ if ((fadump_conf->dump_active) && - (i < fdm_ptr->registered_regions)) - dumped_bytes = fdm_ptr->rgn[i].size; + (i < be16_to_cpu(fdm_ptr->registered_regions))) + dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size); seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", - fdm_ptr->rgn[i].src, fdm_ptr->rgn[i].dest); + be64_to_cpu(fdm_ptr->rgn[i].src), + be64_to_cpu(fdm_ptr->rgn[i].dest)); seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", - fdm_ptr->rgn[i].size, dumped_bytes); + be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes); } /* Dump is active. Show reserved area start address. */ @@ -624,6 +633,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { const __be32 *prop; unsigned long dn; + __be64 be_addr; u64 addr = 0; int i, len; s64 ret; @@ -680,13 +690,13 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) if (!prop) return; - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr); - if ((ret != OPAL_SUCCESS) || !addr) { + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr); + if ((ret != OPAL_SUCCESS) || !be_addr) { pr_err("Failed to get Kernel metadata (%lld)\n", ret); return; } - addr = be64_to_cpu(addr); + addr = be64_to_cpu(be_addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = __va(addr); @@ -697,14 +707,14 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) } /* Kernel regions not registered with f/w for MPIPL */ - if (opal_fdm_active->registered_regions == 0) { + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) { opal_fdm_active = NULL; return; } - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr); - if (addr) { - addr = be64_to_cpu(addr); + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr); + if (be_addr) { + addr = be64_to_cpu(be_addr); pr_debug("CPU metadata addr: %llx\n", addr); opal_cpu_metadata = __va(addr); } diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h index f1e9ecf548c5..3f715efb0aa6 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.h +++ b/arch/powerpc/platforms/powernv/opal-fadump.h @@ -31,14 +31,14 @@ * OPAL FADump kernel metadata * * The address of this structure will be registered with f/w for retrieving - * and processing during crash dump. + * in the capture kernel to process the crash dump. */ struct opal_fadump_mem_struct { u8 version; u8 reserved[3]; - u16 region_cnt; /* number of regions */ - u16 registered_regions; /* Regions registered for MPIPL */ - u64 fadumphdr_addr; + __be16 region_cnt; /* number of regions */ + __be16 registered_regions; /* Regions registered for MPIPL */ + __be64 fadumphdr_addr; struct opal_mpipl_region rgn[FADUMP_MAX_MEM_REGS]; } __packed; @@ -135,7 +135,7 @@ static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt, for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) { reg_entry = (struct hdat_fadump_reg_entry *)bufp; val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) : - reg_entry->reg_val); + (u64)(reg_entry->reg_val)); opal_fadump_set_regval_regnum(regs, be32_to_cpu(reg_entry->reg_type), be32_to_cpu(reg_entry->reg_num), diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index 608569082ba0..123a0e799b7b 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -396,6 +396,7 @@ void __init opal_lpc_init(void) if (!of_get_property(np, "primary", NULL)) continue; opal_lpc_chip_id = of_get_ibm_chip_id(np); + of_node_put(np); break; } if (opal_lpc_chip_id < 0) diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 45f4223a790f..6ebc906e07b0 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -372,6 +372,12 @@ static struct notifier_block opal_prd_event_nb = { .priority = 0, }; +static struct notifier_block opal_prd_event_nb2 = { + .notifier_call = opal_prd_msg_notifier, + .next = NULL, + .priority = 0, +}; + static int opal_prd_probe(struct platform_device *pdev) { int rc; @@ -393,9 +399,10 @@ static int opal_prd_probe(struct platform_device *pdev) return rc; } - rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb); + rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb2); if (rc) { pr_err("Couldn't register PRD2 event notifier\n"); + opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); return rc; } @@ -404,6 +411,8 @@ static int opal_prd_probe(struct platform_device *pdev) pr_err("failed to register miscdev\n"); opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, + &opal_prd_event_nb2); return rc; } @@ -414,6 +423,7 @@ static int opal_prd_remove(struct platform_device *pdev) { misc_deregister(&opal_prd_dev); opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2); return 0; } diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 1aa51c4fa904..4825e4cf4410 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -35,4 +35,6 @@ ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count); u32 memcons_get_size(struct memcons *mc); struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name); +void pnv_rng_init(void); + #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 8035caf6e297..eba4142998b0 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -17,6 +17,7 @@ #include <asm/prom.h> #include <asm/machdep.h> #include <asm/smp.h> +#include "powernv.h" #define DARN_ERR 0xFFFFFFFFFFFFFFFFul @@ -28,7 +29,6 @@ struct powernv_rng { static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); - int powernv_hwrng_present(void) { struct powernv_rng *rng; @@ -43,7 +43,11 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) unsigned long parity; /* Calculate the parity of the value */ - asm ("popcntd %0,%1" : "=r" (parity) : "r" (val)); + asm (".machine push; \ + .machine power7; \ + popcntd %0,%1; \ + .machine pop;" + : "=r" (parity) : "r" (val)); /* xor our value with the previous mask */ val ^= rng->mask; @@ -94,9 +98,6 @@ static int initialise_darn(void) return 0; } } - - pr_warn("Unable to use DARN for get_random_seed()\n"); - return -EIO; } @@ -159,32 +160,55 @@ static __init int rng_create(struct device_node *dn) rng_init_per_cpu(rng, dn); - pr_info_once("Registering arch random hook.\n"); - ppc_md.get_random_seed = powernv_get_random_long; return 0; } -static __init int rng_init(void) +static int __init pnv_get_random_long_early(unsigned long *v) { struct device_node *dn; - int rc; + + if (!slab_is_available()) + return 0; + + if (cmpxchg(&ppc_md.get_random_seed, pnv_get_random_long_early, + NULL) != pnv_get_random_long_early) + return 0; for_each_compatible_node(dn, NULL, "ibm,power-rng") { - rc = rng_create(dn); - if (rc) { - pr_err("Failed creating rng for %pOF (%d).\n", - dn, rc); + if (rng_create(dn)) continue; - } - /* Create devices for hwrng driver */ of_platform_device_create(dn, NULL, NULL); } - initialise_darn(); + if (!ppc_md.get_random_seed) + return 0; + return ppc_md.get_random_seed(v); +} + +void __init pnv_rng_init(void) +{ + struct device_node *dn; + /* Prefer darn over the rest. */ + if (!initialise_darn()) + return; + + dn = of_find_compatible_node(NULL, NULL, "ibm,power-rng"); + if (dn) + ppc_md.get_random_seed = pnv_get_random_long_early; + + of_node_put(dn); +} + +static int __init pnv_rng_late_init(void) +{ + unsigned long v; + /* In case it wasn't called during init for some other reason. */ + if (ppc_md.get_random_seed == pnv_get_random_long_early) + pnv_get_random_long_early(&v); return 0; } -machine_subsys_initcall(powernv, rng_init); +machine_subsys_initcall(powernv, pnv_rng_late_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 83498604d322..d9e26614d7ed 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -122,12 +122,29 @@ static void pnv_setup_rfi_flush(void) type = L1D_FLUSH_ORI; } + /* + * If we are non-Power9 bare metal, we don't need to flush on kernel + * entry or after user access: they fix a P9 specific vulnerability. + */ + if (!pvr_version_is(PVR_POWER9)) { + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + } + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } static void __init pnv_setup_arch(void) @@ -151,6 +168,8 @@ static void __init pnv_setup_arch(void) powersave_nap = 1; /* XXX PMCS */ + + pnv_rng_init(); } static void __init pnv_init(void) @@ -169,11 +188,16 @@ static void __init pnv_init(void) add_preferred_console("hvc", 0, NULL); if (!radix_enabled()) { + size_t size = sizeof(struct slb_entry) * mmu_slb_size; int i; /* Allocate per cpu area to save old slb contents during MCE */ - for_each_possible_cpu(i) - paca_ptrs[i]->mce_faulty_slbs = memblock_alloc_node(mmu_slb_size, __alignof__(*paca_ptrs[i]->mce_faulty_slbs), cpu_to_node(i)); + for_each_possible_cpu(i) { + paca_ptrs[i]->mce_faulty_slbs = + memblock_alloc_node(size, + __alignof__(struct slb_entry), + cpu_to_node(i)); + } } } diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index b2ba3e95bda7..bbf361f23ae8 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -43,7 +43,7 @@ #include <asm/udbg.h> #define DBG(fmt...) udbg_printf(fmt) #else -#define DBG(fmt...) +#define DBG(fmt...) do { } while (0) #endif static void pnv_smp_setup_cpu(int cpu) diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c index e4a00ad06f9d..67c8c4b2d8b1 100644 --- a/arch/powerpc/platforms/powernv/ultravisor.c +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -55,6 +55,7 @@ static int __init uv_init(void) return -ENODEV; uv_memcons = memcons_init(node, "memcons"); + of_node_put(node); if (!uv_memcons) return -ENOENT; diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index f42fe4e86ce5..c3e374669400 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -6,6 +6,7 @@ * Copyright 2006 Sony Corp. */ +#include <linux/dma-mapping.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/memblock.h> @@ -1118,6 +1119,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, enum ps3_dma_region_type region_type, void *addr, unsigned long len) { unsigned long lpar_addr; + int result; lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0; @@ -1129,6 +1131,16 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, r->offset -= map.r1.offset; r->len = len ? len : _ALIGN_UP(map.total, 1 << r->page_size); + dev->core.dma_mask = &r->dma_mask; + + result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32)); + + if (result < 0) { + dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n", + __func__, __LINE__, result); + return result; + } + switch (dev->dev_type) { case PS3_DEVICE_TYPE_SB: r->region_ops = (USE_DYNAMIC_DMA) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 16e86ba8aa20..f6b7749d6ada 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -127,7 +127,6 @@ void dlpar_free_cc_nodes(struct device_node *dn) #define NEXT_PROPERTY 3 #define PREV_PARENT 4 #define MORE_MEMORY 5 -#define CALL_AGAIN -2 #define ERR_CFG_USE -9003 struct device_node *dlpar_configure_connector(__be32 drc_index, @@ -168,6 +167,9 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, spin_unlock(&rtas_data_buf_lock); + if (rtas_busy_delay(rc)) + continue; + switch (rc) { case COMPLETE: break; @@ -216,9 +218,6 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, last_dn = last_dn->parent; break; - case CALL_AGAIN: - break; - case MORE_MEMORY: case ERR_CFG_USE: default: diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index bbda646b63b5..210e6f563eb4 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -91,9 +91,6 @@ static void rtas_stop_self(void) BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); - printk("cpu %u (hwid %u) Ready to die...\n", - smp_processor_id(), hard_smp_processor_id()); - rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL); panic("Alas, I survived.\n"); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index e7d23a933a0d..f364909d0c08 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -279,7 +279,7 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb) return dlpar_change_lmb_state(lmb, false); } -static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) +static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size) { unsigned long block_sz, start_pfn; int sections_per_block; @@ -310,10 +310,11 @@ out: static int pseries_remove_mem_node(struct device_node *np) { - const __be32 *regs; + const __be32 *prop; unsigned long base; - unsigned int lmb_size; + unsigned long lmb_size; int ret = -EINVAL; + int addr_cells, size_cells; /* * Check to see if we are actually removing memory @@ -324,12 +325,19 @@ static int pseries_remove_mem_node(struct device_node *np) /* * Find the base address and size of the memblock */ - regs = of_get_property(np, "reg", NULL); - if (!regs) + prop = of_get_property(np, "reg", NULL); + if (!prop) return ret; - base = be64_to_cpu(*(unsigned long *)regs); - lmb_size = be32_to_cpu(regs[3]); + addr_cells = of_n_addr_cells(np); + size_cells = of_n_size_cells(np); + + /* + * "reg" property represents (addr,size) tuple. + */ + base = of_read_number(prop, addr_cells); + prop += addr_cells; + lmb_size = of_read_number(prop, size_cells); pseries_remove_memblock(base, lmb_size); return 0; @@ -376,25 +384,32 @@ static int dlpar_add_lmb(struct drmem_lmb *); static int dlpar_remove_lmb(struct drmem_lmb *lmb) { + struct memory_block *mem_block; unsigned long block_sz; int rc; if (!lmb_is_removable(lmb)) return -EINVAL; + mem_block = lmb_to_memblock(lmb); + if (mem_block == NULL) + return -EINVAL; + rc = dlpar_offline_lmb(lmb); - if (rc) + if (rc) { + put_device(&mem_block->dev); return rc; + } block_sz = pseries_memory_block_size(); - __remove_memory(lmb->nid, lmb->base_addr, block_sz); + __remove_memory(mem_block->nid, lmb->base_addr, block_sz); + put_device(&mem_block->dev); /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); - lmb_clear_nid(lmb); lmb->flags &= ~DRCONF_MEM_ASSIGNED; return 0; @@ -613,7 +628,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) #else static inline int pseries_remove_memblock(unsigned long base, - unsigned int memblock_size) + unsigned long memblock_size) { return -EOPNOTSUPP; } @@ -651,7 +666,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) static int dlpar_add_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; - int rc; + int nid, rc; if (lmb->flags & DRCONF_MEM_ASSIGNED) return -EINVAL; @@ -662,11 +677,13 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) return rc; } - lmb_set_nid(lmb); block_sz = memory_block_size_bytes(); + /* Find the node id for this address. */ + nid = memory_add_physaddr_to_nid(lmb->base_addr); + /* Add the memory */ - rc = __add_memory(lmb->nid, lmb->base_addr, block_sz); + rc = __add_memory(nid, lmb->base_addr, block_sz); if (rc) { invalidate_lmb_associativity_index(lmb); return rc; @@ -674,9 +691,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(lmb->nid, lmb->base_addr, block_sz); + __remove_memory(nid, lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); - lmb_clear_nid(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; } @@ -945,10 +961,11 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) static int pseries_add_mem_node(struct device_node *np) { - const __be32 *regs; + const __be32 *prop; unsigned long base; - unsigned int lmb_size; + unsigned long lmb_size; int ret = -EINVAL; + int addr_cells, size_cells; /* * Check to see if we are actually adding memory @@ -959,12 +976,18 @@ static int pseries_add_mem_node(struct device_node *np) /* * Find the base and size of the memblock */ - regs = of_get_property(np, "reg", NULL); - if (!regs) + prop = of_get_property(np, "reg", NULL); + if (!prop) return ret; - base = be64_to_cpu(*(unsigned long *)regs); - lmb_size = be32_to_cpu(regs[3]); + addr_cells = of_n_addr_cells(np); + size_cells = of_n_size_cells(np); + /* + * "reg" property represents (addr,size) tuple. + */ + base = of_read_number(prop, addr_cells); + prop += addr_cells; + lmb_size = of_read_number(prop, size_cells); /* * Update memory region to represent the memory add diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 133f6adcb39c..637300330507 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -4,6 +4,7 @@ * Copyright 2006-2007 Michael Ellerman, IBM Corp. */ +#include <linux/crash_dump.h> #include <linux/device.h> #include <linux/irq.h> #include <linux/msi.h> @@ -458,7 +459,28 @@ again: return hwirq; } - virq = irq_create_mapping(NULL, hwirq); + /* + * Depending on the number of online CPUs in the original + * kernel, it is likely for CPU #0 to be offline in a kdump + * kernel. The associated IRQs in the affinity mappings + * provided by irq_create_affinity_masks() are thus not + * started by irq_startup(), as per-design for managed IRQs. + * This can be a problem with multi-queue block devices driven + * by blk-mq : such a non-started IRQ is very likely paired + * with the single queue enforced by blk-mq during kdump (see + * blk_mq_alloc_tag_set()). This causes the device to remain + * silent and likely hangs the guest at some point. + * + * We don't really care for fine-grained affinity when doing + * kdump actually : simply ignore the pre-computed affinity + * masks in this case and let the default mask with all CPUs + * be used when creating the IRQ mappings. + */ + if (is_kdump_kernel()) + virq = irq_create_mapping(NULL, hwirq); + else + virq = irq_create_mapping_affinity(NULL, hwirq, + entry->affinity); if (!virq) { pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 561917fa54a8..afca4b737e80 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -66,6 +66,7 @@ EXPORT_SYMBOL_GPL(init_phb_dynamic); int remove_phb_dynamic(struct pci_controller *phb) { struct pci_bus *b = phb->bus; + struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge); struct resource *res; int rc, i; @@ -92,7 +93,8 @@ int remove_phb_dynamic(struct pci_controller *phb) /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); - device_unregister(b->bridge); + host_bridge->bus = NULL; + device_unregister(&host_bridge->dev); /* Now release the IO resource */ if (res->flags & IORESOURCE_IO) diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 13fa370a87e4..5fd56230b01c 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -114,4 +114,6 @@ int dlpar_workqueue_init(void); void pseries_setup_rfi_flush(void); void pseries_lpar_read_hblkrm_characteristics(void); +void pseries_rng_init(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index b3c4848869e5..b658fa627a34 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -494,18 +494,55 @@ int pSeries_system_reset_exception(struct pt_regs *regs) return 0; /* need to perform reset */ } +static int mce_handle_err_realmode(int disposition, u8 error_type) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (disposition == RTAS_DISP_NOT_RECOVERED) { + switch (error_type) { + case MC_ERROR_TYPE_SLB: + case MC_ERROR_TYPE_ERAT: + /* + * Store the old slb content in paca before flushing. + * Print this when we go to virtual mode. + * There are chances that we may hit MCE again if there + * is a parity error on the SLB entry we trying to read + * for saving. Hence limit the slb saving to single + * level of recursion. + */ + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + flush_and_reload_slb(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + default: + break; + } + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { + /* Platform corrected itself but could be degraded */ + pr_err("MCE: limited recovery, system may be degraded\n"); + disposition = RTAS_DISP_FULLY_RECOVERED; + } +#endif + return disposition; +} -static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +static int mce_handle_err_virtmode(struct pt_regs *regs, + struct rtas_error_log *errp, + struct pseries_mc_errorlog *mce_log, + int disposition) { struct mce_error_info mce_err = { 0 }; - unsigned long eaddr = 0, paddr = 0; - struct pseries_errorlog *pseries_log; - struct pseries_mc_errorlog *mce_log; - int disposition = rtas_error_disposition(errp); int initiator = rtas_error_initiator(errp); int severity = rtas_error_severity(errp); + unsigned long eaddr = 0, paddr = 0; u8 error_type, err_sub_type; + if (!mce_log) + goto out; + + error_type = mce_log->error_type; + err_sub_type = rtas_mc_error_sub_type(mce_log); + if (initiator == RTAS_INITIATOR_UNKNOWN) mce_err.initiator = MCE_INITIATOR_UNKNOWN; else if (initiator == RTAS_INITIATOR_CPU) @@ -544,18 +581,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; mce_err.error_class = MCE_ECLASS_UNKNOWN; - if (!rtas_error_extended(errp)) - goto out; - - pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); - if (pseries_log == NULL) - goto out; - - mce_log = (struct pseries_mc_errorlog *)pseries_log->data; - error_type = mce_log->error_type; - err_sub_type = rtas_mc_error_sub_type(mce_log); - - switch (mce_log->error_type) { + switch (error_type) { case MC_ERROR_TYPE_UE: mce_err.error_type = MCE_ERROR_TYPE_UE; switch (err_sub_type) { @@ -652,37 +678,31 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; break; } +out: + save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, + &mce_err, regs->nip, eaddr, paddr); + return disposition; +} -#ifdef CONFIG_PPC_BOOK3S_64 - if (disposition == RTAS_DISP_NOT_RECOVERED) { - switch (error_type) { - case MC_ERROR_TYPE_SLB: - case MC_ERROR_TYPE_ERAT: - /* - * Store the old slb content in paca before flushing. - * Print this when we go to virtual mode. - * There are chances that we may hit MCE again if there - * is a parity error on the SLB entry we trying to read - * for saving. Hence limit the slb saving to single - * level of recursion. - */ - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - flush_and_reload_slb(); - disposition = RTAS_DISP_FULLY_RECOVERED; - break; - default: - break; - } - } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { - /* Platform corrected itself but could be degraded */ - printk(KERN_ERR "MCE: limited recovery, system may " - "be degraded\n"); - disposition = RTAS_DISP_FULLY_RECOVERED; - } -#endif +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +{ + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log = NULL; + int disposition = rtas_error_disposition(errp); + u8 error_type; + + if (!rtas_error_extended(errp)) + goto out; + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (!pseries_log) + goto out; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + error_type = mce_log->error_type; + + disposition = mce_handle_err_realmode(disposition, error_type); -out: /* * Enable translation as we will be accessing per-cpu variables * in save_mce_event() which may fall outside RMO region, also @@ -693,10 +713,10 @@ out: * Note: All the realmode handling like flushing SLB entries for * SLB multihit is done by now. */ +out: mtmsr(mfmsr() | MSR_IR | MSR_DR); - save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, - &mce_err, regs->nip, eaddr, paddr); - + disposition = mce_handle_err_virtmode(regs, errp, mce_log, + disposition); return disposition; } diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index bbb97169bf63..6ddfdeaace9e 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -10,6 +10,7 @@ #include <asm/archrandom.h> #include <asm/machdep.h> #include <asm/plpar_wrappers.h> +#include "pseries.h" static int pseries_get_random_long(unsigned long *v) @@ -24,18 +25,13 @@ static int pseries_get_random_long(unsigned long *v) return 0; } -static __init int rng_init(void) +void __init pseries_rng_init(void) { struct device_node *dn; dn = of_find_compatible_node(NULL, NULL, "ibm,random"); if (!dn) - return -ENODEV; - - pr_info("Registering arch random hook.\n"); - + return; ppc_md.get_random_seed = pseries_get_random_long; - - return 0; + of_node_put(dn); } -machine_subsys_initcall(pseries, rng_init); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 0c8421dd01ab..d5abb25865e3 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -75,7 +75,7 @@ #include "../../../../drivers/pci/pci.h" DEFINE_STATIC_KEY_FALSE(shared_processor); -EXPORT_SYMBOL_GPL(shared_processor); +EXPORT_SYMBOL(shared_processor); int CMO_PrPSP = -1; int CMO_SecPSP = -1; @@ -561,6 +561,14 @@ void pseries_setup_rfi_flush(void) setup_rfi_flush(types, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } #ifdef CONFIG_PCI_IOV @@ -784,6 +792,8 @@ static void __init pSeries_setup_arch(void) if (swiotlb_force == SWIOTLB_FORCE) ppc_swiotlb_enable = 1; + + pseries_rng_init(); } static void pseries_panic(char *str) diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index f789693f61f4..e5ecadfb5dea 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -13,7 +13,6 @@ #include <asm/mmu.h> #include <asm/rtas.h> #include <asm/topology.h> -#include "../../kernel/cacheinfo.h" static u64 stream_id; static struct device suspend_dev; @@ -78,9 +77,7 @@ static void pseries_suspend_enable_irqs(void) * Update configuration which can be modified based on device tree * changes during resume. */ - cacheinfo_cpu_offline(smp_processor_id()); post_mobility_fixup(); - cacheinfo_cpu_online(smp_processor_id()); } /** @@ -190,7 +187,6 @@ static struct bus_type suspend_subsys = { static const struct platform_suspend_ops pseries_suspend_ops = { .valid = suspend_valid_only_mem, - .begin = pseries_suspend_begin, .prepare_late = pseries_prepare_late, .enter = pseries_suspend_enter, }; diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 6b4a34b36d98..8ff9bcfe4b8d 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -403,9 +403,10 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) } /* Initialize the DART HW */ - if (dart_init(dn) != 0) + if (dart_init(dn) != 0) { + of_node_put(dn); return; - + } /* * U4 supports a DART bypass, we use it for 64-bit capable devices to * improve performance. However, that only works for devices connected @@ -418,6 +419,7 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) /* Setup pci_dma ops */ set_pci_dma_ops(&dma_iommu_ops); + of_node_put(dn); } #ifdef CONFIG_PM diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S index efeeb1b885a1..329b9c4ae542 100644 --- a/arch/powerpc/sysdev/dcr-low.S +++ b/arch/powerpc/sysdev/dcr-low.S @@ -11,7 +11,7 @@ #include <asm/export.h> #define DCR_ACCESS_PROLOG(table) \ - cmpli cr0,r3,1024; \ + cmplwi cr0,r3,1024; \ rlwinm r3,r3,4,18,27; \ lis r5,table@h; \ ori r5,r5,table@l; \ diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 8963eaffb1b7..39186ad6b3c3 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -86,7 +86,7 @@ static LIST_HEAD(gtms); */ struct gtm_timer *gtm_get_timer16(void) { - struct gtm *gtm = NULL; + struct gtm *gtm; int i; list_for_each_entry(gtm, >ms, list_node) { @@ -103,7 +103,7 @@ struct gtm_timer *gtm_get_timer16(void) spin_unlock_irq(>m->lock); } - if (gtm) + if (!list_empty(>ms)) return ERR_PTR(-EBUSY); return ERR_PTR(-ENODEV); } diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 07c164f7f8cf..3f9f78621cf3 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -505,8 +505,10 @@ int fsl_rio_setup(struct platform_device *dev) if (rc) { dev_err(&dev->dev, "Can't get %pOF property 'reg'\n", rmu_node); + of_node_put(rmu_node); goto err_rmu; } + of_node_put(rmu_node); rmu_regs_win = ioremap(rmu_regs.start, resource_size(&rmu_regs)); if (!rmu_regs_win) { dev_err(&dev->dev, "Unable to map rmu register window\n"); diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index f6b253e2be40..36ec0bdd8b63 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -191,7 +191,7 @@ static int mpic_msgr_probe(struct platform_device *dev) /* IO map the message register block. */ of_address_to_resource(np, 0, &rsrc); - msgr_block_addr = ioremap(rsrc.start, resource_size(&rsrc)); + msgr_block_addr = devm_ioremap(&dev->dev, rsrc.start, resource_size(&rsrc)); if (!msgr_block_addr) { dev_err(&dev->dev, "Failed to iomap MPIC message registers"); return -EFAULT; diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index ad8117148ea3..21b9d1bf39ff 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -174,6 +174,7 @@ int icp_hv_init(void) icp_ops = &icp_hv_ops; + of_node_put(np); return 0; } diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index 68fd2540b093..7fa520efcefa 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -195,6 +195,7 @@ int icp_opal_init(void) printk("XICS: Using OPAL ICP fallbacks\n"); + of_node_put(np); return 0; } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 16df9cc8f360..d4467da27966 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -257,17 +257,20 @@ notrace void xmon_xive_do_dump(int cpu) xmon_printf("\n"); } +static struct irq_data *xive_get_irq_data(u32 hw_irq) +{ + unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq); + + return irq ? irq_get_irq_data(irq) : NULL; +} + int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) { - struct irq_chip *chip = irq_data_get_irq_chip(d); int rc; u32 target; u8 prio; u32 lirq; - if (!is_xive_irq(chip)) - return -EINVAL; - rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); @@ -277,6 +280,9 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", hw_irq, target, prio, lirq); + if (!d) + d = xive_get_irq_data(hw_irq); + if (d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); u64 val = xive_esb_read(xd, XIVE_ESB_GET); @@ -984,7 +990,8 @@ static int xive_get_irqchip_state(struct irq_data *data, * interrupt to be inactive in that case. */ *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p && - (xd->saved_p || !!(pq & XIVE_ESB_VAL_P)); + (xd->saved_p || (!!(pq & XIVE_ESB_VAL_P) && + !irqd_irq_disabled(data))); return 0; default: return -EINVAL; diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c index 5c1a50912229..9b0d85bff021 100644 --- a/arch/powerpc/xmon/nonstdio.c +++ b/arch/powerpc/xmon/nonstdio.c @@ -178,7 +178,7 @@ void xmon_printf(const char *format, ...) if (n && rc == 0) { /* No udbg hooks, fallback to printk() - dangerous */ - printk("%s", xmon_outbuf); + pr_cont("%s", xmon_outbuf); } } |