From 0b9a29add43273e64ef45472e08d38116fee1d82 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 24 Apr 2016 14:30:16 -0700 Subject: arm: Use _rcuidle tracepoint to allow use from idle Testing on ARM encountered the following pair of lockdep-RCU splats: ------------------------------------------------------------------------ =============================== [ INFO: suspicious RCU usage. ] 4.6.0-rc4-next-20160422 #1 Not tainted ------------------------------- include/trace/events/power.h:328 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! no locks held by swapper/0/0. stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc4-next-20160422 #1 Hardware name: Generic OMAP3-GP (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xa8/0xe0) [] (dump_stack) from [] (pwrdm_set_next_pwrst+0xf8/0x1cc) [] (pwrdm_set_next_pwrst) from [] (omap3_enter_idle_bm+0x1b8/0x1e8) [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x84/0x408) [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x1c8/0x3f0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3cc) ------------------------------------------------------------------------ [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xa8/0xe0) [] (dump_stack) from [] (_pwrdm_state_switch+0x188/0x32c) [] (_pwrdm_state_switch) from [] (_pwrdm_post_transition_cb+0xc/0x14) [] (_pwrdm_post_transition_cb) from [] (pwrdm_for_each+0x30/0x5c) [] (pwrdm_for_each) from [] (pwrdm_post_transition+0x24/0x30) [] (pwrdm_post_transition) from [] (omap_sram_idle+0xfc/0x240) [] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xf0/0x1e8) [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x84/0x408) [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x1c8/0x3f0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3cc) ------------------------------------------------------------------------ These are caused by event tracing from the idle loop, and they were exposed by commit 293e2421fe25 ("rcu: Remove superfluous versions of rcu_read_lock_sched_held()"), which suppressed some false negatives. The current commit therefore adds the _rcuidle suffix to make RCU aware of this implicit use of RCU by event tracing, thus preventing both splats. Reported-by: Guenter Roeck Signed-off-by: Paul E. McKenney Tested-by: Guenter Roeck Tested-by: Tony Lindgren Cc: Russell King Reviewed-by: Steven Rostedt Cc: Cc: --- arch/arm/mach-omap2/powerdomain.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 78af6d8cf2e2..daf2753de7aa 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -186,8 +186,9 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, int flag) trace_state = (PWRDM_TRACE_STATES_FLAG | ((next & OMAP_POWERSTATE_MASK) << 8) | ((prev & OMAP_POWERSTATE_MASK) << 0)); - trace_power_domain_target(pwrdm->name, trace_state, - smp_processor_id()); + trace_power_domain_target_rcuidle(pwrdm->name, + trace_state, + smp_processor_id()); } break; default: @@ -523,8 +524,8 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst) if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) { /* Trace the pwrdm desired target state */ - trace_power_domain_target(pwrdm->name, pwrst, - smp_processor_id()); + trace_power_domain_target_rcuidle(pwrdm->name, pwrst, + smp_processor_id()); /* Program the pwrdm desired target state */ ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst); } -- cgit v1.2.3-59-g8ed1b From 6b4e941875ca464e0bf737d7cdc9b72b008df6eb Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 5 May 2016 15:33:37 -0700 Subject: ARM: dts: omap5-board-common: Describe the voltage supply mapping accurately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OMAP5uEVM based platforms share a similar voltage rail map. This should be properly described in device tree, without this regulator core will be unable to determine the source voltage of LDOs such as LDO9 and SMPS10 which could be configured for bypass depending on the voltage requested of them. This results in conditions such as: ldo9: bypassed regulator has no supply! ldo9: failed to get the current voltage(-517) palmas-pmic 48070000.i2c:palmas@48:palmas_pmic: failed to register 48070000.i2c:palmas@48:palmas_pmic regulator Cc: Agustí Fontquerni Cc: Eduard Gavin Cc: Enric Balletbo i Serra Cc: Peter Ujfalusi Signed-off-by: Nishanth Menon [tony@atomide.com: fixed to use palmas style in-supply] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index d0990606d16e..ebbaa140355b 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -14,6 +14,29 @@ display0 = &hdmi0; }; + vmain: fixedregulator-vmain { + compatible = "regulator-fixed"; + regulator-name = "vmain"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + + vsys_cobra: fixedregulator-vsys_cobra { + compatible = "regulator-fixed"; + regulator-name = "vsys_cobra"; + vin-supply = <&vmain>; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + + vdds_1v8_main: fixedregulator-vdds_1v8_main { + compatible = "regulator-fixed"; + regulator-name = "vdds_1v8_main"; + vin-supply = <&smps7_reg>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + vmmcsd_fixed: fixedregulator-mmcsd { compatible = "regulator-fixed"; regulator-name = "vmmcsd_fixed"; @@ -409,6 +432,26 @@ ti,ldo6-vibrator; + smps123-in-supply = <&vsys_cobra>; + smps45-in-supply = <&vsys_cobra>; + smps6-in-supply = <&vsys_cobra>; + smps7-in-supply = <&vsys_cobra>; + smps8-in-supply = <&vsys_cobra>; + smps9-in-supply = <&vsys_cobra>; + smps10_out2-in-supply = <&vsys_cobra>; + smps10_out1-in-supply = <&vsys_cobra>; + ldo1-in-supply = <&vsys_cobra>; + ldo2-in-supply = <&vsys_cobra>; + ldo3-in-supply = <&vdds_1v8_main>; + ldo4-in-supply = <&vdds_1v8_main>; + ldo5-in-supply = <&vsys_cobra>; + ldo6-in-supply = <&vdds_1v8_main>; + ldo7-in-supply = <&vsys_cobra>; + ldo8-in-supply = <&vsys_cobra>; + ldo9-in-supply = <&vmmcsd_fixed>; + ldoln-in-supply = <&vsys_cobra>; + ldousb-in-supply = <&vsys_cobra>; + regulators { smps123_reg: smps123 { /* VDD_OPP_MPU */ -- cgit v1.2.3-59-g8ed1b From c0053bd50af57c4ebf032a9de1b07ca78c982452 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 6 Aug 2015 10:54:24 -0500 Subject: ARM: OMAP5 / DRA7: Introduce workaround for 801819 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add workaround for Cortex-A15 ARM erratum 801819 which says in summary that "A livelock can occur in the L2 cache arbitration that might prevent a snoop from completing. Under certain conditions this can cause the system to deadlock. " Recommended workaround is as follows: Do both of the following: 1) Do not use the write-back no-allocate memory type. 2) Do not issue write-back cacheable stores at any time when the cache is disabled (SCTLR.C=0) and the MMU is enabled (SCTLR.M=1). Because it is implementation defined whether cacheable stores update the cache when the cache is disabled it is not expected that any portable code will execute cacheable stores when the cache is disabled. For implementations of Cortex-A15 configured without the “L2 arbitration register slice” option (typically one or two core systems), you must also do the following: 3) Disable write-streaming in each CPU by setting ACTLR[28:25] = 0b1111 So, we provide an option to disable write streaming on OMAP5 and DRA7. It is a rare condition to occur and may be enabled selectively based on platform acceptance of risk. Applies to: A15 revisions r2p0, r2p1, r2p2, r2p3 or r2p4 and REVIDR[3] is set to 0. Based on ARM errata Document revision 18.0 (22 Nov 2013) Note: the configuration for the workaround needs to be done with each CPU bringup, since CPU0 bringup is done by bootloader, it is recommended to have the workaround in the bootloader, kernel also does ensure that CPU0 has the workaround and makes the workaround active when CPU1 gets active. With CONFIG_SMP disabled, it is expected to be done by the bootloader. This does show significant degradation in synthetic tests such as mbw (https://packages.qa.debian.org/m/mbw.html) mbw -n 100 100|grep AVG (on a test platform) Without enabling the erratum: AVG Method: MEMCPY Elapsed: 0.13406 MiB: 100.00000 Copy: 745.913 MiB/s AVG Method: DUMB Elapsed: 0.06746 MiB: 100.00000 Copy: 1482.357 MiB/s AVG Method: MCBLOCK Elapsed: 0.03058 MiB: 100.00000 Copy: 3270.569 MiB/s After enabling the erratum: AVG Method: MEMCPY Elapsed: 0.13757 MiB: 100.00000 Copy: 726.913 MiB/s AVG Method: DUMB Elapsed: 0.12024 MiB: 100.00000 Copy: 831.668 MiB/s AVG Method: MCBLOCK Elapsed: 0.09243 MiB: 100.00000 Copy: 1081.942 MiB/s Most benchmarks are designed for specific performance analysis, so overall usecase must be considered before making a decision to enable/disable the erratum workaround. Pending internal investigation, the erratum is kept disabled by default. Cc: Russell King Cc: Catalin Marinas Cc: Tony Lindgren Suggested-by: Richard Woodruff Suggested-by: Brad Griffis Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 8 +++++++ arch/arm/mach-omap2/omap-secure.h | 1 + arch/arm/mach-omap2/omap-smp.c | 48 +++++++++++++++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 0517f0c1581a..a63d3fe2ca46 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -240,4 +240,12 @@ endmenu endif +config OMAP5_ERRATA_801819 + bool "Errata 801819: An eviction from L1 data cache might stall indefinitely" + depends on SOC_OMAP5 || SOC_DRA7XX + help + A livelock can occur in the L2 cache arbitration that might prevent + a snoop from completing. Under certain conditions this can cause the + system to deadlock. + endmenu diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index af2851fbcdf0..bae263fba640 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -46,6 +46,7 @@ #define OMAP5_DRA7_MON_SET_CNTFRQ_INDEX 0x109 #define OMAP5_MON_AMBA_IF_INDEX 0x108 +#define OMAP5_DRA7_MON_SET_ACR_INDEX 0x107 /* Secure PPA(Primary Protected Application) APIs */ #define OMAP4_PPA_L2_POR_INDEX 0x23 diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index c625cc10d9f9..8cd1de914ee4 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -50,6 +50,39 @@ void __iomem *omap4_get_scu_base(void) return scu_base; } +#ifdef CONFIG_OMAP5_ERRATA_801819 +void omap5_erratum_workaround_801819(void) +{ + u32 acr, revidr; + u32 acr_mask; + + /* REVIDR[3] indicates erratum fix available on silicon */ + asm volatile ("mrc p15, 0, %0, c0, c0, 6" : "=r" (revidr)); + if (revidr & (0x1 << 3)) + return; + + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + /* + * BIT(27) - Disables streaming. All write-allocate lines allocate in + * the L1 or L2 cache. + * BIT(25) - Disables streaming. All write-allocate lines allocate in + * the L1 cache. + */ + acr_mask = (0x3 << 25) | (0x3 << 27); + /* do we already have it done.. if yes, skip expensive smc */ + if ((acr & acr_mask) == acr_mask) + return; + + acr |= acr_mask; + omap_smc1(OMAP5_DRA7_MON_SET_ACR_INDEX, acr); + + pr_debug("%s: ARM erratum workaround 801819 applied on CPU%d\n", + __func__, smp_processor_id()); +} +#else +static inline void omap5_erratum_workaround_801819(void) { } +#endif + static void omap4_secondary_init(unsigned int cpu) { /* @@ -64,12 +97,15 @@ static void omap4_secondary_init(unsigned int cpu) omap_secure_dispatcher(OMAP4_PPA_CPU_ACTRL_SMP_INDEX, 4, 0, 0, 0, 0, 0); - /* - * Configure the CNTFRQ register for the secondary cpu's which - * indicates the frequency of the cpu local timers. - */ - if (soc_is_omap54xx() || soc_is_dra7xx()) + if (soc_is_omap54xx() || soc_is_dra7xx()) { + /* + * Configure the CNTFRQ register for the secondary cpu's which + * indicates the frequency of the cpu local timers. + */ set_cntfreq(); + /* Configure ACR to disable streaming WA for 801819 */ + omap5_erratum_workaround_801819(); + } /* * Synchronise with the boot thread. @@ -218,6 +254,8 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus) if (cpu_is_omap446x()) startup_addr = omap4460_secondary_startup; + if (soc_is_dra74x() || soc_is_omap54xx()) + omap5_erratum_workaround_801819(); /* * Write the address of secondary startup routine into the -- cgit v1.2.3-59-g8ed1b From 49111cd1c5498d2a356b0e51d74987dad0e88530 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 12 May 2016 13:29:48 -0700 Subject: ARM: dts: Fix igepv5 audiopwon-gpio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Playing audio works on omap5-uevm, but produces an "Unhandled fault: imprecise external abort (0x1406) at 0x00000000" error on igepv5. Looks like the twl6040 audpwron GPIO pin is different for these boards. Let's fix the issue by configuring the audpwron in the board specific dts file. Cc: Agustí Fontquerni Cc: Eduard Gavin Cc: Enric Balletbo i Serra Acked-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 3 ++- arch/arm/boot/dts/omap5-igep0050.dts | 10 ++++++++++ arch/arm/boot/dts/omap5-uevm.dts | 10 ++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index ebbaa140355b..c6aa65a68642 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -643,7 +643,8 @@ pinctrl-0 = <&twl6040_pins>; interrupts = ; /* IRQ_SYS_2N cascaded to gic */ - ti,audpwron-gpio = <&gpio5 13 GPIO_ACTIVE_HIGH>; /* gpio line 141 */ + + /* audpwron gpio defined in the board specific dts */ vio-supply = <&smps7_reg>; v2v1-supply = <&smps9_reg>; diff --git a/arch/arm/boot/dts/omap5-igep0050.dts b/arch/arm/boot/dts/omap5-igep0050.dts index 46ecb1dd3b5c..700966b85575 100644 --- a/arch/arm/boot/dts/omap5-igep0050.dts +++ b/arch/arm/boot/dts/omap5-igep0050.dts @@ -52,3 +52,13 @@ <&gpio7 3 0>; /* 195, SDA */ }; +&twl6040 { + ti,audpwron-gpio = <&gpio5 16 GPIO_ACTIVE_HIGH>; /* gpio line 144 */ +}; + +&twl6040_pins { + pinctrl-single,pins = < + OMAP5_IOPAD(0x1c4, PIN_OUTPUT | MUX_MODE6) /* mcspi1_somi.gpio5_144 */ + OMAP5_IOPAD(0x1ca, PIN_OUTPUT | MUX_MODE6) /* perslimbus2_clock.gpio5_145 */ + >; +}; diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index 60b3fbb3bf07..a51e60518eb6 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -51,3 +51,13 @@ <&gpio9 1 GPIO_ACTIVE_HIGH>, /* TCA6424A P00, LS OE */ <&gpio7 1 GPIO_ACTIVE_HIGH>; /* GPIO 193, HPD */ }; + +&twl6040 { + ti,audpwron-gpio = <&gpio5 13 GPIO_ACTIVE_HIGH>; /* gpio line 141 */ +}; + +&twl6040_pins { + pinctrl-single,pins = < + OMAP5_IOPAD(0x1be, PIN_OUTPUT | MUX_MODE6) /* mcspi1_somi.gpio5_141 */ + >; +}; -- cgit v1.2.3-59-g8ed1b From e0e80d43fc0c5aace76a6c5f9462a1aec3e004f8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 12 May 2016 13:29:48 -0700 Subject: ARM: dts: Fix uart wakeirq on omap5 by removing WAKEUP_EN for omaps The padconf register WAKEUP_EN is now handled in a generic way using Linux wakeirqs where pinctrl-single toggles the WAKEUP_EN bit when a wakeirq is enabled or disabled. At least omap5 gets confused if the WAKEUP_EN bit is set and the pin is not claimed as a wakeirq. The end result is that wakeirqs don't work properly as there is nothing handling the wakeirq. So let's just remove the WAKEUP_EN usage from dts files. Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-evm-37xx.dts | 2 +- arch/arm/boot/dts/omap3-n900.dts | 4 ++-- arch/arm/boot/dts/omap3-n950-n9.dtsi | 6 +++--- arch/arm/boot/dts/omap3-zoom3.dts | 6 +++--- arch/arm/boot/dts/omap5-board-common.dtsi | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-evm-37xx.dts b/arch/arm/boot/dts/omap3-evm-37xx.dts index 76056ba92ced..ed449827c3d3 100644 --- a/arch/arm/boot/dts/omap3-evm-37xx.dts +++ b/arch/arm/boot/dts/omap3-evm-37xx.dts @@ -85,7 +85,7 @@ OMAP3_CORE1_IOPAD(0x2158, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_clk.sdmmc2_clk */ OMAP3_CORE1_IOPAD(0x215a, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_cmd.sdmmc2_cmd */ OMAP3_CORE1_IOPAD(0x215c, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat0.sdmmc2_dat0 */ - OMAP3_CORE1_IOPAD(0x215e, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat1.sdmmc2_dat1 */ + OMAP3_CORE1_IOPAD(0x215e, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat1.sdmmc2_dat1 */ OMAP3_CORE1_IOPAD(0x2160, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat2.sdmmc2_dat2 */ OMAP3_CORE1_IOPAD(0x2162, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat3.sdmmc2_dat3 */ >; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index b3c26a96a726..9c9e2ae6d6aa 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -288,7 +288,7 @@ pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLUP | MUX_MODE1) /* ssi1_rdy_tx */ OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE1) /* ssi1_flag_tx */ - OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */ + OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4) /* ssi1_wake_tx (cawake) */ OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE1) /* ssi1_dat_tx */ OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE1) /* ssi1_dat_rx */ OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE1) /* ssi1_flag_rx */ @@ -300,7 +300,7 @@ modem_pins: pinmux_modem { pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x20dc, PIN_OUTPUT | MUX_MODE4) /* gpio 70 => cmt_apeslpx */ - OMAP3_CORE1_IOPAD(0x20e0, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* gpio 72 => ape_rst_rq */ + OMAP3_CORE1_IOPAD(0x20e0, PIN_INPUT | MUX_MODE4) /* gpio 72 => ape_rst_rq */ OMAP3_CORE1_IOPAD(0x20e2, PIN_OUTPUT | MUX_MODE4) /* gpio 73 => cmt_rst_rq */ OMAP3_CORE1_IOPAD(0x20e4, PIN_OUTPUT | MUX_MODE4) /* gpio 74 => cmt_en */ OMAP3_CORE1_IOPAD(0x20e6, PIN_OUTPUT | MUX_MODE4) /* gpio 75 => cmt_rst */ diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index a00ca761675d..927b17fc4ed8 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -97,7 +97,7 @@ OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE1) /* ssi1_dat_tx */ OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE1) /* ssi1_flag_tx */ OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLUP | MUX_MODE1) /* ssi1_rdy_tx */ - OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */ + OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4) /* ssi1_wake_tx (cawake) */ OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE1) /* ssi1_dat_rx */ OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE1) /* ssi1_flag_rx */ OMAP3_CORE1_IOPAD(0x2188, PIN_OUTPUT | MUX_MODE1) /* ssi1_rdy_rx */ @@ -110,7 +110,7 @@ OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE7) /* ssi1_dat_tx */ OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE7) /* ssi1_flag_tx */ OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLDOWN | MUX_MODE7) /* ssi1_rdy_tx */ - OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */ + OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4) /* ssi1_wake_tx (cawake) */ OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE7) /* ssi1_dat_rx */ OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE7) /* ssi1_flag_rx */ OMAP3_CORE1_IOPAD(0x2188, PIN_OUTPUT | MUX_MODE4) /* ssi1_rdy_rx */ @@ -120,7 +120,7 @@ modem_pins1: pinmux_modem_core1_pins { pinctrl-single,pins = < - OMAP3_CORE1_IOPAD(0x207a, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* gpio_34 (ape_rst_rq) */ + OMAP3_CORE1_IOPAD(0x207a, PIN_INPUT | MUX_MODE4) /* gpio_34 (ape_rst_rq) */ OMAP3_CORE1_IOPAD(0x2100, PIN_OUTPUT | MUX_MODE4) /* gpio_88 (cmt_rst_rq) */ OMAP3_CORE1_IOPAD(0x210a, PIN_OUTPUT | MUX_MODE4) /* gpio_93 (cmt_apeslpx) */ >; diff --git a/arch/arm/boot/dts/omap3-zoom3.dts b/arch/arm/boot/dts/omap3-zoom3.dts index f19170bdcc1f..c29b41dc7b95 100644 --- a/arch/arm/boot/dts/omap3-zoom3.dts +++ b/arch/arm/boot/dts/omap3-zoom3.dts @@ -98,7 +98,7 @@ pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x2174, PIN_INPUT_PULLUP | MUX_MODE0) /* uart2_cts.uart2_cts */ OMAP3_CORE1_IOPAD(0x2176, PIN_OUTPUT | MUX_MODE0) /* uart2_rts.uart2_rts */ - OMAP3_CORE1_IOPAD(0x217a, WAKEUP_EN | PIN_INPUT | MUX_MODE0) /* uart2_rx.uart2_rx */ + OMAP3_CORE1_IOPAD(0x217a, PIN_INPUT | MUX_MODE0) /* uart2_rx.uart2_rx */ OMAP3_CORE1_IOPAD(0x2178, PIN_OUTPUT | MUX_MODE0) /* uart2_tx.uart2_tx */ >; }; @@ -107,7 +107,7 @@ pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x219a, PIN_INPUT_PULLDOWN | MUX_MODE0) /* uart3_cts_rctx.uart3_cts_rctx */ OMAP3_CORE1_IOPAD(0x219c, PIN_OUTPUT | MUX_MODE0) /* uart3_rts_sd.uart3_rts_sd */ - OMAP3_CORE1_IOPAD(0x219e, WAKEUP_EN | PIN_INPUT | MUX_MODE0) /* uart3_rx_irrx.uart3_rx_irrx */ + OMAP3_CORE1_IOPAD(0x219e, PIN_INPUT | MUX_MODE0) /* uart3_rx_irrx.uart3_rx_irrx */ OMAP3_CORE1_IOPAD(0x21a0, PIN_OUTPUT | MUX_MODE0) /* uart3_tx_irtx.uart3_tx_irtx */ >; }; @@ -125,7 +125,7 @@ pinctrl-single,pins = < OMAP3630_CORE2_IOPAD(0x25d8, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_clk.sdmmc3_clk */ OMAP3630_CORE2_IOPAD(0x25e4, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d4.sdmmc3_dat0 */ - OMAP3630_CORE2_IOPAD(0x25e6, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d5.sdmmc3_dat1 */ + OMAP3630_CORE2_IOPAD(0x25e6, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d5.sdmmc3_dat1 */ OMAP3630_CORE2_IOPAD(0x25e8, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d6.sdmmc3_dat2 */ OMAP3630_CORE2_IOPAD(0x25e2, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d3.sdmmc3_dat3 */ >; diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index c6aa65a68642..9851b5767ec4 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -332,7 +332,7 @@ wlcore_irq_pin: pinmux_wlcore_irq_pin { pinctrl-single,pins = < - OMAP5_IOPAD(0x40, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE6) /* llia_wakereqin.gpio1_wk14 */ + OMAP5_IOPAD(0x40, PIN_INPUT_PULLUP | MUX_MODE6) /* llia_wakereqin.gpio1_wk14 */ >; }; }; -- cgit v1.2.3-59-g8ed1b From 6c05495d6d0507a6e7886265f82339bebf25cfed Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 12 May 2016 13:29:48 -0700 Subject: ARM: dts: Fix ldo7 source for HDMI on igepv5 Fix ldo7 source for HDMI on igepv5. Suggested-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-igep0050.dts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap5-igep0050.dts b/arch/arm/boot/dts/omap5-igep0050.dts index 700966b85575..f75ce02fb398 100644 --- a/arch/arm/boot/dts/omap5-igep0050.dts +++ b/arch/arm/boot/dts/omap5-igep0050.dts @@ -35,6 +35,22 @@ }; }; +/* LDO4 is VPP1 - ball AD9 */ +&ldo4_reg { + regulator-min-microvolt = <2000000>; + regulator-max-microvolt = <2000000>; +}; + +/* + * LDO7 is used for HDMI: VDDA_DSIPORTA - ball AA33, VDDA_DSIPORTC - ball AE33, + * VDDA_HDMI - ball AN25 + */ +&ldo7_reg { + status = "okay"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; +}; + &omap5_pmx_core { i2c4_pins: pinmux_i2c4_pins { pinctrl-single,pins = < -- cgit v1.2.3-59-g8ed1b From 54c78870e4d70420b887d712a3e73a6783d5b51d Mon Sep 17 00:00:00 2001 From: Nicolas Chauvet Date: Tue, 10 May 2016 12:14:57 +0200 Subject: ARM: dts: Add non-removable to hsmmc on hp-t410 This will clean-up warnings at boot, since either that or cd-gpio{,s} are mandated by the dts specification of_get_named_gpiod_flags: can't parse 'cd-gpios' property of node '/ocp/mmc@47810000[0]' of_get_named_gpiod_flags: can't parse 'cd-gpio' property of node '/ocp/mmc@47810000[0]' v2: use the generic non-removable instead of ti,non-removable Signed-off-by: Nicolas Chauvet Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dm8148-t410.dts | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts index 5d4313fd5a46..97000b521cea 100644 --- a/arch/arm/boot/dts/dm8148-t410.dts +++ b/arch/arm/boot/dts/dm8148-t410.dts @@ -53,6 +53,7 @@ dmas = <&edma_xbar 8 0 1 /* use SDTXEVT1 instead of MCASP0TX */ &edma_xbar 9 0 2>; /* use SDRXEVT1 instead of MCASP0RX */ dma-names = "tx", "rx"; + non-removable; }; &pincntl { -- cgit v1.2.3-59-g8ed1b From 1ddbef4d596ce51bf25825549bc5865de08a7e62 Mon Sep 17 00:00:00 2001 From: Nicolas Chauvet Date: Tue, 10 May 2016 12:14:58 +0200 Subject: ARM: dts: disable mmc by default and enable when needed for dm814x This patch disable mmc nodes by default in the dm814x.dtsi and enable only when needed on a given dts v2: Disable un-used mmc nodes on the related boards dts files instead of from the included SOC dts Signed-off-by: Nicolas Chauvet Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dm8148-evm.dts | 8 ++++++++ arch/arm/boot/dts/dm8148-t410.dts | 8 ++++++++ 2 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts index cbc17b0794b1..4128fa91823c 100644 --- a/arch/arm/boot/dts/dm8148-evm.dts +++ b/arch/arm/boot/dts/dm8148-evm.dts @@ -93,6 +93,10 @@ }; }; +&mmc1 { + status = "disabled"; +}; + &mmc2 { pinctrl-names = "default"; pinctrl-0 = <&sd1_pins>; @@ -101,6 +105,10 @@ cd-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>; }; +&mmc3 { + status = "disabled"; +}; + &pincntl { sd1_pins: pinmux_sd1_pins { pinctrl-single,pins = < diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts index 97000b521cea..3f184863e0c5 100644 --- a/arch/arm/boot/dts/dm8148-t410.dts +++ b/arch/arm/boot/dts/dm8148-t410.dts @@ -45,6 +45,14 @@ phy-mode = "rgmii"; }; +&mmc1 { + status = "disabled"; +}; + +&mmc2 { + status = "disabled"; +}; + &mmc3 { pinctrl-names = "default"; pinctrl-0 = <&sd2_pins>; -- cgit v1.2.3-59-g8ed1b From 10ce2404ccab6828f86fb038e4888837f49f9f48 Mon Sep 17 00:00:00 2001 From: Franklin S Cooper Jr Date: Wed, 4 May 2016 12:43:55 -0500 Subject: ARM: dts: dra7: Add gpmc dma channel Add dma channel information to the gpmc. Signed-off-by: Franklin S Cooper Jr Signed-off-by: Sekhar Nori Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7.dtsi | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index e0074014385a..3a8f3976f6f9 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1451,6 +1451,8 @@ ti,hwmods = "gpmc"; reg = <0x50000000 0x37c>; /* device IO registers */ interrupts = ; + dmas = <&edma_xbar 4 0>; + dma-names = "rxtx"; gpmc,num-cs = <8>; gpmc,num-waitpins = <2>; #address-cells = <2>; -- cgit v1.2.3-59-g8ed1b From 3d9f77be066ccf8d364928154552ed6ca910793b Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 6 May 2016 08:37:57 -0500 Subject: ARM: dts: am57xx-idk-common: Fix input supply names Palmas Regulator is an exception and does not follow the standard "vin-supply" common definitions for all regulators, as a result of this, the input supplies are not reported to regulator framework, with the obvious result of not being appropriately mapped. Fix the same. Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am57xx-idk-common.dtsi | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi index b01a5948cdd0..0e63b9dff6e7 100644 --- a/arch/arm/boot/dts/am57xx-idk-common.dtsi +++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi @@ -60,10 +60,26 @@ tps659038_pmic { compatible = "ti,tps659038-pmic"; + + smps12-in-supply = <&vmain>; + smps3-in-supply = <&vmain>; + smps45-in-supply = <&vmain>; + smps6-in-supply = <&vmain>; + smps7-in-supply = <&vmain>; + smps8-in-supply = <&vmain>; + smps9-in-supply = <&vmain>; + ldo1-in-supply = <&vmain>; + ldo2-in-supply = <&vmain>; + ldo3-in-supply = <&vmain>; + ldo4-in-supply = <&vmain>; + ldo9-in-supply = <&vmain>; + ldoln-in-supply = <&vmain>; + ldousb-in-supply = <&vmain>; + ldortc-in-supply = <&vmain>; + regulators { smps12_reg: smps12 { /* VDD_MPU */ - vin-supply = <&vmain>; regulator-name = "smps12"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1250000>; @@ -73,7 +89,6 @@ smps3_reg: smps3 { /* VDD_DDR EMIF1 EMIF2 */ - vin-supply = <&vmain>; regulator-name = "smps3"; regulator-min-microvolt = <1350000>; regulator-max-microvolt = <1350000>; @@ -84,7 +99,6 @@ smps45_reg: smps45 { /* VDD_DSPEVE on AM572 */ /* VDD_IVA + VDD_DSP on AM571 */ - vin-supply = <&vmain>; regulator-name = "smps45"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1250000>; @@ -94,7 +108,6 @@ smps6_reg: smps6 { /* VDD_GPU */ - vin-supply = <&vmain>; regulator-name = "smps6"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1250000>; @@ -104,7 +117,6 @@ smps7_reg: smps7 { /* VDD_CORE */ - vin-supply = <&vmain>; regulator-name = "smps7"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1150000>; @@ -115,13 +127,11 @@ smps8_reg: smps8 { /* 5728 - VDD_IVAHD */ /* 5718 - N.C. test point */ - vin-supply = <&vmain>; regulator-name = "smps8"; }; smps9_reg: smps9 { /* VDD_3_3D */ - vin-supply = <&vmain>; regulator-name = "smps9"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; @@ -132,7 +142,6 @@ ldo1_reg: ldo1 { /* VDDSHV8 - VSDMMC */ /* NOTE: on rev 1.3a, data supply */ - vin-supply = <&vmain>; regulator-name = "ldo1"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; @@ -142,7 +151,6 @@ ldo2_reg: ldo2 { /* VDDSH18V */ - vin-supply = <&vmain>; regulator-name = "ldo2"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -152,7 +160,6 @@ ldo3_reg: ldo3 { /* R1.3a 572x V1_8PHY_LDO3: USB, SATA */ - vin-supply = <&vmain>; regulator-name = "ldo3"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -162,7 +169,6 @@ ldo4_reg: ldo4 { /* R1.3a 572x V1_8PHY_LDO4: PCIE, HDMI*/ - vin-supply = <&vmain>; regulator-name = "ldo4"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -174,7 +180,6 @@ ldo9_reg: ldo9 { /* VDD_RTC */ - vin-supply = <&vmain>; regulator-name = "ldo9"; regulator-min-microvolt = <840000>; regulator-max-microvolt = <1160000>; @@ -184,7 +189,6 @@ ldoln_reg: ldoln { /* VDDA_1V8_PLL */ - vin-supply = <&vmain>; regulator-name = "ldoln"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -194,7 +198,6 @@ ldousb_reg: ldousb { /* VDDA_3V_USB: VDDA_USBHS33 */ - vin-supply = <&vmain>; regulator-name = "ldousb"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; @@ -204,7 +207,6 @@ ldortc_reg: ldortc { /* VDDA_RTC */ - vin-supply = <&vmain>; regulator-name = "ldortc"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; -- cgit v1.2.3-59-g8ed1b From 65db875d110b8e517f81fa71c57a65fc6d0019ac Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 12 May 2016 13:20:52 -0500 Subject: ARM: OMAP2+: AM43XX: Enable fixes for Cortex-A9 errata This patch explicitly enables the fixes for the below errata applicable for AM43x Socs as was done for OMAP4. 754322: Faulty MMU translations following ASID switch 775420: A data cache maintenance operation which aborts, followed by an ISB, without any DSB in-between, might lead to deadlock Signed-off-by: Dave Gerlach Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index a63d3fe2ca46..415a0bd0cda6 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -67,6 +67,8 @@ config SOC_AM43XX select HAVE_ARM_SCU select GENERIC_CLOCKEVENTS_BROADCAST select HAVE_ARM_TWD + select ARM_ERRATA_754322 + select ARM_ERRATA_775420 config SOC_DRA7XX bool "TI DRA7XX" -- cgit v1.2.3-59-g8ed1b From b44f788cce4086da00fd2f14b6b4d5abcf85f842 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Fri, 6 May 2016 23:02:33 +0200 Subject: ARM: dts: igep00x0: Add SD card-detect. Fix SD card remove/insert detection by adding the correct card-detect pin. All IGEP OMAP3 based boards use the same card-detect pin. Signed-off-by: Enric Balletbo i Serra Signed-off-by: Ladislav Michl Reviewed-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-igep.dtsi | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-igep.dtsi b/arch/arm/boot/dts/omap3-igep.dtsi index 41f5d386f21f..f4f2ce46d681 100644 --- a/arch/arm/boot/dts/omap3-igep.dtsi +++ b/arch/arm/boot/dts/omap3-igep.dtsi @@ -188,6 +188,7 @@ vmmc-supply = <&vmmc1>; vmmc_aux-supply = <&vsim>; bus-width = <4>; + cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_LOW>; }; &mmc3 { -- cgit v1.2.3-59-g8ed1b From a1f6ad14176d466c00e6a687f9c78ec6c7ad6bf8 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Fri, 6 May 2016 23:02:34 +0200 Subject: ARM: dts: igep0020: Add SD card write-protect pin. A host device that supports write protection should refuse to write to an SD card that is designated read-only when write-protect is set. This is an optional feature of the SD specification. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-igep0020-common.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-igep0020-common.dtsi b/arch/arm/boot/dts/omap3-igep0020-common.dtsi index d6f839cab649..b6971060648a 100644 --- a/arch/arm/boot/dts/omap3-igep0020-common.dtsi +++ b/arch/arm/boot/dts/omap3-igep0020-common.dtsi @@ -194,6 +194,12 @@ OMAP3630_CORE2_IOPAD(0x25f8, PIN_OUTPUT | MUX_MODE4) /* etk_d14.gpio_28 */ >; }; + + mmc1_wp_pins: pinmux_mmc1_cd_pins { + pinctrl-single,pins = < + OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT | MUX_MODE4) /* etk_d15.gpio_29 */ + >; + }; }; &i2c3 { @@ -250,3 +256,8 @@ }; }; }; + +&mmc1 { + pinctrl-0 = <&mmc1_pins &mmc1_wp_pins>; + wp-gpios = <&gpio1 29 GPIO_ACTIVE_LOW>; /* gpio_29 */ +}; -- cgit v1.2.3-59-g8ed1b From 4cb54493ab2003568222e6ad7449747354f22ce3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 12 May 2016 10:38:31 +0200 Subject: ARM: samsung: improve static dma_mask definition When no DMA master devices are part of the kernel configuration, we get a warning about the unused dma mask definition: arch/arm/plat-samsung/devs.c:71:12: error: 'samsung_device_dma_mask' defined but not used [-Werror=unused-variable] static u64 samsung_device_dma_mask = DMA_BIT_MASK(32); We could simply mark this as __maybe_unused to shut up that warning, but a nicer solution seems to be to have a separate mask for each device. The advantage is that a driver that happens to call dma_set_mask() on one device doesn't implicitly change the mask for the other devices as well. This is more of a theoretical problem, as obviously nothing does it for the devices in this file (or they would have always been broken), but it feels cleaner that way. The definition works by creating an array in place so we can take the address of it and let the compiler generate a hidden symbol for it at compile time. Signed-off-by: Arnd Bergmann Signed-off-by: Krzysztof Kozlowski --- arch/arm/plat-samsung/devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c index 84baa16f4c0b..e93aa6734147 100644 --- a/arch/arm/plat-samsung/devs.c +++ b/arch/arm/plat-samsung/devs.c @@ -68,7 +68,7 @@ #include #include -static u64 samsung_device_dma_mask = DMA_BIT_MASK(32); +#define samsung_device_dma_mask (*((u64[]) { DMA_BIT_MASK(32) })) /* AC97 */ #ifdef CONFIG_CPU_S3C2440 -- cgit v1.2.3-59-g8ed1b From 8a6f71ccb6c39ca5ae442d1478d3a076e2b1361e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 May 2016 16:17:36 +0200 Subject: ARM: exynos: don't select keyboard driver The samsung-keypad driver is implicitly selected by ARCH_EXYNOS4 (why?), but this fails if CONFIG_INPUT is a loadable module: drivers/input/built-in.o: In function `samsung_keypad_remove': drivers/input/keyboard/samsung-keypad.c:461: undefined reference to `input_unregister_device' drivers/input/built-in.o: In function `samsung_keypad_irq': drivers/input/keyboard/samsung-keypad.c:137: undefined reference to `input_event' drivers/input/built-in.o: In function `samsung_keypad_irq': include/linux/input.h:389: undefined reference to `input_event' drivers/input/built-in.o: In function `samsung_keypad_probe': drivers/input/keyboard/samsung-keypad.c:358: undefined reference to `devm_input_allocate_device' drivers/input/built-in.o:(.debug_addr+0x34): undefined reference to `input_set_capability' This removes the 'select' as suggested by Krzysztof Kozlowski and instead enables the driver from the defconfig files. The problem does not happen on mainline kernels, as we don't normally build built-in input drivers when CONFIG_INPUT=m, but I am experimenting with a patch to change this, and the samsung keypad driver showed up as one example that was silently broken before. Signed-off-by: Arnd Bergmann Link: https://lkml.org/lkml/2016/2/14/55 Signed-off-by: Krzysztof Kozlowski --- arch/arm/configs/exynos_defconfig | 1 + arch/arm/configs/multi_v7_defconfig | 1 + arch/arm/mach-exynos/Kconfig | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 6ffd7e76f3ce..1a197fc8fdc7 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -77,6 +77,7 @@ CONFIG_TOUCHSCREEN_ATMEL_MXT=y CONFIG_INPUT_MISC=y CONFIG_INPUT_MAX77693_HAPTIC=y CONFIG_INPUT_MAX8997_HAPTIC=y +CONFIG_KEYBOARD_SAMSUNG=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_SAMSUNG=y CONFIG_SERIAL_SAMSUNG_CONSOLE=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 28234906a064..ad97a41ceb35 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -260,6 +260,7 @@ CONFIG_KEYBOARD_TEGRA=y CONFIG_KEYBOARD_SPEAR=y CONFIG_KEYBOARD_ST_KEYSCAN=y CONFIG_KEYBOARD_CROS_EC=m +CONFIG_KEYBOARD_SAMSUNG=m CONFIG_MOUSE_PS2_ELANTECH=y CONFIG_MOUSE_CYAPA=m CONFIG_MOUSE_ELAN_I2C=y diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 207fa2c737a6..c0d9f334d1d0 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -58,7 +58,6 @@ config ARCH_EXYNOS4 select CLKSRC_SAMSUNG_PWM if CPU_EXYNOS4210 select CPU_EXYNOS4210 select GIC_NON_BANKED - select KEYBOARD_SAMSUNG if INPUT_KEYBOARD select MIGHT_HAVE_CACHE_L2X0 help Samsung EXYNOS4 (Cortex-A9) SoC based systems -- cgit v1.2.3-59-g8ed1b From 275ae411e56f8f900fa364da29c4706f9af4e1f3 Mon Sep 17 00:00:00 2001 From: Vincent Stehlé Date: Tue, 24 May 2016 16:53:49 +0200 Subject: perf/x86/intel/rapl: Fix pmus free during cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On rapl cleanup path, kfree() is given by mistake the address of the pointer of the structure to free (rapl_pmus->pmus + i). Pass the pointer instead (rapl_pmus->pmus[i]). Fixes: 9de8d686955b "perf/x86/intel/rapl: Convert it to a per package facility" Signed-off-by: Vincent Stehlé Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1464101629-14905-1-git-send-email-vincent.stehle@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/rapl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 99c4bab123cd..e30eef4f29a6 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -714,7 +714,7 @@ static void cleanup_rapl_pmus(void) int i; for (i = 0; i < rapl_pmus->maxpkg; i++) - kfree(rapl_pmus->pmus + i); + kfree(rapl_pmus->pmus[i]); kfree(rapl_pmus); } -- cgit v1.2.3-59-g8ed1b From 2547476a5e4061f6addb88d5fc837d3a950f54c4 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 21 May 2016 13:45:35 +0200 Subject: Fix typos Signed-off-by: Andrea Gelmini Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 +- arch/arc/include/asm/entry-compact.h | 4 ++-- arch/arc/include/asm/mmu_context.h | 2 +- arch/arc/include/asm/pgtable.h | 2 +- arch/arc/include/asm/processor.h | 2 +- arch/arc/include/asm/smp.h | 2 +- arch/arc/include/asm/thread_info.h | 2 +- arch/arc/include/asm/uaccess.h | 2 +- arch/arc/include/uapi/asm/swab.h | 2 +- arch/arc/kernel/perf_event.c | 2 +- arch/arc/kernel/setup.c | 2 +- arch/arc/kernel/signal.c | 2 +- arch/arc/kernel/troubleshoot.c | 2 +- arch/arc/mm/cache.c | 6 +++--- arch/arc/mm/dma.c | 2 +- 15 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 02fabef2891c..d4df6be66d58 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -127,7 +127,7 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -#default target for make without any arguements. +#default target for make without any arguments. KBUILD_IMAGE := bootpImage all: $(KBUILD_IMAGE) diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index e0e1faf03c50..14c310f2e0b1 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -76,8 +76,8 @@ * We need to be a bit more cautious here. What if a kernel bug in * L1 ISR, caused SP to go whaco (some small value which looks like * USER stk) and then we take L2 ISR. - * Above brlo alone would treat it as a valid L1-L2 sceanrio - * instead of shouting alound + * Above brlo alone would treat it as a valid L1-L2 scenario + * instead of shouting around * The only feasible way is to make sure this L2 happened in * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in * L1 ISR before it switches stack diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index 1fd467ef658f..b0b87f2447f5 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -83,7 +83,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm) local_flush_tlb_all(); /* - * Above checke for rollover of 8 bit ASID in 32 bit container. + * Above check for rollover of 8 bit ASID in 32 bit container. * If the container itself wrapped around, set it to a non zero * "generation" to distinguish from no context */ diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 034bbdc0ff61..858f98ef7f1b 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -47,7 +47,7 @@ * Page Tables are purely for Linux VM's consumption and the bits below are * suited to that (uniqueness). Hence some are not implemented in the TLB and * some have different value in TLB. - * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible becoz they live in + * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in * seperate PD0 and PD1, which combined forms a translation entry) * while for PTE perspective, they are 8 and 9 respectively * with MMU v3: Most bits (except SHARED) represent the exact hardware pos diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index f9048994b22f..16b630fbeb6a 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -78,7 +78,7 @@ struct task_struct; #define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp) /* - * Where abouts of Task's sp, fp, blink when it was last seen in kernel mode. + * Where about of Task's sp, fp, blink when it was last seen in kernel mode. * Look in process.c for details of kernel stack layout */ #define TSK_K_ESP(tsk) (tsk->thread.ksp) diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 991380438d6b..89fdd1b0a76e 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -86,7 +86,7 @@ static inline const char *arc_platform_smp_cpuinfo(void) * (1) These insn were introduced only in 4.10 release. So for older released * support needed. * - * (2) In a SMP setup, the LLOCK/SCOND atomiticity across CPUs needs to be + * (2) In a SMP setup, the LLOCK/SCOND atomicity across CPUs needs to be * gaurantted by the platform (not something which core handles). * Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ * disabling for atomicity. diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 3af67455659a..2d79e527fa50 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -103,7 +103,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) /* * _TIF_ALLWORK_MASK includes SYSCALL_TRACE, but we don't need it. - * SYSCALL_TRACE is anways seperately/unconditionally tested right after a + * SYSCALL_TRACE is anyway seperately/unconditionally tested right after a * syscall, so all that reamins to be tested is _TIF_WORK_MASK */ diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index d1da6032b715..a78d5670884f 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -32,7 +32,7 @@ #define __kernel_ok (segment_eq(get_fs(), KERNEL_DS)) /* - * Algorthmically, for __user_ok() we want do: + * Algorithmically, for __user_ok() we want do: * (start < TASK_SIZE) && (start+len < TASK_SIZE) * where TASK_SIZE could either be retrieved from thread_info->addr_limit or * emitted directly in code. diff --git a/arch/arc/include/uapi/asm/swab.h b/arch/arc/include/uapi/asm/swab.h index 095599a73195..71f3918b0fc3 100644 --- a/arch/arc/include/uapi/asm/swab.h +++ b/arch/arc/include/uapi/asm/swab.h @@ -74,7 +74,7 @@ __tmp ^ __in; \ }) -#elif (ARC_BSWAP_TYPE == 2) /* Custom single cycle bwap instruction */ +#elif (ARC_BSWAP_TYPE == 2) /* Custom single cycle bswap instruction */ #define __arch_swab32(x) \ ({ \ diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 6fd48021324b..08f03d9b5b3e 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -108,7 +108,7 @@ static void arc_perf_event_update(struct perf_event *event, int64_t delta = new_raw_count - prev_raw_count; /* - * We don't afaraid of hwc->prev_count changing beneath our feet + * We aren't afraid of hwc->prev_count changing beneath our feet * because there's no way for us to re-enter this function anytime. */ local64_set(&hwc->prev_count, new_raw_count); diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index f63b8bfefb0c..2ee7a4d758a8 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -392,7 +392,7 @@ void __init setup_arch(char **cmdline_p) /* * If we are here, it is established that @uboot_arg didn't * point to DT blob. Instead if u-boot says it is cmdline, - * Appent to embedded DT cmdline. + * append to embedded DT cmdline. * setup_machine_fdt() would have populated @boot_command_line */ if (uboot_tag == 1) { diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 004b7f0bc76c..6cb3736b6b83 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -34,7 +34,7 @@ * -ViXS were still seeing crashes when using insmod to load drivers. * It turned out that the code to change Execute permssions for TLB entries * of user was not guarded for interrupts (mod_tlb_permission) - * This was cauing TLB entries to be overwritten on unrelated indexes + * This was causing TLB entries to be overwritten on unrelated indexes * * Vineetg: July 15th 2008: Bug #94183 * -Exception happens in Delay slot of a JMP, and before user space resumes, diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index a6f91e88ce36..934150e7ac48 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -276,7 +276,7 @@ static int tlb_stats_open(struct inode *inode, struct file *file) return 0; } -/* called on user read(): display the couters */ +/* called on user read(): display the counters */ static ssize_t tlb_stats_output(struct file *file, /* file descriptor */ char __user *user_buf, /* user buffer */ size_t len, /* length of buffer */ diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 9e5eddbb856f..5a294b2c3cb3 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -215,7 +215,7 @@ slc_chk: * ------------------ * This ver of MMU supports variable page sizes (1k-16k): although Linux will * only support 8k (default), 16k and 4k. - * However from hardware perspective, smaller page sizes aggrevate aliasing + * However from hardware perspective, smaller page sizes aggravate aliasing * meaning more vaddr bits needed to disambiguate the cache-line-op ; * the existing scheme of piggybacking won't work for certain configurations. * Two new registers IC_PTAG and DC_PTAG inttoduced. @@ -302,7 +302,7 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, /* * This is technically for MMU v4, using the MMU v3 programming model - * Special work for HS38 aliasing I-cache configuratino with PAE40 + * Special work for HS38 aliasing I-cache configuration with PAE40 * - upper 8 bits of paddr need to be written into PTAG_HI * - (and needs to be written before the lower 32 bits) * Note that PTAG_HI is hoisted outside the line loop @@ -936,7 +936,7 @@ void arc_cache_init(void) ic->ver, CONFIG_ARC_MMU_VER); /* - * In MMU v4 (HS38x) the alising icache config uses IVIL/PTAG + * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG * pair to provide vaddr/paddr respectively, just as in MMU v3 */ if (is_isa_arcv2() && ic->alias) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 8c8e36fa5659..73d7e4c75b7d 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -10,7 +10,7 @@ * DMA Coherent API Notes * * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is - * implemented by accessintg it using a kernel virtual address, with + * implemented by accessing it using a kernel virtual address, with * Cache bit off in the TLB entry. * * The default DMA address == Phy address which is 0x8000_0000 based. -- cgit v1.2.3-59-g8ed1b From 49acadff2a0cb4f7ff4efe0fb6c23f5fad81a3b3 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 25 May 2016 15:11:29 +0300 Subject: arc: Get rid of root core-frequency property Now when we switched to usage of real clk devices for CPU core frequency those root properties make no sense any longer. Se we're just getting rid of them here to not confuse readers of our .dts files. Signed-off-by: Alexey Brodkin Cc: Christian Ruppert Cc: Noam Camus Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/abilis_tb100.dtsi | 2 -- arch/arc/boot/dts/abilis_tb101.dtsi | 2 -- arch/arc/boot/dts/axc001.dtsi | 1 - arch/arc/boot/dts/axc003.dtsi | 1 - arch/arc/boot/dts/axc003_idu.dtsi | 1 - arch/arc/boot/dts/eznps.dts | 1 - arch/arc/boot/dts/nsim_700.dts | 1 - arch/arc/boot/dts/nsimosci.dts | 1 - arch/arc/boot/dts/nsimosci_hs.dts | 1 - arch/arc/boot/dts/nsimosci_hs_idu.dts | 1 - arch/arc/boot/dts/skeleton.dtsi | 1 - arch/arc/boot/dts/skeleton_hs.dtsi | 1 - arch/arc/boot/dts/skeleton_hs_idu.dtsi | 1 - arch/arc/boot/dts/vdk_axc003.dtsi | 1 - arch/arc/boot/dts/vdk_axc003_idu.dtsi | 1 - 15 files changed, 17 deletions(-) (limited to 'arch') diff --git a/arch/arc/boot/dts/abilis_tb100.dtsi b/arch/arc/boot/dts/abilis_tb100.dtsi index 3942634f805a..02410b211433 100644 --- a/arch/arc/boot/dts/abilis_tb100.dtsi +++ b/arch/arc/boot/dts/abilis_tb100.dtsi @@ -23,8 +23,6 @@ / { - clock-frequency = <500000000>; /* 500 MHZ */ - soc100 { bus-frequency = <166666666>; diff --git a/arch/arc/boot/dts/abilis_tb101.dtsi b/arch/arc/boot/dts/abilis_tb101.dtsi index b0467229a5c4..f9e7686044eb 100644 --- a/arch/arc/boot/dts/abilis_tb101.dtsi +++ b/arch/arc/boot/dts/abilis_tb101.dtsi @@ -23,8 +23,6 @@ / { - clock-frequency = <500000000>; /* 500 MHZ */ - soc100 { bus-frequency = <166666666>; diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index 3e02f152edcb..6ae2c476ad82 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -15,7 +15,6 @@ / { compatible = "snps,arc"; - clock-frequency = <750000000>; /* 750 MHZ */ #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 378e455a94c4..14df46f141bf 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -14,7 +14,6 @@ / { compatible = "snps,arc"; - clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 64c94b2860ab..3d6cfa32bf51 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -14,7 +14,6 @@ / { compatible = "snps,arc"; - clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/eznps.dts b/arch/arc/boot/dts/eznps.dts index b89f6c3eb352..1e0d225791c1 100644 --- a/arch/arc/boot/dts/eznps.dts +++ b/arch/arc/boot/dts/eznps.dts @@ -18,7 +18,6 @@ / { compatible = "ezchip,arc-nps"; - clock-frequency = <83333333>; /* 83.333333 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&intc>; diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts index 5d5e373e0ebc..63970513e4ae 100644 --- a/arch/arc/boot/dts/nsim_700.dts +++ b/arch/arc/boot/dts/nsim_700.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsim"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts index b5b060adce8a..763d66c883da 100644 --- a/arch/arc/boot/dts/nsimosci.dts +++ b/arch/arc/boot/dts/nsimosci.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsimosci"; - clock-frequency = <20000000>; /* 20 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts index 325e73090a18..4eb97c584b18 100644 --- a/arch/arc/boot/dts/nsimosci_hs.dts +++ b/arch/arc/boot/dts/nsimosci_hs.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsimosci_hs"; - clock-frequency = <20000000>; /* 20 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts index ee03d7126581..853f897eb2a3 100644 --- a/arch/arc/boot/dts/nsimosci_hs_idu.dts +++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsimosci_hs"; - clock-frequency = <5000000>; /* 5 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi index 3a10cc633e2b..65808fe0a290 100644 --- a/arch/arc/boot/dts/skeleton.dtsi +++ b/arch/arc/boot/dts/skeleton.dtsi @@ -13,7 +13,6 @@ / { compatible = "snps,arc"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; chosen { }; diff --git a/arch/arc/boot/dts/skeleton_hs.dtsi b/arch/arc/boot/dts/skeleton_hs.dtsi index 71fd308a9298..2dfe8037dfbb 100644 --- a/arch/arc/boot/dts/skeleton_hs.dtsi +++ b/arch/arc/boot/dts/skeleton_hs.dtsi @@ -8,7 +8,6 @@ / { compatible = "snps,arc"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; chosen { }; diff --git a/arch/arc/boot/dts/skeleton_hs_idu.dtsi b/arch/arc/boot/dts/skeleton_hs_idu.dtsi index d1cb25a66989..4c11079f3565 100644 --- a/arch/arc/boot/dts/skeleton_hs_idu.dtsi +++ b/arch/arc/boot/dts/skeleton_hs_idu.dtsi @@ -8,7 +8,6 @@ / { compatible = "snps,arc"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; chosen { }; diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi index ad4ee43bd2ac..0fd6ba985b16 100644 --- a/arch/arc/boot/dts/vdk_axc003.dtsi +++ b/arch/arc/boot/dts/vdk_axc003.dtsi @@ -14,7 +14,6 @@ / { compatible = "snps,arc"; - clock-frequency = <50000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi index a3cb6263c581..82214cd7ba0c 100644 --- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi +++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi @@ -15,7 +15,6 @@ / { compatible = "snps,arc"; - clock-frequency = <50000000>; #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3-59-g8ed1b From 60f2b4b8af548150cc56bf6fd213e47897964794 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 30 May 2016 19:21:22 +0530 Subject: ARC: [intc-compact] simplify code for 2 priority levels ARC700 support for 2 interrupt priorities historically allowed even slow perpherals such as emac and uart to setup high priority interrupts which was wrong from the beginning as they could possibly delay the more critical timer interrupt. The hardware support for 2 level interrupts in ARCompact is less than ideal anyways (judging from the "hacks" in low level entry code and thus is not used in productions systems I know of. So reduce the scope of this to timer only, thereby reducing a bunch of complexity. Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 22 ++-------------------- arch/arc/kernel/entry-compact.S | 18 ++---------------- arch/arc/kernel/intc-compact.c | 6 ++---- 3 files changed, 6 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 0dcbacfdea4b..b14826a4f59c 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -186,9 +186,6 @@ if SMP config ARC_HAS_COH_CACHES def_bool n -config ARC_HAS_REENTRANT_IRQ_LV2 - def_bool n - config ARC_MCIP bool "ARConnect Multicore IP (MCIP) Support " depends on ISA_ARCV2 @@ -366,25 +363,10 @@ config NODES_SHIFT if ISA_ARCOMPACT config ARC_COMPACT_IRQ_LEVELS - bool "ARCompact IRQ Priorities: High(2)/Low(1)" + bool "Setup Timer IRQ as high Priority" default n - # Timer HAS to be high priority, for any other high priority config - select ARC_IRQ3_LV2 # if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy - depends on !SMP || ARC_HAS_REENTRANT_IRQ_LV2 - -if ARC_COMPACT_IRQ_LEVELS - -config ARC_IRQ3_LV2 - bool - -config ARC_IRQ5_LV2 - bool - -config ARC_IRQ6_LV2 - bool - -endif #ARC_COMPACT_IRQ_LEVELS + depends on !SMP config ARC_FPU_SAVE_RESTORE bool "Enable FPU state persistence across context switch" diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S index 0cb0abaa0479..98812c1248df 100644 --- a/arch/arc/kernel/entry-compact.S +++ b/arch/arc/kernel/entry-compact.S @@ -91,27 +91,13 @@ VECTOR mem_service ; 0x8, Mem exception (0x1) VECTOR instr_service ; 0x10, Instrn Error (0x2) ; ******************** Device ISRs ********************** -#ifdef CONFIG_ARC_IRQ3_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -VECTOR handle_interrupt_level1 - -#ifdef CONFIG_ARC_IRQ5_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -#ifdef CONFIG_ARC_IRQ6_LV2 +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS VECTOR handle_interrupt_level2 #else VECTOR handle_interrupt_level1 #endif -.rept 25 +.rept 28 VECTOR handle_interrupt_level1 ; Other devices .endr diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index c5cceca36118..ce9deb953ca9 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -28,10 +28,8 @@ void arc_init_IRQ(void) { int level_mask = 0; - /* setup any high priority Interrupts (Level2 in ARCompact jargon) */ - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3; - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5; - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6; + /* Is timer high priority Interrupt (Level2 in ARCompact jargon) */ + level_mask |= IS_ENABLED(CONFIG_ARC_COMPACT_IRQ_LEVELS) << TIMER0_IRQ; /* * Write to register, even if no LV2 IRQs configured to reset it -- cgit v1.2.3-59-g8ed1b From d140b9bfcad9e53f1da67ad09dd5092b44d55c7b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 11:46:47 +0530 Subject: ARC: don't enable DISCONTIGMEM unconditionally Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index b14826a4f59c..be9d0b5ae0cc 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -61,7 +61,7 @@ config RWSEM_GENERIC_SPINLOCK def_bool y config ARCH_DISCONTIGMEM_ENABLE - def_bool y + def_bool n config ARCH_FLATMEM_ENABLE def_bool y @@ -453,7 +453,7 @@ config LINUX_LINK_BASE config HIGHMEM bool "High Memory Support" - select DISCONTIGMEM + select ARCH_DISCONTIGMEM_ENABLE help With ARC 2G:2G address split, only upper 2G is directly addressable by kernel. Enable this to potentially allow access to rest of 2G and PAE -- cgit v1.2.3-59-g8ed1b From b52fc2183f40170489692852fd66d8750cbaa324 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 19 Apr 2016 18:36:55 -0700 Subject: vexpress/spc: Remove CLK_IS_ROOT This flag is a no-op now (see commit 47b0eeb3dc8a "clk: Deprecate CLK_IS_ROOT", 2016-02-02) so remove it. Acked-by: Sudeep Holla Signed-off-by: Stephen Boyd --- arch/arm/mach-vexpress/spc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 5766ce2be32b..8409cab3f760 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -547,7 +547,7 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev) init.name = dev_name(cpu_dev); init.ops = &clk_spc_ops; - init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE; + init.flags = CLK_GET_RATE_NOCACHE; init.num_parents = 0; return devm_clk_register(cpu_dev, &spc->hw); -- cgit v1.2.3-59-g8ed1b From 3fb9c41286e594d52d23ca58404c69a7e2c39c41 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 19 Apr 2016 18:31:12 -0700 Subject: powerpc/512x: clk: Remove CLK_IS_ROOT This flag is a no-op now (see commit 47b0eeb3dc8a "clk: Deprecate CLK_IS_ROOT", 2016-02-02) so remove it. Cc: Gerhard Sittig Signed-off-by: Stephen Boyd --- arch/powerpc/platforms/512x/clock-commonclk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c index c50ea76ba66c..6081fbd75330 100644 --- a/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -221,7 +221,7 @@ static bool soc_has_mclk_mux0_canin(void) /* convenience wrappers around the common clk API */ static inline struct clk *mpc512x_clk_fixed(const char *name, int rate) { - return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); + return clk_register_fixed_rate(NULL, name, NULL, 0, rate); } static inline struct clk *mpc512x_clk_factor( -- cgit v1.2.3-59-g8ed1b From 42316a201a60be38b07db1ebc3a1633107ed7209 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:31:33 +0530 Subject: Revert "ARCv2: spinlock/rwlock/atomics: reduce 1 instruction in exponential backoff" This reverts commit 10971638701dedadb58c88ce4d31c9375b224ed6. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL - so revert thw whole delayed retry series ! Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic.h | 3 ++- arch/arc/include/asm/spinlock.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 5f3dcbbc0cc9..75c8226317f4 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -36,7 +36,8 @@ " mov %[tmp], %[delay] \n" /* tmp = delay */ \ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ + " mov.z %[delay], 1 \n" /* handle overflow */ \ " b 1b \n" /* start over */ \ "4: ; --- success --- \n" \ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 800e7c430ca5..a86cb84fad2a 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -260,7 +260,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) " mov %[tmp], %[delay] \n" /* tmp = delay */ \ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ + " mov.z %[delay], 1 \n" /* handle overflow */ \ " b 1b \n" /* start over */ \ " \n" \ "4: ; --- done --- \n" \ -- cgit v1.2.3-59-g8ed1b From 819f3602dcbd6b021cd50e18f5d05da30bca5b07 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:33:29 +0530 Subject: Revert "ARCv2: spinlock/rwlock: Reset retry delay when starting a new spin-wait cycle" This reverts commit b89aa12c177477e34caa722818536fb5d0bffd76. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL so revert the whole delayed retry series ! Signed-off-by: Vineet Gupta --- arch/arc/include/asm/spinlock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index a86cb84fad2a..5e01bdf968ea 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -279,7 +279,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 0b \n" /* spin while LOCKED */ + " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ " scond %[LOCKED], [%[slock]] \n" /* acquire */ " bz 4f \n" /* done */ " \n" @@ -358,7 +358,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 0b\n" /* <= 0: spin while write locked */ + " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ " sub %[val], %[val], 1 \n" /* reader lock */ " scond %[val], [%[rwlock]] \n" " bz 4f \n" /* done */ @@ -427,7 +427,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 0b \n" /* while !UNLOCKED spin */ + " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ " mov %[val], %[WR_LOCKED] \n" " scond %[val], [%[rwlock]] \n" " bz 4f \n" -- cgit v1.2.3-59-g8ed1b From ed6aefed726a305bd36344e230d2a9e9301226fc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:35:09 +0530 Subject: Revert "ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff" This reverts commit e78fdfef84be13a5c2b8276e12203cdf24778596. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL so revert the whole delayed retry series ! Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 5 - arch/arc/include/asm/atomic.h | 46 +------ arch/arc/include/asm/spinlock.h | 293 ---------------------------------------- 3 files changed, 4 insertions(+), 340 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index be9d0b5ae0cc..0d3e59f56974 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -389,11 +389,6 @@ config ARC_HAS_LLSC default y depends on !ARC_CANT_LLSC -config ARC_STAR_9000923308 - bool "Workaround for llock/scond livelock" - default n - depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC - config ARC_HAS_SWAPE bool "Insn: SWAPE (endian-swap)" default y diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 75c8226317f4..dd683995bc9d 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -25,51 +25,17 @@ #define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) -#ifdef CONFIG_ARC_STAR_9000923308 - -#define SCOND_FAIL_RETRY_VAR_DEF \ - unsigned int delay = 1, tmp; \ - -#define SCOND_FAIL_RETRY_ASM \ - " bz 4f \n" \ - " ; --- scond fail delay --- \n" \ - " mov %[tmp], %[delay] \n" /* tmp = delay */ \ - "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ - " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ - " mov.z %[delay], 1 \n" /* handle overflow */ \ - " b 1b \n" /* start over */ \ - "4: ; --- success --- \n" \ - -#define SCOND_FAIL_RETRY_VARS \ - ,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \ - -#else /* !CONFIG_ARC_STAR_9000923308 */ - -#define SCOND_FAIL_RETRY_VAR_DEF - -#define SCOND_FAIL_RETRY_ASM \ - " bnz 1b \n" \ - -#define SCOND_FAIL_RETRY_VARS - -#endif - #define ATOMIC_OP(op, c_op, asm_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - unsigned int val; \ - SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int val; \ \ __asm__ __volatile__( \ "1: llock %[val], [%[ctr]] \n" \ " " #asm_op " %[val], %[val], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ - " \n" \ - SCOND_FAIL_RETRY_ASM \ - \ + " bnz 1b \n" \ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ - SCOND_FAIL_RETRY_VARS \ : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ [i] "ir" (i) \ : "cc"); \ @@ -78,8 +44,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ - unsigned int val; \ - SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int val; \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -91,11 +56,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ "1: llock %[val], [%[ctr]] \n" \ " " #asm_op " %[val], %[val], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ - " \n" \ - SCOND_FAIL_RETRY_ASM \ - \ + " bnz 1b \n" \ : [val] "=&r" (val) \ - SCOND_FAIL_RETRY_VARS \ : [ctr] "r" (&v->counter), \ [i] "ir" (i) \ : "cc"); \ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 5e01bdf968ea..cded4a9b5438 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -20,11 +20,6 @@ #ifdef CONFIG_ARC_HAS_LLSC -/* - * A normal LLOCK/SCOND based system, w/o need for livelock workaround - */ -#ifndef CONFIG_ARC_STAR_9000923308 - static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int val; @@ -238,294 +233,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) smp_mb(); } -#else /* CONFIG_ARC_STAR_9000923308 */ - -/* - * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping - * coherency transactions in the SCU. The exclusive line state keeps rotating - * among contenting cores leading to a never ending cycle. So break the cycle - * by deferring the retry of failed exclusive access (SCOND). The actual delay - * needed is function of number of contending cores as well as the unrelated - * coherency traffic from other cores. To keep the code simple, start off with - * small delay of 1 which would suffice most cases and in case of contention - * double the delay. Eventually the delay is sufficient such that the coherency - * pipeline is drained, thus a subsequent exclusive access would succeed. - */ - -#define SCOND_FAIL_RETRY_VAR_DEF \ - unsigned int delay, tmp; \ - -#define SCOND_FAIL_RETRY_ASM \ - " ; --- scond fail delay --- \n" \ - " mov %[tmp], %[delay] \n" /* tmp = delay */ \ - "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ - " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ - " mov.z %[delay], 1 \n" /* handle overflow */ \ - " b 1b \n" /* start over */ \ - " \n" \ - "4: ; --- done --- \n" \ - -#define SCOND_FAIL_RETRY_VARS \ - ,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \ - -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ - " scond %[LOCKED], [%[slock]] \n" /* acquire */ - " bz 4f \n" /* done */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [slock] "r" (&(lock->slock)), - [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ - " scond %[LOCKED], [%[slock]] \n" /* acquire */ - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [slock] "r" (&(lock->slock)), - [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - smp_mb(); - - lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; - - smp_mb(); -} - -/* - * Read-write spinlocks, allowing multiple readers but only one writer. - * Unfair locking as Writers could be starved indefinitely by Reader(s) - */ - -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - /* - * zero means writer holds the lock exclusively, deny Reader. - * Otherwise grant lock to first/subseq reader - * - * if (rw->counter > 0) { - * rw->counter--; - * ret = 1; - * } - */ - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ - " sub %[val], %[val], 1 \n" /* reader lock */ - " scond %[val], [%[rwlock]] \n" - " bz 4f \n" /* done */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_read_trylock(arch_rwlock_t *rw) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */ - " sub %[val], %[val], 1 \n" /* counter-- */ - " scond %[val], [%[rwlock]] \n" - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - /* - * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), - * deny writer. Otherwise if unlocked grant to writer - * Hence the claim that Linux rwlocks are unfair to writers. - * (can be starved for an indefinite time by readers). - * - * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) { - * rw->counter = 0; - * ret = 1; - * } - */ - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ - " mov %[val], %[WR_LOCKED] \n" - " scond %[val], [%[rwlock]] \n" - " bz 4f \n" - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_write_trylock(arch_rwlock_t *rw) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */ - " mov %[val], %[WR_LOCKED] \n" - " scond %[val], [%[rwlock]] \n" - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_read_unlock(arch_rwlock_t *rw) -{ - unsigned int val; - - smp_mb(); - - /* - * rw->counter++; - */ - __asm__ __volatile__( - "1: llock %[val], [%[rwlock]] \n" - " add %[val], %[val], 1 \n" - " scond %[val], [%[rwlock]] \n" - " bnz 1b \n" - " \n" - : [val] "=&r" (val) - : [rwlock] "r" (&(rw->counter)) - : "memory", "cc"); - - smp_mb(); -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - unsigned int val; - - smp_mb(); - - /* - * rw->counter = __ARCH_RW_LOCK_UNLOCKED__; - */ - __asm__ __volatile__( - "1: llock %[val], [%[rwlock]] \n" - " scond %[UNLOCKED], [%[rwlock]]\n" - " bnz 1b \n" - " \n" - : [val] "=&r" (val) - : [rwlock] "r" (&(rw->counter)), - [UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__) - : "memory", "cc"); - - smp_mb(); -} - -#undef SCOND_FAIL_RETRY_VAR_DEF -#undef SCOND_FAIL_RETRY_ASM -#undef SCOND_FAIL_RETRY_VARS - -#endif /* CONFIG_ARC_STAR_9000923308 */ - #else /* !CONFIG_ARC_HAS_LLSC */ static inline void arch_spin_lock(arch_spinlock_t *lock) -- cgit v1.2.3-59-g8ed1b From 3b94a891667c30fb4624221497d77fc65d950345 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 27 May 2016 04:12:20 -0700 Subject: perf/x86/intel/uncore: Remove SBOX support for Broadwell server There was a report that on certain Broadwell-EP systems writing any bit of the SBOX PMU initialization MSR would #GP at boot. This did not happen on all systems. My test systems booted fine. Considering both DE and EP may have such issues, this patch removes SBOX support for all Broadwell platforms for now. Reported-and-tested-by: Mark van Dijk Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1464347540-5763-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b2625867ebd1..874e8bd64d1d 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2868,27 +2868,10 @@ static struct intel_uncore_type bdx_uncore_cbox = { .format_group = &hswep_uncore_cbox_format_group, }; -static struct intel_uncore_type bdx_uncore_sbox = { - .name = "sbox", - .num_counters = 4, - .num_boxes = 4, - .perf_ctr_bits = 48, - .event_ctl = HSWEP_S0_MSR_PMON_CTL0, - .perf_ctr = HSWEP_S0_MSR_PMON_CTR0, - .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, - .msr_offset = HSWEP_SBOX_MSR_OFFSET, - .ops = &hswep_uncore_sbox_msr_ops, - .format_group = &hswep_uncore_sbox_format_group, -}; - -#define BDX_MSR_UNCORE_SBOX 3 - static struct intel_uncore_type *bdx_msr_uncores[] = { &bdx_uncore_ubox, &bdx_uncore_cbox, &hswep_uncore_pcu, - &bdx_uncore_sbox, NULL, }; @@ -2897,10 +2880,6 @@ void bdx_uncore_cpu_init(void) if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; - - /* BDX-DE doesn't have SBOX */ - if (boot_cpu_data.x86_model == 86) - uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; } static struct intel_uncore_type bdx_uncore_ha = { -- cgit v1.2.3-59-g8ed1b From 15b7cc78f0951e418c940d8b3b6a7a3b962b7748 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 May 2016 11:12:33 +0900 Subject: arm64: dts: drop "arm,amba-bus" in favor of "simple-bus" part 2 Tree-wide replacement was done by commit 2ef7d5f342c1 (ARM, ARM64: dts: drop "arm,amba-bus" in favor of "simple-bus"), but we have some new users of "arm,amba-bus" at Linux 4.7-rc1. Eliminate them now. Signed-off-by: Masahiro Yamada Acked-by: Heiko Stuebner Acked-by: Chanho Min Signed-off-by: Olof Johansson --- arch/arm64/boot/dts/lg/lg1312.dtsi | 2 +- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi index 3a4e9a2ab313..fbafa24cd533 100644 --- a/arch/arm64/boot/dts/lg/lg1312.dtsi +++ b/arch/arm64/boot/dts/lg/lg1312.dtsi @@ -125,7 +125,7 @@ #size-cells = <1>; #interrupts-cells = <3>; - compatible = "arm,amba-bus"; + compatible = "simple-bus"; interrupt-parent = <&gic>; ranges; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 46f325a143b0..d7f8e06910bc 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -163,7 +163,7 @@ }; amba { - compatible = "arm,amba-bus"; + compatible = "simple-bus"; #address-cells = <2>; #size-cells = <2>; ranges; -- cgit v1.2.3-59-g8ed1b From 5fdb884267890b26fdfd5ff5ddaf596745b9ab43 Mon Sep 17 00:00:00 2001 From: Olliver Schinagl Date: Fri, 13 May 2016 21:57:16 +0200 Subject: ARM: dts: sunxi: Add OLinuXino Lime2 eMMC to the Makefile commit 27dd9af6bc000ab21fd ("ARM: dts: sunxi: Add a olinuxino-lime2-emmc") added the new emmc equipped lime2 but forgot its Makefile. This patch adds an entry to the Makefile. Signed-off-by: Olliver Schinagl Acked-by: Maxime Ripard Signed-off-by: Olof Johansson --- arch/arm/boot/dts/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 06b6c2d695bf..414b42710a36 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -741,6 +741,7 @@ dtb-$(CONFIG_MACH_SUN7I) += \ sun7i-a20-olimex-som-evb.dtb \ sun7i-a20-olinuxino-lime.dtb \ sun7i-a20-olinuxino-lime2.dtb \ + sun7i-a20-olinuxino-lime2-emmc.dtb \ sun7i-a20-olinuxino-micro.dtb \ sun7i-a20-orangepi.dtb \ sun7i-a20-orangepi-mini.dtb \ -- cgit v1.2.3-59-g8ed1b From cb84f6c0a25eb76b1f838eb63e212705c0c06b5f Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 11 May 2016 13:23:13 +0800 Subject: ARM: dts: sun6i: primo81: Drop constraints on dc1sw regulator This is the same issue fixed in commit dcf5341f0150 ("ARM: dts: sun8i-q8-common: Do not set constraints on dc1sw regulator"). Commit message copied: dc1sw is an on/off only regulator and as such it cannot have constraints. This is a limitation of the kernel regulator implementation which resolves supplies on the first regulator_get(), which is done after applying constraints, and applying the constrains will fail because it calls _regulator_get_voltage() and _regulator_do_set_voltage() both of which will fail on a switch regulator when there is no supply (yet). This causes registering of all axp22x regulators to fail with the following errors: [ 1.395249] vcc-lcd: failed to get the current voltage(-22) [ 1.405131] axp20x-regulator axp20x-regulator: Failed to register dc1sw [ 1.412436] axp20x-regulator: probe of axp20x-regulator failed with error -22 This commit removes the constrains on dc1sw / vcc-lcd fixing this problem. Note that dcdc1 itself is contrained to the exact same values, so this does not change anything. Cc: Hans de Goede Signed-off-by: Chen-Yu Tsai Cc: # 4.6 Acked-by: Maxime Ripard Signed-off-by: Olof Johansson --- arch/arm/boot/dts/sun6i-a31s-primo81.dts | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun6i-a31s-primo81.dts b/arch/arm/boot/dts/sun6i-a31s-primo81.dts index 68b479b8772c..73c133f5e79c 100644 --- a/arch/arm/boot/dts/sun6i-a31s-primo81.dts +++ b/arch/arm/boot/dts/sun6i-a31s-primo81.dts @@ -176,8 +176,6 @@ }; ®_dc1sw { - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; regulator-name = "vcc-lcd"; }; -- cgit v1.2.3-59-g8ed1b From b223d6242c372a81cca3bb81998f53d3b3e3fb70 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 11 May 2016 13:23:14 +0800 Subject: ARM: dts: sun6i: yones-toptech-bs1078-v2: Drop constraints on dc1sw regulator This is the same issue fixed in commit dcf5341f0150 ("ARM: dts: sun8i-q8-common: Do not set constraints on dc1sw regulator"). Commit message copied: dc1sw is an on/off only regulator and as such it cannot have constraints. This is a limitation of the kernel regulator implementation which resolves supplies on the first regulator_get(), which is done after applying constraints, and applying the constrains will fail because it calls _regulator_get_voltage() and _regulator_do_set_voltage() both of which will fail on a switch regulator when there is no supply (yet). This causes registering of all axp22x regulators to fail with the following errors: [ 1.395249] vcc-lcd: failed to get the current voltage(-22) [ 1.405131] axp20x-regulator axp20x-regulator: Failed to register dc1sw [ 1.412436] axp20x-regulator: probe of axp20x-regulator failed with error -22 This commit removes the constrains on dc1sw / vcc-lcd fixing this problem. Note that dcdc1 itself is contrained to the exact same values, so this does not change anything. Cc: Hans de Goede Signed-off-by: Chen-Yu Tsai Cc: # 4.6 Acked-by: Maxime Ripard Signed-off-by: Olof Johansson --- arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts b/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts index 360adfb1e9ca..d6ad6196a768 100644 --- a/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts +++ b/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts @@ -135,8 +135,6 @@ ®_dc1sw { regulator-name = "vcc-lcd-usb2"; - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; }; ®_dc5ldo { -- cgit v1.2.3-59-g8ed1b From 1e407ee3b21f981140491d5b8a36422979ca246f Mon Sep 17 00:00:00 2001 From: Khem Raj Date: Mon, 25 Apr 2016 09:19:17 -0700 Subject: powerpc/ptrace: Fix out of bounds array access warning gcc-6 correctly warns about a out of bounds access arch/powerpc/kernel/ptrace.c:407:24: warning: index 32 denotes an offset greater than size of 'u64[32][1] {aka long long unsigned int[32][1]}' [-Warray-bounds] offsetof(struct thread_fp_state, fpr[32][0])); ^ check the end of array instead of beginning of next element to fix this Signed-off-by: Khem Raj Cc: Kees Cook Cc: Michael Ellerman Cc: Segher Boessenkool Tested-by: Aaro Koskinen Acked-by: Olof Johansson Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 30a03c03fe73..060b140f03c6 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -377,7 +377,7 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, #else BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32][0])); + offsetof(struct thread_fp_state, fpr[32])); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fp_state, 0, -1); @@ -405,7 +405,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, return 0; #else BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32][0])); + offsetof(struct thread_fp_state, fpr[32])); return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.fp_state, 0, -1); -- cgit v1.2.3-59-g8ed1b From 8a934efe94347eee843aeea65bdec8077a79e259 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 26 May 2016 09:56:07 +1000 Subject: powerpc/pseries: Fix PCI config address for DDW In commit 8445a87f7092 "powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism", the PE address was replaced with the PCI config address in order to remove dependency on EEH. According to PAPR spec, firmware (pHyp or QEMU) should accept "xxBBSSxx" format PCI config address, not "xxxxBBSS" provided by the patch. Note that "BB" is PCI bus number and "SS" is the combination of slot and function number. This fixes the PCI address passed to DDW RTAS calls. Fixes: 8445a87f7092 ("powerpc/iommu: Remove the dependency on EEH struct in DDW mechanism") Cc: stable@vger.kernel.org # v3.4+ Reported-by: Guilherme G. Piccoli Signed-off-by: Gavin Shan Tested-by: Guilherme G. Piccoli Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index b7dfc1359d01..3e8865b187de 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -927,7 +927,7 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, dn = pci_device_to_OF_node(dev); pdn = PCI_DN(dn); buid = pdn->phb->buid; - cfg_addr = (pdn->busno << 8) | pdn->devfn; + cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, cfg_addr, BUID_HI(buid), BUID_LO(buid)); @@ -956,7 +956,7 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, dn = pci_device_to_OF_node(dev); pdn = PCI_DN(dn); buid = pdn->phb->buid; - cfg_addr = (pdn->busno << 8) | pdn->devfn; + cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ -- cgit v1.2.3-59-g8ed1b From 08dd8cd06ed95625b9e2fac43c78fcb45b7eaf94 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 3 Jun 2016 19:00:59 +0100 Subject: x86/msr: Use the proper trace point conditional for writes The msr tracing for writes is incorrectly conditional on the read trace. Fixes: 7f47d8cc039f "x86, tracing, perf: Add trace point for MSR accesses" Signed-off-by: Dr. David Alan Gilbert Cc: stable@vger.kernel.org Cc: ak@linux.intel.com Link: http://lkml.kernel.org/r/1464976859-21850-1-git-send-email-dgilbert@redhat.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/msr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7dc1d8fef7fd..b5fee97813cd 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -122,7 +122,7 @@ notrace static inline void native_write_msr(unsigned int msr, "2:\n" _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) : : "c" (msr), "a"(low), "d" (high) : "memory"); - if (msr_tracepoint_active(__tracepoint_read_msr)) + if (msr_tracepoint_active(__tracepoint_write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } @@ -141,7 +141,7 @@ notrace static inline int native_write_msr_safe(unsigned int msr, : "c" (msr), "0" (low), "d" (high), [fault] "i" (-EIO) : "memory"); - if (msr_tracepoint_active(__tracepoint_read_msr)) + if (msr_tracepoint_active(__tracepoint_write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), err); return err; } -- cgit v1.2.3-59-g8ed1b From 2969c03763b40e946b46aee57ef083527711c69f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 6 Jun 2016 16:24:43 -0400 Subject: ARM: dts: exynos: Fix port nodes names for Exynos5250 Snow board Commit 5c9cbade0629 ("ARM: dts: exynos: Fix DTC unit name warnings in Exynos5250") fixed all the DTC warnings about mismatchs between unit names and reg properties in Exynos5250 boards DTS. But unfortunately it also added a regression on the Exynos5250 Snow Chromebook when changing the port node names since the OF graph logic expects the port nodes to be always named 'port'. The Documentation/devicetree/bindings/graph.txt binding document says that when there is more than one port, '#address-cells', '#size-cells' and 'reg' properties should be used to number the port nodes. Fixes: 5c9cbade0629 ("ARM: dts: exynos: Fix DTC unit name warnings in Exynos5250") Reported-by: Marc Zyngier Signed-off-by: Javier Martinez Canillas Tested-by: Marc Zyngier Signed-off-by: Krzysztof Kozlowski --- arch/arm/boot/dts/exynos5250-snow-common.dtsi | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/exynos5250-snow-common.dtsi b/arch/arm/boot/dts/exynos5250-snow-common.dtsi index ddfe1f558c10..fa14f77df563 100644 --- a/arch/arm/boot/dts/exynos5250-snow-common.dtsi +++ b/arch/arm/boot/dts/exynos5250-snow-common.dtsi @@ -242,7 +242,7 @@ hpd-gpios = <&gpx0 7 GPIO_ACTIVE_HIGH>; ports { - port0 { + port { dp_out: endpoint { remote-endpoint = <&bridge_in>; }; @@ -485,13 +485,20 @@ edid-emulation = <5>; ports { - port0 { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + bridge_out: endpoint { remote-endpoint = <&panel_in>; }; }; - port1 { + port@1 { + reg = <1>; + bridge_in: endpoint { remote-endpoint = <&dp_out>; }; -- cgit v1.2.3-59-g8ed1b From a7d7865fecd83adb98b20eca5eddef7efc94831d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 6 Jun 2016 16:24:44 -0400 Subject: ARM: dts: exynos: Fix port nodes names for Exynos5420 Peach Pit board Commit bea7eef6949c ("ARM: dts: exynos: Fix DTC unit name warnings in Peach Pit") fixed the DTC warnings about mismatches between unit names and reg properties in the Exynos5420 Peach Pit DTS. But unfortunately it also added a regression on the Peach Pit when changing the port node names since the OF graph logic expects the port nodes to be always named 'port'. The Documentation/devicetree/bindings/graph.txt binding document says that when there is more than one port, '#address-cells', '#size-cells' and 'reg' properties should be used to number the port nodes. Fixes: bea7eef6949c ("ARM: dts: exynos: Fix DTC unit name warnings in Peach Pit") Reported-by: Marc Zyngier Signed-off-by: Javier Martinez Canillas Signed-off-by: Krzysztof Kozlowski --- arch/arm/boot/dts/exynos5420-peach-pit.dts | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index f9d2e4f1a0e0..1de972d46a87 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -163,7 +163,7 @@ hpd-gpios = <&gpx2 6 GPIO_ACTIVE_HIGH>; ports { - port0 { + port { dp_out: endpoint { remote-endpoint = <&bridge_in>; }; @@ -631,13 +631,20 @@ use-external-pwm; ports { - port0 { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + bridge_out: endpoint { remote-endpoint = <&panel_in>; }; }; - port1 { + port@1 { + reg = <1>; + bridge_in: endpoint { remote-endpoint = <&dp_out>; }; -- cgit v1.2.3-59-g8ed1b From 9c77679cadb118c0aa99e6f88533d91765a131ba Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 5 Apr 2016 17:01:33 -0700 Subject: x86, build: copy ldlinux.c32 to image.iso For newer versions of Syslinux, we need ldlinux.c32 in addition to isolinux.bin to reside on the boot disk, so if the latter is found, copy it, too, to the isoimage tree. Signed-off-by: H. Peter Anvin Cc: Linux Stable Tree --- arch/x86/boot/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 700a9c6e6159..be8e688fa0d4 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -162,6 +162,9 @@ isoimage: $(obj)/bzImage for i in lib lib64 share end ; do \ if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ + if [ -f /usr/$$i/syslinux/ldlinux.c32 ]; then \ + cp /usr/$$i/syslinux/ldlinux.c32 $(obj)/isoimage ; \ + fi ; \ break ; \ fi ; \ if [ $$i = end ] ; then exit 1 ; fi ; \ -- cgit v1.2.3-59-g8ed1b From 2c2a63e301fd19ccae673e79de59b30a232ff7f9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 8 Jun 2016 10:01:23 +1000 Subject: powerpc/pseries: Fix IBM_ARCH_VEC_NRCORES_OFFSET since POWER8NVL was added The recent commit 7cc851039d64 ("powerpc/pseries: Add POWER8NVL support to ibm,client-architecture-support call") added a new PVR mask & value to the start of the ibm_architecture_vec[] array. However it missed the fact that further down in the array, we hard code the offset of one of the fields, and then at boot use that value to patch the value in the array. This means every update to the array must also update the #define, ugh. This means that on pseries machines we will misreport to firmware the number of cores we support, by a factor of threads_per_core. Fix it for now by updating the #define. Fixes: 7cc851039d64 ("powerpc/pseries: Add POWER8NVL support to ibm,client-architecture-support call") Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ccd2037c797f..6ee4b72cda42 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -719,7 +719,7 @@ unsigned char ibm_architecture_vec[] = { * must match by the macro below. Update the definition if * the structure layout changes. */ -#define IBM_ARCH_VEC_NRCORES_OFFSET 125 +#define IBM_ARCH_VEC_NRCORES_OFFSET 133 W(NR_CPUS), /* number of cores supported */ 0, 0, -- cgit v1.2.3-59-g8ed1b From 1607f09c226d1378439c411baaaa020042750338 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 5 Jun 2016 23:14:14 +0200 Subject: coredump: fix dumping through pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The offset in the core file used to be tracked with ->written field of the coredump_params structure. The field was retired in favour of file->f_pos. However, ->f_pos is not maintained for pipes which leads to breakage. Restore explicit tracking of the offset in coredump_params. Introduce ->pos field for this purpose since ->written was already reused. Fixes: a00839395103 ("get rid of coredump_params->written"). Reported-by: Zbigniew Jędrzejewski-Szmek Signed-off-by: Mateusz Guzik Reviewed-by: Omar Sandoval Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/coredump.c | 2 +- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/coredump.c | 4 +++- include/linux/binfmts.h | 1 + 5 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 84fb984f29c1..85c85eb3e245 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -172,7 +172,7 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i, if (rc < 0) goto out; - skip = roundup(cprm->file->f_pos - total + sz, 4) - cprm->file->f_pos; + skip = roundup(cprm->pos - total + sz, 4) - cprm->pos; if (!dump_skip(cprm, skip)) goto Eio; out: diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e158b22ef32f..a7a28110dc80 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2275,7 +2275,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; /* Align to page */ - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 71ade0e556b7..203589311bf8 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1787,7 +1787,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; } - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; if (!elf_fdpic_dump_segments(cprm)) diff --git a/fs/coredump.c b/fs/coredump.c index 38a7ab87e10a..281b768000e6 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -794,6 +794,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) return 0; file->f_pos = pos; cprm->written += n; + cprm->pos += n; nr -= n; } return 1; @@ -808,6 +809,7 @@ int dump_skip(struct coredump_params *cprm, size_t nr) if (dump_interrupted() || file->f_op->llseek(file, nr, SEEK_CUR) < 0) return 0; + cprm->pos += nr; return 1; } else { while (nr > PAGE_SIZE) { @@ -822,7 +824,7 @@ EXPORT_SYMBOL(dump_skip); int dump_align(struct coredump_params *cprm, int align) { - unsigned mod = cprm->file->f_pos & (align - 1); + unsigned mod = cprm->pos & (align - 1); if (align & (align - 1)) return 0; return mod ? dump_skip(cprm, align - mod) : 1; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 576e4639ca60..314b3caa701c 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -65,6 +65,7 @@ struct coredump_params { unsigned long limit; unsigned long mm_flags; loff_t written; + loff_t pos; }; /* -- cgit v1.2.3-59-g8ed1b From 9690c15742688e9cb5ee4aa0b08e458551ceea13 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 2 Jun 2016 15:14:48 +0530 Subject: powerpc/mm/radix: Fix always false comparison against MMU_NO_CONTEXT In some of the radix TLB flush routines, we use a local to store the mm->context.id, AKA the PID. Currently we use an int, but the PID is unsigned long, so large values of PID will be truncated. In particular MMU_NO_CONTEXT is -1, which means all our comparisons against that value can never be true. This means we'll issue TLB flushes when we shouldn't on radix enabled machines. Fix it by using an unsigned long for the local. Discovered by Coverity. Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") Signed-off-by: Aneesh Kumar K.V Reviewed-by: Balbir Singh [mpe: Write change log] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 0fdaf93a3e09..54efba2fd66e 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -117,7 +117,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, */ void radix__local_flush_tlb_mm(struct mm_struct *mm) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm->context.id; @@ -130,7 +130,7 @@ EXPORT_SYMBOL(radix__local_flush_tlb_mm); void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm ? mm->context.id : 0; @@ -160,7 +160,7 @@ static int mm_is_core_local(struct mm_struct *mm) void radix__flush_tlb_mm(struct mm_struct *mm) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm->context.id; @@ -185,7 +185,7 @@ EXPORT_SYMBOL(radix__flush_tlb_mm); void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm ? mm->context.id : 0; -- cgit v1.2.3-59-g8ed1b From 3b6d1eb7ea65f4aa64115cf9ba02c190e5c9f6de Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 19 May 2016 13:24:30 +0530 Subject: powerpc/mm/hash: Compute the segment size correctly for ISA 3.0 PowerISA 3.0 encodes the segment size in the second half of hash page table entry. Update hpte_decode() accordingly. Fixes: 50de596de8be ("powerpc/mm/hash: Add support for Power9 Hash") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_native_64.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index d873f6507f72..40e05e7f43de 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -550,7 +550,11 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, } } /* This works for all page sizes, and for 256M and 1T segments */ - *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT; + else + *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; + shift = mmu_psize_defs[size].shift; avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm); -- cgit v1.2.3-59-g8ed1b From 0106d456c4cb1770253fefc0ab23c9ca760b43f7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Jun 2016 17:55:15 +0100 Subject: arm64: mm: always take dirty state from new pte in ptep_set_access_flags Commit 66dbd6e61a52 ("arm64: Implement ptep_set_access_flags() for hardware AF/DBM") ensured that pte flags are updated atomically in the face of potential concurrent, hardware-assisted updates. However, Alex reports that: | This patch breaks swapping for me. | In the broken case, you'll see either systemd cpu time spike (because | it's stuck in a page fault loop) or the system hang (because the | application owning the screen is stuck in a page fault loop). It turns out that this is because the 'dirty' argument to ptep_set_access_flags is always 0 for read faults, and so we can't use it to set PTE_RDONLY. The failing sequence is: 1. We put down a PTE_WRITE | PTE_DIRTY | PTE_AF pte 2. Memory pressure -> pte_mkold(pte) -> clear PTE_AF 3. A read faults due to the missing access flag 4. ptep_set_access_flags is called with dirty = 0, due to the read fault 5. pte is then made PTE_WRITE | PTE_DIRTY | PTE_AF | PTE_RDONLY (!) 6. A write faults, but pte_write is true so we get stuck The solution is to check the new page table entry (as would be done by the generic, non-atomic definition of ptep_set_access_flags that just calls set_pte_at) to establish the dirty state. Cc: # 4.3+ Fixes: 66dbd6e61a52 ("arm64: Implement ptep_set_access_flags() for hardware AF/DBM") Reviewed-by: Catalin Marinas Reported-by: Alexander Graf Tested-by: Alexander Graf Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 5954881a35ac..ba3fc12bd272 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -109,7 +109,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, * PTE_RDONLY is cleared by default in the asm below, so set it in * back if necessary (read-only or clean PTE). */ - if (!pte_write(entry) || !dirty) + if (!pte_write(entry) || !pte_sw_dirty(entry)) pte_val(entry) |= PTE_RDONLY; /* -- cgit v1.2.3-59-g8ed1b From 970442c599b22ccd644ebfe94d1d303bf6f87c05 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:27 -0700 Subject: x86/cpu/intel: Introduce macros for Intel family numbers Problem: We have a boatload of open-coded family-6 model numbers. Half of them have these model numbers in hex and the other half in decimal. This makes grepping for them tons of fun, if you were to try. Solution: Consolidate all the magic numbers. Put all the definitions in one header. The names here are closely derived from the comments describing the models from arch/x86/events/intel/core.c. We could easily make them shorter by doing things like s/SANDYBRIDGE/SNB/, but they seemed fine even with the longer versions to me. Do not take any of these names too literally, like "DESKTOP" or "MOBILE". These are all colloquial names and not precise descriptions of everywhere a given model will show up. Signed-off-by: Dave Hansen Cc: Adrian Hunter Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Darren Hart Cc: Dave Hansen Cc: Denys Vlasenko Cc: Doug Thompson Cc: Eduardo Valentin Cc: H. Peter Anvin Cc: Jacob Pan Cc: Kan Liang Cc: Len Brown Cc: Linus Torvalds Cc: Mauro Carvalho Chehab Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rajneesh Bhardwaj Cc: Souvik Kumar Chakravarty Cc: Srinivas Pandruvada Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Tony Luck Cc: Ulf Hansson Cc: Viresh Kumar Cc: Vishwanath Somayaji Cc: Zhang Rui Cc: jacob.jun.pan@intel.com Cc: linux-acpi@vger.kernel.org Cc: linux-edac@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Link: http://lkml.kernel.org/r/20160603001927.F2A7D828@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-family.h | 68 +++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 arch/x86/include/asm/intel-family.h (limited to 'arch') diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h new file mode 100644 index 000000000000..6999f7d01a0d --- /dev/null +++ b/arch/x86/include/asm/intel-family.h @@ -0,0 +1,68 @@ +#ifndef _ASM_X86_INTEL_FAMILY_H +#define _ASM_X86_INTEL_FAMILY_H + +/* + * "Big Core" Processors (Branded as Core, Xeon, etc...) + * + * The "_X" parts are generally the EP and EX Xeons, or the + * "Extreme" ones, like Broadwell-E. + * + * Things ending in "2" are usually because we have no better + * name for them. There's no processor called "WESTMERE2". + */ + +#define INTEL_FAM6_CORE_YONAH 0x0E +#define INTEL_FAM6_CORE2_MEROM 0x0F +#define INTEL_FAM6_CORE2_MEROM_L 0x16 +#define INTEL_FAM6_CORE2_PENRYN 0x17 +#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D + +#define INTEL_FAM6_NEHALEM 0x1E +#define INTEL_FAM6_NEHALEM_EP 0x1A +#define INTEL_FAM6_NEHALEM_EX 0x2E +#define INTEL_FAM6_WESTMERE 0x25 +#define INTEL_FAM6_WESTMERE2 0x1F +#define INTEL_FAM6_WESTMERE_EP 0x2C +#define INTEL_FAM6_WESTMERE_EX 0x2F + +#define INTEL_FAM6_SANDYBRIDGE 0x2A +#define INTEL_FAM6_SANDYBRIDGE_X 0x2D +#define INTEL_FAM6_IVYBRIDGE 0x3A +#define INTEL_FAM6_IVYBRIDGE_X 0x3E + +#define INTEL_FAM6_HASWELL_CORE 0x3C +#define INTEL_FAM6_HASWELL_X 0x3F +#define INTEL_FAM6_HASWELL_ULT 0x45 +#define INTEL_FAM6_HASWELL_GT3E 0x46 + +#define INTEL_FAM6_BROADWELL_CORE 0x3D +#define INTEL_FAM6_BROADWELL_XEON_D 0x56 +#define INTEL_FAM6_BROADWELL_GT3E 0x47 +#define INTEL_FAM6_BROADWELL_X 0x4F + +#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E +#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E +#define INTEL_FAM6_SKYLAKE_X 0x55 +#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E +#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E + +/* "Small Core" Processors (Atom) */ + +#define INTEL_FAM6_ATOM_PINEVIEW 0x1C +#define INTEL_FAM6_ATOM_LINCROFT 0x26 +#define INTEL_FAM6_ATOM_PENWELL 0x27 +#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35 +#define INTEL_FAM6_ATOM_CEDARVIEW 0x36 +#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */ +#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ +#define INTEL_FAM6_ATOM_MERRIFIELD1 0x4A /* Tangier */ +#define INTEL_FAM6_ATOM_MERRIFIELD2 0x5A /* Annidale */ +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C +#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ + +/* Xeon Phi */ + +#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ + +#endif /* _ASM_X86_INTEL_FAMILY_H */ -- cgit v1.2.3-59-g8ed1b From 96685a55a82c383cbba7ef1d4a636acf708cf17f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 1 Jun 2016 12:04:28 +0200 Subject: x86/cpu/AMD: Extend X86_FEATURE_TOPOEXT workaround to newer models We need to reenable the topology extensions CPUID leafs on newer models too, if BIOS has disabled them, as we rely on them to get proper compute unit topology. Make the printk a once thing, while at it. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rui Huang Cc: Sherry Hurwitz Cc: Thomas Gleixner Cc: linux-hwmon@vger.kernel.org Link: http://lkml.kernel.org/r/1464775468-23355-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c343a54bed39..f5c69d8974e1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -674,14 +674,14 @@ static void init_amd_bd(struct cpuinfo_x86 *c) u64 value; /* re-enable TopologyExtensions if switched off by BIOS */ - if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && + if ((c->x86_model >= 0x10) && (c->x86_model <= 0x6f) && !cpu_has(c, X86_FEATURE_TOPOEXT)) { if (msr_set_bit(0xc0011005, 54) > 0) { rdmsrl(0xc0011005, value); if (value & BIT_64(54)) { set_cpu_cap(c, X86_FEATURE_TOPOEXT); - pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); + pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); } } } -- cgit v1.2.3-59-g8ed1b From c106c21ce02e366f3dc887bff7c48f3f140c45c9 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 25 May 2016 22:40:42 +0000 Subject: ARM: dts: socfpga: Add missing PHY phandle Add missing PHY phandle into the DT, otherwise the stmmac code won't detect the PHY correctly anymore. Signed-off-by: Marek Vasut Cc: Dinh Nguyen Signed-off-by: Dinh Nguyen --- arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts index a3601e4c0a2e..b844473601d2 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts @@ -136,6 +136,7 @@ &gmac1 { status = "okay"; phy-mode = "rgmii"; + phy-handle = <&phy1>; snps,reset-gpio = <&porta 0 GPIO_ACTIVE_LOW>; snps,reset-active-low; -- cgit v1.2.3-59-g8ed1b From 2e4094bdaa3ef295abbebb31b978e3344ee64257 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 19 May 2016 18:20:17 -0500 Subject: ARM: OMAP2: Enable Errata 430973 for OMAP3 Enable Erratum 430973 similar to commit 5c86c5339c56 ("ARM: omap2plus_defconfig: Enable ARM erratum 430973 for omap3") - Since multiple defconfigs can exist from various points of view (multi_v7, omap2plus etc.. it is always better to enable the erratum from the Kconfig selection point of view so that downstream kernels dont have to rediscover this all over again. Reported-by: Grygorii Strashko Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 0517f0c1581a..b9b71328249b 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -17,6 +17,7 @@ config ARCH_OMAP3 select PM_OPP if PM select PM if CPU_IDLE select SOC_HAS_OMAP2_SDRC + select ARM_ERRATA_430973 config ARCH_OMAP4 bool "TI OMAP4" -- cgit v1.2.3-59-g8ed1b From 4c88c1c72f86dab63d8219c0aa9e9a398f2efaa9 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 20 May 2016 13:13:33 +0300 Subject: ARM: dts: DRA74x: fix DSS PLL2 addresses DSS's 'pll2_clkctrl' and 'pll2' have wrong addresses in the dra74x.dtsi file. Video PLL2 has not been used so wrong addresses went unnoticed. Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra74x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra74x.dtsi b/arch/arm/boot/dts/dra74x.dtsi index 4220eeffc65a..5e06020f450b 100644 --- a/arch/arm/boot/dts/dra74x.dtsi +++ b/arch/arm/boot/dts/dra74x.dtsi @@ -107,8 +107,8 @@ reg = <0x58000000 0x80>, <0x58004054 0x4>, <0x58004300 0x20>, - <0x58005054 0x4>, - <0x58005300 0x20>; + <0x58009054 0x4>, + <0x58009300 0x20>; reg-names = "dss", "pll1_clkctrl", "pll1", "pll2_clkctrl", "pll2"; -- cgit v1.2.3-59-g8ed1b From 8d29bdba7291f9f939bc17ac088ab650d106d451 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 24 May 2016 11:12:29 -0500 Subject: ARM: OMAP2+: Select OMAP_INTERCONNECT for SOC_AM43XX AM43XX SoCs make use of the omap_l3_noc driver so explicitly select OMAP_INTERCONNECT in the Kconfig for SOC_AM43XX to ensure it always gets enabled for AM43XX only builds. Signed-off-by: Dave Gerlach Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index b9b71328249b..e6405c094f37 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -37,6 +37,7 @@ config ARCH_OMAP4 select PM if CPU_IDLE select ARM_ERRATA_754322 select ARM_ERRATA_775420 + select OMAP_INTERCONNECT config SOC_OMAP5 bool "TI OMAP5" -- cgit v1.2.3-59-g8ed1b From 20c15226d1c73150c4d9107301cac5dda0b7f995 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 11 May 2016 16:39:30 -0300 Subject: ARM: imx6ul: Fix Micrel PHY mask The value used for Micrel PHY mask is not correct. Use the MICREL_PHY_ID_MASK definition instead. Thanks to Jiri Luznicky for proposing the fix at https://community.freescale.com/thread/387739 Cc: Fixes: 709bc0657fe6f9f55 ("ARM: imx6ul: add fec MAC refrence clock and phy fixup init") Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mach-imx6ul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/mach-imx6ul.c b/arch/arm/mach-imx/mach-imx6ul.c index a38b16b69923..b56de4b8cdf2 100644 --- a/arch/arm/mach-imx/mach-imx6ul.c +++ b/arch/arm/mach-imx/mach-imx6ul.c @@ -46,7 +46,7 @@ static int ksz8081_phy_fixup(struct phy_device *dev) static void __init imx6ul_enet_phy_init(void) { if (IS_BUILTIN(CONFIG_PHYLIB)) - phy_register_fixup_for_uid(PHY_ID_KSZ8081, 0xffffffff, + phy_register_fixup_for_uid(PHY_ID_KSZ8081, MICREL_PHY_ID_MASK, ksz8081_phy_fixup); } -- cgit v1.2.3-59-g8ed1b From 624531886987f0f1b5d01fb598034d039198e090 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Jun 2016 17:57:54 +0100 Subject: ARM: 8578/1: mm: ensure pmd_present only checks the valid bit In a subsequent patch, pmd_mknotpresent will clear the valid bit of the pmd entry, resulting in a not-present entry from the hardware's perspective. Unfortunately, pmd_present simply checks for a non-zero pmd value and will therefore continue to return true even after a pmd_mknotpresent operation. Since pmd_mknotpresent is only used for managing huge entries, this is only an issue for the 3-level case. This patch fixes the 3-level pmd_present implementation to take into account the valid bit. For bisectability, the change is made before the fix to pmd_mknotpresent. [catalin.marinas@arm.com: comment update regarding pmd_mknotpresent patch] Fixes: 8d9625070073 ("ARM: mm: Transparent huge page support for LPAE systems.") Cc: # 3.11+ Cc: Russell King Cc: Steve Capper Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-2level.h | 1 + arch/arm/include/asm/pgtable-3level.h | 1 + arch/arm/include/asm/pgtable.h | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index aeddd28b3595..92fd2c8a9af0 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -193,6 +193,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pmd_large(pmd) (pmd_val(pmd) & 2) #define pmd_bad(pmd) (pmd_val(pmd) & 2) +#define pmd_present(pmd) (pmd_val(pmd)) #define copy_pmd(pmdpd,pmdps) \ do { \ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index fa70db7c714b..4dce1580bc15 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -211,6 +211,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) : !!(pmd_val(pmd) & (val))) #define pmd_isclear(pmd, val) (!(pmd_val(pmd) & (val))) +#define pmd_present(pmd) (pmd_isset((pmd), L_PMD_SECT_VALID)) #define pmd_young(pmd) (pmd_isset((pmd), PMD_SECT_AF)) #define pte_special(pte) (pte_isset((pte), L_PTE_SPECIAL)) static inline pte_t pte_mkspecial(pte_t pte) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 348caabb7625..d62204060cbe 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -182,7 +182,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) #define pmd_none(pmd) (!pmd_val(pmd)) -#define pmd_present(pmd) (pmd_val(pmd)) static inline pte_t *pmd_page_vaddr(pmd_t pmd) { -- cgit v1.2.3-59-g8ed1b From 56530f5d2ddc9b9fade7ef8db9cb886e9dc689b5 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 7 Jun 2016 17:58:06 +0100 Subject: ARM: 8579/1: mm: Fix definition of pmd_mknotpresent Currently pmd_mknotpresent will use a zero entry to respresent an invalidated pmd. Unfortunately this definition clashes with pmd_none, thus it is possible for a race condition to occur if zap_pmd_range sees pmd_none whilst __split_huge_pmd_locked is running too with pmdp_invalidate just called. This patch fixes the race condition by modifying pmd_mknotpresent to create non-zero faulting entries (as is done in other architectures), removing the ambiguity with pmd_none. [catalin.marinas@arm.com: using L_PMD_SECT_VALID instead of PMD_TYPE_SECT] Fixes: 8d9625070073 ("ARM: mm: Transparent huge page support for LPAE systems.") Cc: # 3.11+ Reported-by: Kirill A. Shutemov Acked-by: Will Deacon Cc: Russell King Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-3level.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 4dce1580bc15..2a029bceaf2f 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -250,10 +250,10 @@ PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); #define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) #define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) -/* represent a notpresent pmd by zero, this is used by pmdp_invalidate */ +/* represent a notpresent pmd by faulting entry, this is used by pmdp_invalidate */ static inline pmd_t pmd_mknotpresent(pmd_t pmd) { - return __pmd(0); + return __pmd(pmd_val(pmd) & ~L_PMD_SECT_VALID); } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) -- cgit v1.2.3-59-g8ed1b From 8017ea35d33f9e0950d369773ab48bcb1efb9ba0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 8 Jun 2016 21:50:50 +1000 Subject: powerpc/nohash: Fix build break with 64K pages Commit 74701d5947a6 "powerpc/mm: Rename function to indicate we are allocating fragments" renamed page_table_free() to pte_fragment_free(). One occurrence was mistyped as pte_fragment_fre(). This only breaks the nohash 64K page build, which is not the default or enabled in any defconfig. Fixes: 74701d5947a6 ("powerpc/mm: Rename function to indicate we are allocating fragments") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 0c12a3bfe2ab..069369f6414b 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -172,7 +172,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - pte_fragment_fre((unsigned long *)pte, 1); + pte_fragment_free((unsigned long *)pte, 1); } static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -- cgit v1.2.3-59-g8ed1b From 36194812a4063dd2a72070aec3ee7890d135a587 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 8 Jun 2016 19:55:50 +0530 Subject: powerpc/mm/radix: Update to tlb functions ric argument Radix invalidate control (RIC) is used to control which cache to flush using tlb instructions. When doing a PID flush, we currently flush everything including page walk cache. For address range flush, we flush only the TLB. In the next patch, we add support for flushing only the page walk cache. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 54efba2fd66e..71083621884e 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -18,16 +18,20 @@ static DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbiel_pid(unsigned long pid, int set) +#define RIC_FLUSH_TLB 0 +#define RIC_FLUSH_PWC 1 +#define RIC_FLUSH_ALL 2 + +static inline void __tlbiel_pid(unsigned long pid, int set, + unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rb |= set << PPC_BITLSHIFT(51); rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" @@ -39,25 +43,24 @@ static inline void __tlbiel_pid(unsigned long pid, int set) /* * We use 128 set in radix mode and 256 set in hpt mode. */ -static inline void _tlbiel_pid(unsigned long pid) +static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) { int set; for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { - __tlbiel_pid(pid, set); + __tlbiel_pid(pid, set, ric); } return; } -static inline void _tlbie_pid(unsigned long pid) +static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" @@ -67,16 +70,15 @@ static inline void _tlbie_pid(unsigned long pid) } static inline void _tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" @@ -86,16 +88,15 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, } static inline void _tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" @@ -122,7 +123,7 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) preempt_disable(); pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_tlb_mm); @@ -135,7 +136,7 @@ void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, preempt_disable(); pid = mm ? mm->context.id : 0; if (pid != MMU_NO_CONTEXT) - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); preempt_enable(); } @@ -172,11 +173,11 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(pid); + _tlbie_pid(pid, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); no_context: preempt_enable(); } @@ -196,11 +197,11 @@ void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_va(vmaddr, pid, ap); + _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); bail: preempt_enable(); } @@ -224,7 +225,7 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(0); + _tlbie_pid(0, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } -- cgit v1.2.3-59-g8ed1b From a145abf12c9f7d30d8c330c9d8a97428cbf0589b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 8 Jun 2016 19:55:51 +0530 Subject: powerpc/mm/radix: Flush page walk cache when freeing page table Even though a tlb_flush() does a flush with invalidate all cache, we can end up doing an RCU page table free before calling tlb_flush(). That means we can have page walk cache entries even after we free the page table pages. This can result in us doing wrong page table walk. Avoid this by doing pwc flush on every page table free. We can't batch the pwc flush, because the rcu call back function where we free the page table pages doesn't have information of the mmu gather. Thus we have to do a pwc on every page table page freed. Note: I also removed the dummy tlb_flush_pgtable call functions for hash 32. Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgalloc.h | 1 - arch/powerpc/include/asm/book3s/64/pgalloc.h | 16 ++++++++- .../powerpc/include/asm/book3s/64/tlbflush-radix.h | 3 ++ arch/powerpc/include/asm/book3s/64/tlbflush.h | 14 ++++++++ arch/powerpc/include/asm/book3s/pgalloc.h | 5 --- arch/powerpc/mm/tlb-radix.c | 41 ++++++++++++++++++++++ 6 files changed, 73 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index a2350194fc76..8e21bb492dca 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -102,7 +102,6 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { - tlb_flush_pgtable(tlb, address); pgtable_page_dtor(table); pgtable_free_tlb(tlb, page_address(table), 0); } diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 488279edb1f0..26eb2cb80c4e 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -110,6 +110,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); } @@ -127,6 +132,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX); } @@ -198,7 +208,11 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { - tlb_flush_pgtable(tlb, address); + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, table, 0); } diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 13ef38828dfe..3fa94fcac628 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -18,16 +18,19 @@ extern void radix__local_flush_tlb_mm(struct mm_struct *mm); extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid); +extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_SMP extern void radix__flush_tlb_mm(struct mm_struct *mm); extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid); +extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); #else #define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) #define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) #define radix___flush_tlb_page(mm,addr,p,i) radix___local_flush_tlb_page(mm,addr,p,i) +#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) #endif #endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index d98424ae356c..96e5769b18b0 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -72,5 +72,19 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) #define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) #endif /* CONFIG_SMP */ +/* + * flush the page walk cache for the address + */ +static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address) +{ + /* + * Flush the page table walk cache on freeing a page table. We already + * have marked the upper/higher level page table entry none by now. + * So it is safe to flush PWC here. + */ + if (!radix_enabled()) + return; + radix__flush_tlb_pwc(tlb, address); +} #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */ diff --git a/arch/powerpc/include/asm/book3s/pgalloc.h b/arch/powerpc/include/asm/book3s/pgalloc.h index 54f591e9572e..c0a69ae92256 100644 --- a/arch/powerpc/include/asm/book3s/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/pgalloc.h @@ -4,11 +4,6 @@ #include extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -static inline void tlb_flush_pgtable(struct mmu_gather *tlb, - unsigned long address) -{ - -} #ifdef CONFIG_PPC64 #include diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 71083621884e..ab2f60e812e2 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -128,6 +128,21 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) } EXPORT_SYMBOL(radix__local_flush_tlb_mm); +void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (pid != MMU_NO_CONTEXT) + _tlbiel_pid(pid, RIC_FLUSH_PWC); + + preempt_enable(); +} +EXPORT_SYMBOL(radix__local_flush_tlb_pwc); + void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { @@ -183,6 +198,32 @@ no_context: } EXPORT_SYMBOL(radix__flush_tlb_mm); +void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto no_context; + + if (!mm_is_core_local(mm)) { + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + raw_spin_lock(&native_tlbie_lock); + _tlbie_pid(pid, RIC_FLUSH_PWC); + if (lock_tlbie) + raw_spin_unlock(&native_tlbie_lock); + } else + _tlbiel_pid(pid, RIC_FLUSH_PWC); +no_context: + preempt_enable(); +} +EXPORT_SYMBOL(radix__flush_tlb_pwc); + void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { -- cgit v1.2.3-59-g8ed1b From 0487c44d1e1070b636ba3f3a12494ec456c2d005 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 10 Jun 2016 09:22:31 +0200 Subject: KVM: s390: ignore IBC if zero Looks like we forgot about the special IBC value of 0 meaning "no IBC". Let's fix that, otherwise it gets rounded up and suddenly an IBC is active with the lowest possible machine. Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Fixes: commit 053dd2308d81 ("KVM: s390: force ibc into valid range") Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6d8ec3ac9dd8..c9ae53924a69 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -657,7 +657,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) kvm->arch.model.cpuid = proc->cpuid; lowest_ibc = sclp.ibc >> 16 & 0xfff; unblocked_ibc = sclp.ibc & 0xfff; - if (lowest_ibc) { + if (lowest_ibc && proc->ibc) { if (proc->ibc > unblocked_ibc) kvm->arch.model.ibc = unblocked_ibc; else if (proc->ibc < lowest_ibc) -- cgit v1.2.3-59-g8ed1b From 9ec6de19235889ab0118e970ee732cb33c9efc06 Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Fri, 6 May 2016 16:33:06 +0300 Subject: KVM: s390: Add stats for PEI events Add partial execution intercepted events in kvm_stats_debugfs. Signed-off-by: Alexander Yarygin Acked-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/intercept.c | 2 ++ arch/s390/kvm/kvm-s390.c | 1 + 3 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 37b9017c6a96..ac82e8eb936d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -245,6 +245,7 @@ struct kvm_vcpu_stat { u32 exit_stop_request; u32 exit_validity; u32 exit_instruction; + u32 exit_pei; u32 halt_successful_poll; u32 halt_attempted_poll; u32 halt_poll_invalid; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 2e6b54e4d3f9..252157181302 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -341,6 +341,8 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) static int handle_partial_execution(struct kvm_vcpu *vcpu) { + vcpu->stat.exit_pei++; + if (vcpu->arch.sie_block->ipa == 0xb254) /* MVPG */ return handle_mvpg_pei(vcpu); if (vcpu->arch.sie_block->ipa >> 8 == 0xae) /* SIGP */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c9ae53924a69..43f2a2b80490 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -61,6 +61,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_external_request", VCPU_STAT(exit_external_request) }, { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, { "exit_instruction", VCPU_STAT(exit_instruction) }, + { "exit_pei", VCPU_STAT(exit_pei) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, -- cgit v1.2.3-59-g8ed1b From aaee8c3c5cce2d9107310dd9f3026b4f901d441c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 24 May 2016 15:54:04 -0700 Subject: x86/entry/traps: Don't force in_interrupt() to return true in IST handlers Forcing in_interrupt() to return true if we're not in a bona fide interrupt confuses the softirq code. This fixes warnings like: NOHZ: local_softirq_pending 282 ... which can happen when running things like selftests/x86. This will change perf's static percpu buffer usage in IST context. I think this is okay, and it's changing the behavior to match historical (pre-4.0) behavior. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 959274753857 ("x86, traps: Track entry into and exit from IST context") Link: http://lkml.kernel.org/r/cdc215f94d118d691d73df35275022331156fb45.1464130360.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d1590486204a..00f03d82e69a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -96,6 +96,12 @@ static inline void cond_local_irq_disable(struct pt_regs *regs) local_irq_disable(); } +/* + * In IST context, we explicitly disable preemption. This serves two + * purposes: it makes it much less likely that we would accidentally + * schedule in IST context and it will force a warning if we somehow + * manage to schedule by accident. + */ void ist_enter(struct pt_regs *regs) { if (user_mode(regs)) { @@ -110,13 +116,7 @@ void ist_enter(struct pt_regs *regs) rcu_nmi_enter(); } - /* - * We are atomic because we're on the IST stack; or we're on - * x86_32, in which case we still shouldn't schedule; or we're - * on x86_64 and entered from user mode, in which case we're - * still atomic unless ist_begin_non_atomic is called. - */ - preempt_count_add(HARDIRQ_OFFSET); + preempt_disable(); /* This code is a bit fragile. Test it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); @@ -124,7 +124,7 @@ void ist_enter(struct pt_regs *regs) void ist_exit(struct pt_regs *regs) { - preempt_count_sub(HARDIRQ_OFFSET); + preempt_enable_no_resched(); if (!user_mode(regs)) rcu_nmi_exit(); @@ -155,7 +155,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) BUG_ON((unsigned long)(current_top_of_stack() - current_stack_pointer()) >= THREAD_SIZE); - preempt_count_sub(HARDIRQ_OFFSET); + preempt_enable_no_resched(); } /** @@ -165,7 +165,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) */ void ist_end_non_atomic(void) { - preempt_count_add(HARDIRQ_OFFSET); + preempt_disable(); } static nokprobe_inline int -- cgit v1.2.3-59-g8ed1b From 9d98bcec731756b8688b59ec998707924d716d7b Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Wed, 8 Jun 2016 14:59:52 +0800 Subject: x86/ioapic: Fix incorrect pointers in ioapic_setup_resources() On a 4-socket Brickland system, hot-removing one ioapic is fine. Hot-removing the 2nd one causes panic in mp_unregister_ioapic() while calling release_resource(). It is because the iomem_res pointer has already been released when removing the first ioapic. To explain the use of &res[num] here: res is assigned to ioapic_resources, and later in ioapic_insert_resources() we do: struct resource *r = ioapic_resources; for_each_ioapic(i) { insert_resource(&iomem_resource, r); r++; } Here 'r' is treated as an arry of 'struct resource', and the r++ ensures that each element of the array is inserted separately. Thus we should call release_resouce() on each element at &res[num]. Fix it by assigning the correct pointers to ioapics[i].iomem_res in ioapic_setup_resources(). Signed-off-by: Rui Wang Signed-off-by: Thomas Gleixner Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1465369193-4816-3-git-send-email-rui.y.wang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 84e33ff5a6d5..446702ed99dc 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2588,8 +2588,8 @@ static struct resource * __init ioapic_setup_resources(void) res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY; snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; + ioapics[i].iomem_res = &res[num]; num++; - ioapics[i].iomem_res = res; } ioapic_resources = res; -- cgit v1.2.3-59-g8ed1b From d16c0d722d09496a03222dc27ee3071b7b1051e5 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 24 May 2016 08:35:38 -0500 Subject: ARM: OMAP: DRA7: powerdomain data: Set L3init and L4per to ON As per the latest revision F of public TRM for DRA7/AM57xx SoCs SPRUHZ6F[1] (April 2016), L4Per and L3init power domains now operate in always "ON" mode due to asymmetric aging limitations. Update the same [1] http://www.ti.com/lit/pdf/spruhz6 Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/powerdomains7xx_data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/powerdomains7xx_data.c b/arch/arm/mach-omap2/powerdomains7xx_data.c index 0ec2d00f4237..8ea447ed4dc4 100644 --- a/arch/arm/mach-omap2/powerdomains7xx_data.c +++ b/arch/arm/mach-omap2/powerdomains7xx_data.c @@ -111,7 +111,7 @@ static struct powerdomain l4per_7xx_pwrdm = { .name = "l4per_pwrdm", .prcm_offs = DRA7XX_PRM_L4PER_INST, .prcm_partition = DRA7XX_PRM_PARTITION, - .pwrsts = PWRSTS_RET_ON, + .pwrsts = PWRSTS_ON, .pwrsts_logic_ret = PWRSTS_RET, .banks = 2, .pwrsts_mem_ret = { @@ -260,7 +260,7 @@ static struct powerdomain l3init_7xx_pwrdm = { .name = "l3init_pwrdm", .prcm_offs = DRA7XX_PRM_L3INIT_INST, .prcm_partition = DRA7XX_PRM_PARTITION, - .pwrsts = PWRSTS_RET_ON, + .pwrsts = PWRSTS_ON, .pwrsts_logic_ret = PWRSTS_RET, .banks = 3, .pwrsts_mem_ret = { -- cgit v1.2.3-59-g8ed1b From 9ffb668f268c79f2f58b56bbd63208440b31260f Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 24 May 2016 08:35:39 -0500 Subject: ARM: OMAP: DRA7: powerdomain data: Remove unused pwrsts_logic_ret As per the latest revision F of public TRM for DRA7/AM57xx SoCs SPRUHZ6F[1] (April 2016), with the exception of MPU power domain (and CPUx sub power domains), all other power domains can either operate in "ON" mode OR in some cases, "OFF" mode. For these power states, the logic retention state is basically ignored by PRCM and does not require to be programmed. [1] http://www.ti.com/lit/pdf/spruhz6 Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/powerdomains7xx_data.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/powerdomains7xx_data.c b/arch/arm/mach-omap2/powerdomains7xx_data.c index 8ea447ed4dc4..88107b449710 100644 --- a/arch/arm/mach-omap2/powerdomains7xx_data.c +++ b/arch/arm/mach-omap2/powerdomains7xx_data.c @@ -36,7 +36,6 @@ static struct powerdomain iva_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_IVA_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 4, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* hwa_mem */ @@ -76,7 +75,6 @@ static struct powerdomain ipu_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_IPU_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 2, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* aessmem */ @@ -95,7 +93,6 @@ static struct powerdomain dss_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_DSS_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 1, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* dss_mem */ @@ -112,7 +109,6 @@ static struct powerdomain l4per_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_L4PER_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, - .pwrsts_logic_ret = PWRSTS_RET, .banks = 2, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* nonretained_bank */ @@ -161,7 +157,6 @@ static struct powerdomain core_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_CORE_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, - .pwrsts_logic_ret = PWRSTS_RET, .banks = 5, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* core_nret_bank */ @@ -226,7 +221,6 @@ static struct powerdomain vpe_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_VPE_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 1, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* vpe_bank */ @@ -261,7 +255,6 @@ static struct powerdomain l3init_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_L3INIT_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, - .pwrsts_logic_ret = PWRSTS_RET, .banks = 3, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* gmac_bank */ -- cgit v1.2.3-59-g8ed1b From 6b41d44862e8f3a4b95102c6ff6cad3fccc7994b Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 24 May 2016 08:35:40 -0500 Subject: ARM: OMAP: DRA7: powerdomain data: Remove unused pwrsts_mem_ret As per the latest revision F of public TRM for DRA7/AM57xx SoCs SPRUHZ6F[1] (April 2016), with the exception of MPU power domain, all other power domains do not have memories capable of retention since they all operate in either "ON" or "OFF" mode. For these power states, the retention state for memories are basically ignored by PRCM and does not require to be programmed. [1] http://www.ti.com/lit/pdf/spruhz6 Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/powerdomains7xx_data.c | 65 ------------------------------ 1 file changed, 65 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/powerdomains7xx_data.c b/arch/arm/mach-omap2/powerdomains7xx_data.c index 88107b449710..eb350a673133 100644 --- a/arch/arm/mach-omap2/powerdomains7xx_data.c +++ b/arch/arm/mach-omap2/powerdomains7xx_data.c @@ -37,12 +37,6 @@ static struct powerdomain iva_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 4, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* hwa_mem */ - [1] = PWRSTS_OFF_RET, /* sl2_mem */ - [2] = PWRSTS_OFF_RET, /* tcm1_mem */ - [3] = PWRSTS_OFF_RET, /* tcm2_mem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* hwa_mem */ [1] = PWRSTS_ON, /* sl2_mem */ @@ -76,10 +70,6 @@ static struct powerdomain ipu_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 2, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* aessmem */ - [1] = PWRSTS_OFF_RET, /* periphmem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* aessmem */ [1] = PWRSTS_ON, /* periphmem */ @@ -94,9 +84,6 @@ static struct powerdomain dss_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* dss_mem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* dss_mem */ }, @@ -110,10 +97,6 @@ static struct powerdomain l4per_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 2, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* nonretained_bank */ - [1] = PWRSTS_OFF_RET, /* retained_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* nonretained_bank */ [1] = PWRSTS_ON, /* retained_bank */ @@ -128,9 +111,6 @@ static struct powerdomain gpu_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* gpu_mem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* gpu_mem */ }, @@ -144,8 +124,6 @@ static struct powerdomain wkupaon_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 1, - .pwrsts_mem_ret = { - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* wkup_bank */ }, @@ -158,13 +136,6 @@ static struct powerdomain core_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 5, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* core_nret_bank */ - [1] = PWRSTS_OFF_RET, /* core_ocmram */ - [2] = PWRSTS_OFF_RET, /* core_other_bank */ - [3] = PWRSTS_OFF_RET, /* ipu_l2ram */ - [4] = PWRSTS_OFF_RET, /* ipu_unicache */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* core_nret_bank */ [1] = PWRSTS_ON, /* core_ocmram */ @@ -222,9 +193,6 @@ static struct powerdomain vpe_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* vpe_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* vpe_bank */ }, @@ -256,11 +224,6 @@ static struct powerdomain l3init_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 3, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* gmac_bank */ - [1] = PWRSTS_OFF_RET, /* l3init_bank1 */ - [2] = PWRSTS_OFF_RET, /* l3init_bank2 */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* gmac_bank */ [1] = PWRSTS_ON, /* l3init_bank1 */ @@ -276,9 +239,6 @@ static struct powerdomain eve3_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve3_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve3_bank */ }, @@ -292,9 +252,6 @@ static struct powerdomain emu_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* emu_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* emu_bank */ }, @@ -307,11 +264,6 @@ static struct powerdomain dsp2_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 3, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* dsp2_edma */ - [1] = PWRSTS_OFF_RET, /* dsp2_l1 */ - [2] = PWRSTS_OFF_RET, /* dsp2_l2 */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* dsp2_edma */ [1] = PWRSTS_ON, /* dsp2_l1 */ @@ -327,11 +279,6 @@ static struct powerdomain dsp1_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 3, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* dsp1_edma */ - [1] = PWRSTS_OFF_RET, /* dsp1_l1 */ - [2] = PWRSTS_OFF_RET, /* dsp1_l2 */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* dsp1_edma */ [1] = PWRSTS_ON, /* dsp1_l1 */ @@ -347,9 +294,6 @@ static struct powerdomain cam_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* vip_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* vip_bank */ }, @@ -363,9 +307,6 @@ static struct powerdomain eve4_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve4_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve4_bank */ }, @@ -379,9 +320,6 @@ static struct powerdomain eve2_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve2_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve2_bank */ }, @@ -395,9 +333,6 @@ static struct powerdomain eve1_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve1_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve1_bank */ }, -- cgit v1.2.3-59-g8ed1b From 1c343f7b0e177e8ca7f4d4a5dd1fa790f85abbcc Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 13 Jun 2016 13:14:56 +0200 Subject: KVM: s390/mm: Fix CMMA reset during reboot commit 1e133ab296f ("s390/mm: split arch/s390/mm/pgtable.c") factored out the page table handling code from __gmap_zap and __s390_reset_cmma into ptep_zap_unused and added a simple flag that tells which one of the function (reset or not) is to be made. This also changed the behaviour, as it also zaps unused page table entries on reset. Turns out that this is wrong as s390_reset_cmma uses the page walker, which DOES NOT take the ptl lock. The most simple fix is to not do the zapping part on reset (which uses the walker) Signed-off-by: Christian Borntraeger Fixes: 1e133ab296f ("s390/mm: split arch/s390/mm/pgtable.c") Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4324b87f9398..9f0ce0e6eeb4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -437,7 +437,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pgste = pgste_get_lock(ptep); pgstev = pgste_val(pgste); pte = *ptep; - if (pte_swap(pte) && + if (!reset && pte_swap(pte) && ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || (pgstev & _PGSTE_GPS_ZERO))) { ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); -- cgit v1.2.3-59-g8ed1b From 8550e2fa34f077c8a87cf1ba2453102bbbc9ade9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 8 Jun 2016 19:55:55 +0530 Subject: powerpc/mm/hash: Use the correct PPP mask when updating HPTE With commit e58e87adc8bf9 "powerpc/mm: Update _PAGE_KERNEL_RO" we now use all the three PPP bits. The top bit is now used to have a PPP value of 0b110 which will be mapped to kernel read only. When updating the hpte entry use right mask such that we update the 63rd bit (top 'P' bit) too. Prior to e58e87adc8bf we didn't support KERNEL_RO at all (it was == KERNEL_RW), so this isn't a regression as such. Fixes: e58e87adc8bf ("powerpc/mm: Update _PAGE_KERNEL_RO") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 + arch/powerpc/mm/hash_native_64.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 290157e8d5b2..74839f24f412 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -88,6 +88,7 @@ #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_PP ASM_CONST(0x0000000000000003) +#define HPTE_R_PPP ASM_CONST(0x8000000000000003) #define HPTE_R_N ASM_CONST(0x0000000000000004) #define HPTE_R_G ASM_CONST(0x0000000000000008) #define HPTE_R_M ASM_CONST(0x0000000000000010) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 40e05e7f43de..f8a871a72985 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -316,8 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> hit\n"); /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N | + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N | HPTE_R_C))); } native_unlock_hpte(hptep); @@ -385,8 +385,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N))); + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N))); /* * Ensure it is out of the tlb too. Bolted entries base and * actual page size will be same. -- cgit v1.2.3-59-g8ed1b From 797179bc4fe06c89e47a9f36f886f68640b423f8 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:43 +0100 Subject: MIPS: KVM: Fix modular KVM under QEMU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copy __kvm_mips_vcpu_run() into unmapped memory, so that we can never get a TLB refill exception in it when KVM is built as a module. This was observed to happen with the host MIPS kernel running under QEMU, due to a not entirely transparent optimisation in the QEMU TLB handling where TLB entries replaced with TLBWR are copied to a separate part of the TLB array. Code in those pages continue to be executable, but those mappings persist only until the next ASID switch, even if they are marked global. An ASID switch happens in __kvm_mips_vcpu_run() at exception level after switching to the guest exception base. Subsequent TLB mapped kernel instructions just prior to switching to the guest trigger a TLB refill exception, which enters the guest exception handlers without updating EPC. This appears as a guest triggered TLB refill on a host kernel mapped (host KSeg2) address, which is not handled correctly as user (guest) mode accesses to kernel (host) segments always generate address error exceptions. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: # 3.10.x- Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 1 + arch/mips/kvm/interrupt.h | 1 + arch/mips/kvm/locore.S | 1 + arch/mips/kvm/mips.c | 11 ++++++++++- 4 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 6733ac575da4..2d5bb133d11a 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -338,6 +338,7 @@ struct kvm_mips_tlb { #define KVM_MIPS_GUEST_TLB_SIZE 64 struct kvm_vcpu_arch { void *host_ebase, *guest_ebase; + int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); unsigned long host_stack; unsigned long host_gp; diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h index 4ab4bdfad703..2143884709e4 100644 --- a/arch/mips/kvm/interrupt.h +++ b/arch/mips/kvm/interrupt.h @@ -28,6 +28,7 @@ #define MIPS_EXC_MAX 12 /* XXXSL More to follow */ +extern char __kvm_mips_vcpu_run_end[]; extern char mips32_exception[], mips32_exceptionEnd[]; extern char mips32_GuestException[], mips32_GuestExceptionEnd[]; diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S index 3ef03009de5f..828fcfc1cd7f 100644 --- a/arch/mips/kvm/locore.S +++ b/arch/mips/kvm/locore.S @@ -202,6 +202,7 @@ FEXPORT(__kvm_mips_load_k0k1) /* Jump to guest */ eret +EXPORT(__kvm_mips_vcpu_run_end) VECTOR(MIPSX(exception), unknown) /* Find out what mode we came from and jump to the proper handler. */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index dc052fb5c7a2..44da5259f390 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -315,6 +315,15 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) memcpy(gebase + offset, mips32_GuestException, mips32_GuestExceptionEnd - mips32_GuestException); +#ifdef MODULE + offset += mips32_GuestExceptionEnd - mips32_GuestException; + memcpy(gebase + offset, (char *)__kvm_mips_vcpu_run, + __kvm_mips_vcpu_run_end - (char *)__kvm_mips_vcpu_run); + vcpu->arch.vcpu_run = gebase + offset; +#else + vcpu->arch.vcpu_run = __kvm_mips_vcpu_run; +#endif + /* Invalidate the icache for these ranges */ local_flush_icache_range((unsigned long)gebase, (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); @@ -404,7 +413,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Disable hardware page table walking while in guest */ htw_stop(); - r = __kvm_mips_vcpu_run(run, vcpu); + r = vcpu->arch.vcpu_run(run, vcpu); /* Re-enable HTW before enabling interrupts */ htw_start(); -- cgit v1.2.3-59-g8ed1b From 7f5a1ddc792901249c2060e165bcb3ca779cde35 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:44 +0100 Subject: MIPS: KVM: Include bit 31 in segment matches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When faulting guest addresses are matched against guest segments with the KVM_GUEST_KSEGX() macro, change the mask to 0xe0000000 so as to include bit 31. This is mainly for safety's sake, as it prevents a rogue BadVAddr in the host kseg2/kseg3 segments (e.g. 0xC*******) after a TLB exception from matching the guest kseg0 segment (e.g. 0x4*******), triggering an internal KVM error instead of allowing the corresponding guest kseg0 page to be mapped into the host vmalloc space. Such a rogue BadVAddr was observed to happen with the host MIPS kernel running under QEMU with KVM built as a module, due to a not entirely transparent optimisation in the QEMU TLB handling. This has already been worked around properly in a previous commit. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 2d5bb133d11a..36a391d289aa 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -74,7 +74,7 @@ #define KVM_GUEST_KUSEG 0x00000000UL #define KVM_GUEST_KSEG0 0x40000000UL #define KVM_GUEST_KSEG23 0x60000000UL -#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0x60000000) +#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0xe0000000) #define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff) #define KVM_GUEST_CKSEG0ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG0) -- cgit v1.2.3-59-g8ed1b From cc81e9486202345d6ca56495cf8b5f3d03fbc563 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:45 +0100 Subject: MIPS: KVM: Don't unwind PC when emulating CACHE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CACHE instruction is emulated by kvm_mips_emulate_cache(), the PC is first updated to point to the next instruction, and afterwards it falls through the "dont_update_pc" label, which rewinds the PC back to its original address. This works when dynamic translation of emulated instructions is enabled, since the CACHE instruction is replaced with a SYNCI which works without trapping, however when dynamic translation is disabled the guest hangs on CACHE instructions as they always trap and are never stepped over. Roughly swap the meanings of the "done" and "dont_update_pc" to match kvm_mips_emulate_CP0(), so that "done" will roll back the PC on failure, and "dont_update_pc" won't change PC at all (for the sake of exceptions that have already modified the PC). Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 396df6eb0a12..52bec0fe2fbb 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1666,7 +1666,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, cache, op, base, arch->gprs[base], offset); er = EMULATE_FAIL; preempt_enable(); - goto dont_update_pc; + goto done; } @@ -1694,16 +1694,20 @@ skip_fault: kvm_err("NO-OP CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", cache, op, base, arch->gprs[base], offset); er = EMULATE_FAIL; - preempt_enable(); - goto dont_update_pc; } preempt_enable(); +done: + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) + vcpu->arch.pc = curr_pc; dont_update_pc: - /* Rollback PC */ - vcpu->arch.pc = curr_pc; -done: + /* + * This is for exceptions whose emulation updates the PC, so do not + * overwrite the PC under any circumstances + */ + return er; } -- cgit v1.2.3-59-g8ed1b From 6df82a7b88dc9b0b519765562b005ef9196d812a Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:46 +0100 Subject: MIPS: KVM: Fix CACHE triggered exception emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When emulating TLB miss / invalid exceptions during CACHE instruction emulation, be sure to set up the correct PC and host_cp0_badvaddr state for the kvm_mips_emlulate_tlb*_ld() function to pick up for guest EPC and BadVAddr. PC needs to be rewound otherwise the guest EPC will end up pointing at the next instruction after the faulting CACHE instruction. host_cp0_badvaddr must be set because guest CACHE instructions trap with a Coprocessor Unusable exception, which doesn't update the host BadVAddr as a TLB exception would. This doesn't tend to get hit when dynamic translation of emulated instructions is enabled, since only the first execution of each CACHE instruction actually goes through this code path, with subsequent executions hitting the SYNCI instruction that it gets replaced with. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 52bec0fe2fbb..645c8a1982a7 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1636,6 +1636,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, if (index < 0) { vcpu->arch.host_cp0_entryhi = (va & VPN2_MASK); vcpu->arch.host_cp0_badvaddr = va; + vcpu->arch.pc = curr_pc; er = kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, vcpu); preempt_enable(); @@ -1647,6 +1648,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, * invalid exception to the guest */ if (!TLB_IS_VALID(*tlb, va)) { + vcpu->arch.host_cp0_badvaddr = va; + vcpu->arch.pc = curr_pc; er = kvm_mips_emulate_tlbinv_ld(cause, NULL, run, vcpu); preempt_enable(); -- cgit v1.2.3-59-g8ed1b From dcfc47248d3f7d28df6f531e6426b933de94370d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jun 2016 23:06:53 +0900 Subject: kprobes/x86: Clear TF bit in fault on single-stepping Fix kprobe_fault_handler() to clear the TF (trap flag) bit of the flags register in the case of a fault fixup on single-stepping. If we put a kprobe on the instruction which caused a page fault (e.g. actual mov instructions in copy_user_*), that fault happens on the single-stepping buffer. In this case, kprobes resets running instance so that the CPU can retry execution on the original ip address. However, current code forgets to reset the TF bit. Since this fault happens with TF bit set for enabling single-stepping, when it retries, it causes a debug exception and kprobes can not handle it because it already reset itself. On the most of x86-64 platform, it can be easily reproduced by using kprobe tracer. E.g. # cd /sys/kernel/debug/tracing # echo p copy_user_enhanced_fast_string+5 > kprobe_events # echo 1 > events/kprobes/enable And you'll see a kernel panic on do_debug(), since the debug trap is not handled by kprobes. To fix this problem, we just need to clear the TF bit when resetting running kprobe. Signed-off-by: Masami Hiramatsu Reviewed-by: Ananth N Mavinakayanahalli Acked-by: Steven Rostedt Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: systemtap@sourceware.org Cc: stable@vger.kernel.org # All the way back to ancient kernels Link: http://lkml.kernel.org/r/20160611140648.25885.37482.stgit@devbox [ Updated the comments. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 38cf7a741250..7847e5c0e0b5 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -961,7 +961,19 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * normal page fault. */ regs->ip = (unsigned long)cur->addr; + /* + * Trap flag (TF) has been set here because this fault + * happened where the single stepping will be done. + * So clear it by resetting the current kprobe: + */ + regs->flags &= ~X86_EFLAGS_TF; + + /* + * If the TF flag was set before the kprobe hit, + * don't touch it: + */ regs->flags |= kcb->kprobe_old_flags; + if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else -- cgit v1.2.3-59-g8ed1b From c5cea06be060f38e5400d796e61cfc8c36e52924 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 13 Jun 2016 11:15:14 +0100 Subject: arm64: fix dump_instr when PAN and UAO are in use If the kernel is set to show unhandled signals, and a user task does not handle a SIGILL as a result of an instruction abort, we will attempt to log the offending instruction with dump_instr before killing the task. We use dump_instr to log the encoding of the offending userspace instruction. However, dump_instr is also used to dump instructions from kernel space, and internally always switches to KERNEL_DS before dumping the instruction with get_user. When both PAN and UAO are in use, reading a user instruction via get_user while in KERNEL_DS will result in a permission fault, which leads to an Oops. As we have regs corresponding to the context of the original instruction abort, we can inspect this and only flip to KERNEL_DS if the original abort was taken from the kernel, avoiding this issue. At the same time, remove the redundant (and incorrect) comments regarding the order dump_mem and dump_instr are called in. Cc: Catalin Marinas Cc: James Morse Cc: Robin Murphy Cc: #4.6+ Signed-off-by: Mark Rutland Reported-by: Vladimir Murzin Tested-by: Vladimir Murzin Fixes: 57f4959bad0a154a ("arm64: kernel: Add support for User Access Override") Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f7cf463107df..2a43012616b7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -64,8 +64,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, /* * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. + * to safely read from kernel space. */ fs = get_fs(); set_fs(KERNEL_DS); @@ -111,21 +110,12 @@ static void dump_backtrace_entry(unsigned long where) print_ip_sym(where); } -static void dump_instr(const char *lvl, struct pt_regs *regs) +static void __dump_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); - mm_segment_t fs; char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; - /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. - */ - fs = get_fs(); - set_fs(KERNEL_DS); - for (i = -4; i < 1; i++) { unsigned int val, bad; @@ -139,8 +129,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } printk("%sCode: %s\n", lvl, str); +} - set_fs(fs); +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ + if (!user_mode(regs)) { + mm_segment_t fs = get_fs(); + set_fs(KERNEL_DS); + __dump_instr(lvl, regs); + set_fs(fs); + } else { + __dump_instr(lvl, regs); + } } static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) -- cgit v1.2.3-59-g8ed1b From bbb1681ee3653bdcfc6a4ba31902738118311fd4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 13 Jun 2016 17:57:02 +0100 Subject: arm64: mm: mark fault_info table const Unlike the debug_fault_info table, we never intentionally alter the fault_info table at runtime, and all derived pointers are treated as const currently. Make the table const so that it can be placed in .rodata and protected from unintentional writes, as we do for the syscall tables. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ba3fc12bd272..013e2cbe7924 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -441,7 +441,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } -static struct fault_info { +static const struct fault_info { int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); int sig; int code; -- cgit v1.2.3-59-g8ed1b From 7c64cc0531fa0d9720f9e15a0a0d97bcad1d1cd1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Apr 2016 10:42:25 -0700 Subject: arm: Use _rcuidle for smp_cross_call() tracepoints Further testing with false negatives suppressed by commit 293e2421fe25 ("rcu: Remove superfluous versions of rcu_read_lock_sched_held()") identified another unprotected use of RCU from the idle loop. Because RCU actively ignores idle-loop code (for energy-efficiency reasons, among other things), using RCU from the idle loop can result in too-short grace periods, in turn resulting in arbitrary misbehavior. The resulting lockdep-RCU splat is as follows: ------------------------------------------------------------------------ =============================== [ INFO: suspicious RCU usage. ] 4.6.0-rc5-next-20160426+ #1112 Not tainted ------------------------------- include/trace/events/ipi.h:35 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! no locks held by swapper/0/0. stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112 Hardware name: Generic OMAP4 (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xb0/0xe4) [] (dump_stack) from [] (smp_cross_call+0xbc/0x188) [] (smp_cross_call) from [] (generic_exec_single+0x9c/0x15c) [] (generic_exec_single) from [] (smp_call_function_single_async+0 x38/0x9c) [] (smp_call_function_single_async) from [] (cpuidle_coupled_poke_others+0x8c/0xa8) [] (cpuidle_coupled_poke_others) from [] (cpuidle_enter_state_coupled+0x26c/0x390) [] (cpuidle_enter_state_coupled) from [] (cpu_startup_entry+0x198/0x3a0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8) [] (start_kernel) from [<8000807c>] (0x8000807c) ------------------------------------------------------------------------ Reported-by: Tony Lindgren Signed-off-by: Paul E. McKenney Tested-by: Tony Lindgren Tested-by: Guenter Roeck Cc: Russell King Cc: Steven Rostedt Cc: Cc: --- arch/arm/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index baee70267f29..7afe48ae5d76 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -486,7 +486,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) { - trace_ipi_raise(target, ipi_types[ipinr]); + trace_ipi_raise_rcuidle(target, ipi_types[ipinr]); __smp_cross_call(target, ipinr); } -- cgit v1.2.3-59-g8ed1b From 38b850a73034f075c4088e7511b36ebbef9dce00 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Jun 2016 15:27:04 +0100 Subject: arm64: spinlock: order spin_{is_locked,unlock_wait} against local locks spin_is_locked has grown two very different use-cases: (1) [The sane case] API functions may require a certain lock to be held by the caller and can therefore use spin_is_locked as part of an assert statement in order to verify that the lock is indeed held. For example, usage of assert_spin_locked. (2) [The insane case] There are two locks, where a CPU takes one of the locks and then checks whether or not the other one is held before accessing some shared state. For example, the "optimized locking" in ipc/sem.c. In the latter case, the sequence looks like: spin_lock(&sem->lock); if (!spin_is_locked(&sma->sem_perm.lock)) /* Access shared state */ and requires that the spin_is_locked check is ordered after taking the sem->lock. Unfortunately, since our spinlocks are implemented using a LDAXR/STXR sequence, the read of &sma->sem_perm.lock can be speculated before the STXR and consequently return a stale value. Whilst this hasn't been seen to cause issues in practice, PowerPC fixed the same issue in 51d7d5205d33 ("powerpc: Add smp_mb() to arch_spin_is_locked()") and, although we did something similar for spin_unlock_wait in d86b8da04dfa ("arm64: spinlock: serialise spin_unlock_wait against concurrent lockers") that doesn't actually take care of ordering against local acquisition of a different lock. This patch adds an smp_mb() to the start of our arch_spin_is_locked and arch_spin_unlock_wait routines to ensure that the lock value is always loaded after any other locks have been taken by the current CPU. Reported-by: Peter Zijlstra Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index fc9682bfe002..aac64d55cb22 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -31,6 +31,12 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) unsigned int tmp; arch_spinlock_t lockval; + /* + * Ensure prior spin_lock operations to other locks have completed + * on this CPU before we test whether "lock" is locked. + */ + smp_mb(); + asm volatile( " sevl\n" "1: wfe\n" @@ -148,6 +154,7 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { + smp_mb(); /* See arch_spin_unlock_wait */ return !arch_spin_value_unlocked(READ_ONCE(*lock)); } -- cgit v1.2.3-59-g8ed1b From 3a5facd09da848193f5bcb0dea098a298bc1a29d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 Jun 2016 15:10:57 +0100 Subject: arm64: spinlock: fix spin_unlock_wait for LSE atomics Commit d86b8da04dfa ("arm64: spinlock: serialise spin_unlock_wait against concurrent lockers") fixed spin_unlock_wait for LL/SC-based atomics under the premise that the LSE atomics (in particular, the LDADDA instruction) are indivisible. Unfortunately, these instructions are only indivisible when used with the -AL (full ordering) suffix and, consequently, the same issue can theoretically be observed with LSE atomics, where a later (in program order) load can be speculated before the write portion of the atomic operation. This patch fixes the issue by performing a CAS of the lock once we've established that it's unlocked, in much the same way as the LL/SC code. Fixes: d86b8da04dfa ("arm64: spinlock: serialise spin_unlock_wait against concurrent lockers") Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index aac64d55cb22..d5c894253e73 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -43,13 +43,17 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) "2: ldaxr %w0, %2\n" " eor %w1, %w0, %w0, ror #16\n" " cbnz %w1, 1b\n" + /* Serialise against any concurrent lockers */ ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " stxr %w1, %w0, %2\n" -" cbnz %w1, 2b\n", /* Serialise against any concurrent lockers */ - /* LSE atomics */ " nop\n" -" nop\n") +" nop\n", + /* LSE atomics */ +" mov %w1, %w0\n" +" cas %w0, %w0, %2\n" +" eor %w1, %w1, %w0\n") +" cbnz %w1, 2b\n" : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) : : "memory"); -- cgit v1.2.3-59-g8ed1b From c56bdcac153e60d96a619a59c7981f2a78cba598 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Jun 2016 18:40:07 +0100 Subject: arm64: spinlock: Ensure forward-progress in spin_unlock_wait Rather than wait until we observe the lock being free (which might never happen), we can also return from spin_unlock_wait if we observe that the lock is now held by somebody else, which implies that it was unlocked but we just missed seeing it in that state. Furthermore, in such a scenario there is no longer a need to write back the value that we loaded, since we know that there has been a lock hand-off, which is sufficient to publish any stores prior to the unlock_wait because the ARm architecture ensures that a Store-Release instruction is multi-copy atomic when observed by a Load-Acquire instruction. The litmus test is something like: AArch64 { 0:X1=x; 0:X3=y; 1:X1=y; 2:X1=y; 2:X3=x; } P0 | P1 | P2 ; MOV W0,#1 | MOV W0,#1 | LDAR W0,[X1] ; STR W0,[X1] | STLR W0,[X1] | LDR W2,[X3] ; DMB SY | | ; LDR W2,[X3] | | ; exists (0:X2=0 /\ 2:X0=1 /\ 2:X2=0) where P0 is doing spin_unlock_wait, P1 is doing spin_unlock and P2 is doing spin_lock. Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index d5c894253e73..e875a5a551d7 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -30,20 +30,39 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { unsigned int tmp; arch_spinlock_t lockval; + u32 owner; /* * Ensure prior spin_lock operations to other locks have completed * on this CPU before we test whether "lock" is locked. */ smp_mb(); + owner = READ_ONCE(lock->owner) << 16; asm volatile( " sevl\n" "1: wfe\n" "2: ldaxr %w0, %2\n" + /* Is the lock free? */ " eor %w1, %w0, %w0, ror #16\n" -" cbnz %w1, 1b\n" - /* Serialise against any concurrent lockers */ +" cbz %w1, 3f\n" + /* Lock taken -- has there been a subsequent unlock->lock transition? */ +" eor %w1, %w3, %w0, lsl #16\n" +" cbz %w1, 1b\n" + /* + * The owner has been updated, so there was an unlock->lock + * transition that we missed. That means we can rely on the + * store-release of the unlock operation paired with the + * load-acquire of the lock operation to publish any of our + * previous stores to the new lock owner and therefore don't + * need to bother with the writeback below. + */ +" b 4f\n" +"3:\n" + /* + * Serialise against any concurrent lockers by writing back the + * unlocked lock value + */ ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " stxr %w1, %w0, %2\n" @@ -53,9 +72,11 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) " mov %w1, %w0\n" " cas %w0, %w0, %2\n" " eor %w1, %w1, %w0\n") + /* Somebody else wrote to the lock, GOTO 10 and reload the value */ " cbnz %w1, 2b\n" +"4:" : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) - : + : "r" (owner) : "memory"); } -- cgit v1.2.3-59-g8ed1b From 7d669f50847481c52faf0656aea7b4be63113210 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 15 Jun 2016 17:23:45 -0500 Subject: kvm: svm: Fix implicit declaration for __default_cpu_present_to_apicid() The commit 8221c1370056 ("svm: Manage vcpu load/unload when enable AVIC") introduces a build error due to implicit function declaration when #ifdef CONFIG_X86_32 and #ifndef CONFIG_X86_LOCAL_APIC (as reported by Kbuild test robot i386-randconfig-x0-06121009). So, this patch introduces kvm_cpu_get_apicid() wrapper around __default_cpu_present_to_apicid() with additional handling if CONFIG_X86_LOCAL_APIC is not defined. Reported-by: kbuild test robot Fixes: commit 8221c1370056 ("svm: Manage vcpu load/unload when enable AVIC") Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 11 +++++++++++ arch/x86/kvm/svm.c | 6 +++--- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e0fbe7e70dc1..69e62862b622 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -1368,4 +1369,14 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} +static inline int kvm_cpu_get_apicid(int mps_cpu) +{ +#ifdef CONFIG_X86_LOCAL_APIC + return __default_cpu_present_to_apicid(mps_cpu); +#else + WARN_ON_ONCE(1); + return BAD_APICID; +#endif +} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1163e8173e5a..8a3c571e4a86 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1324,7 +1324,7 @@ free_avic: static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) { u64 entry; - int h_physical_id = __default_cpu_present_to_apicid(vcpu->cpu); + int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu); struct vcpu_svm *svm = to_svm(vcpu); if (!kvm_vcpu_apicv_active(vcpu)) @@ -1349,7 +1349,7 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { u64 entry; /* ID = 0xff (broadcast), ID > 0xff (reserved) */ - int h_physical_id = __default_cpu_present_to_apicid(cpu); + int h_physical_id = kvm_cpu_get_apicid(cpu); struct vcpu_svm *svm = to_svm(vcpu); if (!kvm_vcpu_apicv_active(vcpu)) @@ -4236,7 +4236,7 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) if (avic_vcpu_is_running(vcpu)) wrmsrl(SVM_AVIC_DOORBELL, - __default_cpu_present_to_apicid(vcpu->cpu)); + kvm_cpu_get_apicid(vcpu->cpu)); else kvm_vcpu_wake_up(vcpu); } -- cgit v1.2.3-59-g8ed1b From 5b8abf1f33ccd9f1cbc4248ade3cd507d9319c48 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 15 Jun 2016 17:24:36 -0500 Subject: kvm: svm: Do not support AVIC if not CONFIG_X86_LOCAL_APIC Add logic to disable AVIC #ifndef CONFIG_X86_LOCAL_APIC. Suggested-by: Paolo Bonzini Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8a3c571e4a86..16ef31b87452 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -238,7 +238,9 @@ module_param(nested, int, S_IRUGO); /* enable / disable AVIC */ static int avic; +#ifdef CONFIG_X86_LOCAL_APIC module_param(avic, int, S_IRUGO); +#endif static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu); @@ -981,11 +983,14 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); - if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC))) - avic = false; - - if (avic) - pr_info("AVIC enabled\n"); + if (avic) { + if (!npt_enabled || + !boot_cpu_has(X86_FEATURE_AVIC) || + !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) + avic = false; + else + pr_info("AVIC enabled\n"); + } return 0; -- cgit v1.2.3-59-g8ed1b From a0052191624e9bf8a8f9dc41b92ab5f252566c3c Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Mon, 13 Jun 2016 09:56:56 +0800 Subject: kvm: vmx: check apicv is active before using VT-d posted interrupt VT-d posted interrupt is relying on the CPU side's posted interrupt. Need to check whether VCPU's APICv is active before enabing VT-d posted interrupt. Fixes: d62caabb41f33d96333f9ef15e09cd26e1c12760 Cc: stable@vger.kernel.org Signed-off-by: Yang Zhang Signed-off-by: Shengge Ding Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fb93010beaa4..003618e324ce 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2072,7 +2072,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) unsigned int dest; if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return; do { @@ -2180,7 +2181,8 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return; /* Set SN when the vCPU is preempted */ @@ -10714,7 +10716,8 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu) struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return 0; vcpu->pre_pcpu = vcpu->cpu; @@ -10780,7 +10783,8 @@ static void vmx_post_block(struct kvm_vcpu *vcpu) unsigned long flags; if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return; do { @@ -10833,7 +10837,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, int idx, ret = -EINVAL; if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(kvm->vcpus[0])) return 0; idx = srcu_read_lock(&kvm->irq_srcu); -- cgit v1.2.3-59-g8ed1b From 9254e70c4ef1fee2e5c43feded4433d19cbb6177 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 9 Jun 2016 12:28:13 +0200 Subject: s390/cpum_cf: use perf software context for hardware counters On s390, there are two different hardware PMUs for counting and sampling. Previously, both PMUs have shared the perf_hw_context which is not correct and, recently, results in this warning: ------------[ cut here ]------------ WARNING: CPU: 5 PID: 1 at kernel/events/core.c:8485 perf_pmu_register+0x420/0x428 Modules linked in: CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc1+ #2 task: 00000009c5240000 ti: 00000009c5234000 task.ti: 00000009c5234000 Krnl PSW : 0704c00180000000 0000000000220c50 (perf_pmu_register+0x420/0x428) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Krnl GPRS: ffffffffffffffff 0000000000b15ac6 0000000000000000 00000009cb440000 000000000022087a 0000000000000000 0000000000b78fa0 0000000000000000 0000000000a9aa90 0000000000000084 0000000000000005 000000000088a97a 0000000000000004 0000000000749dd0 000000000022087a 00000009c5237cc0 Krnl Code: 0000000000220c44: a7f4ff54 brc 15,220aec 0000000000220c48: 92011000 mvi 0(%r1),1 #0000000000220c4c: a7f40001 brc 15,220c4e >0000000000220c50: a7f4ff12 brc 15,220a74 0000000000220c54: 0707 bcr 0,%r7 0000000000220c56: 0707 bcr 0,%r7 0000000000220c58: ebdff0800024 stmg %r13,%r15,128(%r15) 0000000000220c5e: a7f13fe0 tmll %r15,16352 Call Trace: ([<000000000022087a>] perf_pmu_register+0x4a/0x428) ([<0000000000b2c25c>] init_cpum_sampling_pmu+0x14c/0x1f8) ([<0000000000100248>] do_one_initcall+0x48/0x140) ([<0000000000b25d26>] kernel_init_freeable+0x1e6/0x2a0) ([<000000000072bda4>] kernel_init+0x24/0x138) ([<000000000073495e>] kernel_thread_starter+0x6/0xc) ([<0000000000734958>] kernel_thread_starter+0x0/0xc) Last Breaking-Event-Address: [<0000000000220c4c>] perf_pmu_register+0x41c/0x428 ---[ end trace 0c6ef9f5b771ad97 ]--- Using the perf_sw_context is an option because the cpum_cf PMU does not use interrupts. To make this more clear, initialize the capabilities in the PMU structure. Signed-off-by: Hendrik Brueckner Suggested-by: Peter Zijlstra Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 59215c518f37..7ec63b1d920d 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -649,6 +649,8 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) /* Performance monitoring unit for s390x */ static struct pmu cpumf_pmu = { + .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, .pmu_enable = cpumf_pmu_enable, .pmu_disable = cpumf_pmu_disable, .event_init = cpumf_pmu_event_init, @@ -708,12 +710,6 @@ static int __init cpumf_pmu_init(void) goto out; } - /* The CPU measurement counter facility does not have overflow - * interrupts to do sampling. Sampling must be provided by - * external means, for example, by timers. - */ - cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); if (rc) { -- cgit v1.2.3-59-g8ed1b From 0d15ef677839dab8313fbb86c007c3175b638d03 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 16 Jun 2016 16:51:52 +0100 Subject: arm64: kgdb: Match pstate size with gdbserver protocol Current versions of gdb do not interoperate cleanly with kgdb on arm64 systems because gdb and kgdb do not use the same register description. This patch modifies kgdb to work with recent releases of gdb (>= 7.8.1). Compatibility with gdb (after the patch is applied) is as follows: gdb-7.6 and earlier Ok gdb-7.7 series Works if user provides custom target description gdb-7.8(.0) Works if user provides custom target description gdb-7.8.1 and later Ok When commit 44679a4f142b ("arm64: KGDB: Add step debugging support") was introduced it was paired with a gdb patch that made an incompatible change to the gdbserver protocol. This patch was eventually merged into the gdb sources: https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commit;h=a4d9ba85ec5597a6a556afe26b712e878374b9dd The change to the protocol was mostly made to simplify big-endian support inside the kernel gdb stub. Unfortunately the gdb project released gdb-7.7.x and gdb-7.8.0 before the protocol incompatibility was identified and reversed: https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commit;h=bdc144174bcb11e808b4e73089b850cf9620a7ee This leaves us in a position where kgdb still uses the no-longer-used protocol; gdb-7.8.1, which restored the original behaviour, was released on 2014-10-29. I don't believe it is possible to detect/correct the protocol incompatiblity which means the kernel must take a view about which version of the gdb remote protocol is "correct". This patch takes the view that the original/current version of the protocol is correct and that version found in gdb-7.7.x and gdb-7.8.0 is anomalous. Signed-off-by: Daniel Thompson Signed-off-by: Will Deacon --- arch/arm64/include/asm/kgdb.h | 45 +++++++++++++++++++++++++++++++++++-------- arch/arm64/kernel/kgdb.c | 14 +++++++++++++- 2 files changed, 50 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h index f69f69c8120c..da84645525b9 100644 --- a/arch/arm64/include/asm/kgdb.h +++ b/arch/arm64/include/asm/kgdb.h @@ -38,25 +38,54 @@ extern int kgdb_fault_expected; #endif /* !__ASSEMBLY__ */ /* - * gdb is expecting the following registers layout. + * gdb remote procotol (well most versions of it) expects the following + * register layout. * * General purpose regs: * r0-r30: 64 bit * sp,pc : 64 bit - * pstate : 64 bit - * Total: 34 + * pstate : 32 bit + * Total: 33 + 1 * FPU regs: * f0-f31: 128 bit - * Total: 32 - * Extra regs * fpsr & fpcr: 32 bit - * Total: 2 + * Total: 32 + 2 * + * To expand a little on the "most versions of it"... when the gdb remote + * protocol for AArch64 was developed it depended on a statement in the + * Architecture Reference Manual that claimed "SPSR_ELx is a 32-bit register". + * and, as a result, allocated only 32-bits for the PSTATE in the remote + * protocol. In fact this statement is still present in ARM DDI 0487A.i. + * + * Unfortunately "is a 32-bit register" has a very special meaning for + * system registers. It means that "the upper bits, bits[63:32], are + * RES0.". RES0 is heavily used in the ARM architecture documents as a + * way to leave space for future architecture changes. So to translate a + * little for people who don't spend their spare time reading ARM architecture + * manuals, what "is a 32-bit register" actually means in this context is + * "is a 64-bit register but one with no meaning allocated to any of the + * upper 32-bits... *yet*". + * + * Perhaps then we should not be surprised that this has led to some + * confusion. Specifically a patch, influenced by the above translation, + * that extended PSTATE to 64-bit was accepted into gdb-7.7 but the patch + * was reverted in gdb-7.8.1 and all later releases, when this was + * discovered to be an undocumented protocol change. + * + * So... it is *not* wrong for us to only allocate 32-bits to PSTATE + * here even though the kernel itself allocates 64-bits for the same + * state. That is because this bit of code tells the kernel how the gdb + * remote protocol (well most versions of it) describes the register state. + * + * Note that if you are using one of the versions of gdb that supports + * the gdb-7.7 version of the protocol you cannot use kgdb directly + * without providing a custom register description (gdb can load new + * protocol descriptions at runtime). */ -#define _GP_REGS 34 +#define _GP_REGS 33 #define _FP_REGS 32 -#define _EXTRA_REGS 2 +#define _EXTRA_REGS 3 /* * general purpose registers size in bytes. * pstate is only 4 bytes. subtract 4 bytes diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index b67531a13136..b5f063e5eff7 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -58,7 +58,17 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { { "x30", 8, offsetof(struct pt_regs, regs[30])}, { "sp", 8, offsetof(struct pt_regs, sp)}, { "pc", 8, offsetof(struct pt_regs, pc)}, - { "pstate", 8, offsetof(struct pt_regs, pstate)}, + /* + * struct pt_regs thinks PSTATE is 64-bits wide but gdb remote + * protocol disagrees. Therefore we must extract only the lower + * 32-bits. Look for the big comment in asm/kgdb.h for more + * detail. + */ + { "pstate", 4, offsetof(struct pt_regs, pstate) +#ifdef CONFIG_CPU_BIG_ENDIAN + + 4 +#endif + }, { "v0", 16, -1 }, { "v1", 16, -1 }, { "v2", 16, -1 }, @@ -128,6 +138,8 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) memset((char *)gdb_regs, 0, NUMREGBYTES); thread_regs = task_pt_regs(task); memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); + /* Special case for PSTATE (check comments in asm/kgdb.h for details) */ + dbg_get_reg(33, gdb_regs + GP_REG_BYTES, thread_regs); } void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) -- cgit v1.2.3-59-g8ed1b From ef5bdccf6d4363fd934035e0b1ca8445975e1d89 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Thu, 16 Jun 2016 21:56:30 +0200 Subject: ARM: OMAP1: fix ams-delta FIQ handler to work with sparse IRQ After OMAP1 IRQ definitions have been changed by commit 685e2d08c54b ("ARM: OMAP1: Change interrupt numbering for sparse IRQ") introduced in v4.2, ams-delta FIQ handler which depends on them no longer works as expected. Fix it. Created and tested on Amstrad Delta against Linux-4.7-rc3 Signed-off-by: Janusz Krzysztofik Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/ams-delta-fiq-handler.S | 6 +++--- arch/arm/mach-omap1/ams-delta-fiq.c | 5 +++-- arch/arm/mach-omap1/include/mach/ams-delta-fiq.h | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S index 5d7fb596bf4a..bf608441b357 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S +++ b/arch/arm/mach-omap1/ams-delta-fiq-handler.S @@ -43,8 +43,8 @@ #define OTHERS_MASK (MODEM_IRQ_MASK | HOOK_SWITCH_MASK) /* IRQ handler register bitmasks */ -#define DEFERRED_FIQ_MASK (0x1 << (INT_DEFERRED_FIQ % IH2_BASE)) -#define GPIO_BANK1_MASK (0x1 << INT_GPIO_BANK1) +#define DEFERRED_FIQ_MASK OMAP_IRQ_BIT(INT_DEFERRED_FIQ) +#define GPIO_BANK1_MASK OMAP_IRQ_BIT(INT_GPIO_BANK1) /* Driver buffer byte offsets */ #define BUF_MASK (FIQ_MASK * 4) @@ -110,7 +110,7 @@ ENTRY(qwerty_fiqin_start) mov r8, #2 @ reset FIQ agreement str r8, [r12, #IRQ_CONTROL_REG_OFFSET] - cmp r10, #INT_GPIO_BANK1 @ is it GPIO bank interrupt? + cmp r10, #(INT_GPIO_BANK1 - NR_IRQS_LEGACY) @ is it GPIO interrupt? beq gpio @ yes - process it mov r8, #1 diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c index d1f12095f315..ec760ae2f917 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq.c +++ b/arch/arm/mach-omap1/ams-delta-fiq.c @@ -109,7 +109,8 @@ void __init ams_delta_init_fiq(void) * Since no set_type() method is provided by OMAP irq chip, * switch to edge triggered interrupt type manually. */ - offset = IRQ_ILR0_REG_OFFSET + INT_DEFERRED_FIQ * 0x4; + offset = IRQ_ILR0_REG_OFFSET + + ((INT_DEFERRED_FIQ - NR_IRQS_LEGACY) & 0x1f) * 0x4; val = omap_readl(DEFERRED_FIQ_IH_BASE + offset) & ~(1 << 1); omap_writel(val, DEFERRED_FIQ_IH_BASE + offset); @@ -149,7 +150,7 @@ void __init ams_delta_init_fiq(void) /* * Redirect GPIO interrupts to FIQ */ - offset = IRQ_ILR0_REG_OFFSET + INT_GPIO_BANK1 * 0x4; + offset = IRQ_ILR0_REG_OFFSET + (INT_GPIO_BANK1 - NR_IRQS_LEGACY) * 0x4; val = omap_readl(OMAP_IH1_BASE + offset) | 1; omap_writel(val, OMAP_IH1_BASE + offset); } diff --git a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h b/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h index adb5e7649659..6dfc3e1210a3 100644 --- a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h +++ b/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h @@ -14,6 +14,8 @@ #ifndef __AMS_DELTA_FIQ_H #define __AMS_DELTA_FIQ_H +#include + /* * Interrupt number used for passing control from FIQ to IRQ. * IRQ12, described as reserved, has been selected. -- cgit v1.2.3-59-g8ed1b From 970f9091d25df14e9540ec7ff48a2f709e284cd1 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 16 Jun 2016 15:25:18 +0300 Subject: ARM: OMAP2+: timer: add probe for clocksources A few platforms are currently missing clocksource_probe() completely in their time_init functionality. On OMAP3430 for example, this is causing cpuidle to be pretty much dead, as the counter32k is not going to be registered and instead a gptimer is used as a clocksource. This will tick in periodic mode, preventing any deeper idle states. While here, also drop one unnecessary check for populated DT before existing clocksource_probe() call. Signed-off-by: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/timer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 5b385bb8aff9..cb9497a20fb3 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -496,8 +496,7 @@ void __init omap_init_time(void) __omap_sync32k_timer_init(1, "timer_32k_ck", "ti,timer-alwon", 2, "timer_sys_ck", NULL, false); - if (of_have_populated_dt()) - clocksource_probe(); + clocksource_probe(); } #if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM43XX) @@ -505,6 +504,8 @@ void __init omap3_secure_sync32k_timer_init(void) { __omap_sync32k_timer_init(12, "secure_32k_fck", "ti,timer-secure", 2, "timer_sys_ck", NULL, false); + + clocksource_probe(); } #endif /* CONFIG_ARCH_OMAP3 */ @@ -513,6 +514,8 @@ void __init omap3_gptimer_timer_init(void) { __omap_sync32k_timer_init(2, "timer_sys_ck", NULL, 1, "timer_sys_ck", "ti,timer-alwon", true); + + clocksource_probe(); } #endif -- cgit v1.2.3-59-g8ed1b From e568006b9d828403397668864d9797dc4bfefd28 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 17 Jun 2016 11:32:00 +0530 Subject: powerpc/mm/hash: Don't add memory coherence if cache inhibited is set H_ENTER hcall handling in qemu had assumptions that a cache inhibited hpte entry won't have memory conference set. Also older kernel mentioned that some version of pHyp required this (the code removed by the below commit says: /* Make pHyp happy */ if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU)) hpte_r &= ~HPTE_R_M; But with older kernel we had some inconsistent memory conherence mapping. We always enabled memory conherence in the page fault path and removed memory conherence is _PAGE_NO_CACHE was set when we mapped the page via htab_bolt_mapping. The commit mentioned below tried to consolidate that by always enabling memory conherence. But as mentioned above that breaks Qemu H_ENTER handling. This patch update this such that we enable memory conherence only if cache inhibited is not set and bring fault handling, lpar and bolt mapping in sync. Fixes: commit 30bda41aba4e("powerpc/mm: Drop WIMG in favour of new constant") Reported-by: Darrick J. Wong Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b2740c67e172..5b22ba0b58bc 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -201,9 +201,8 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) /* * We can't allow hardware to update hpte bits. Hence always * set 'R' bit and set 'C' if it is a write fault - * Memory coherence is always enabled */ - rflags |= HPTE_R_R | HPTE_R_M; + rflags |= HPTE_R_R; if (pteflags & _PAGE_DIRTY) rflags |= HPTE_R_C; @@ -213,10 +212,15 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) rflags |= HPTE_R_I; - if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT) + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT) rflags |= (HPTE_R_I | HPTE_R_G); - if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) - rflags |= (HPTE_R_I | HPTE_R_W); + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) + rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); + else + /* + * Add memory coherence if cache inhibited is not set + */ + rflags |= HPTE_R_M; return rflags; } -- cgit v1.2.3-59-g8ed1b From b23d9c5b9c83c05e013aa52460f12a8365062cf4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 17 Jun 2016 11:40:36 +0530 Subject: powerpc/mm/radix: Update Radix tree size as per ISA 3.0 ISA 3.0 updated it to be encoded as Radix tree size = 2^(RTS + 31). We have it encoded as 2^(RTS + 28). Add a helper with the correct encoding and use it instead of opencoding. Fixes: 2bfd65e45e87 ("powerpc/mm/radix: Add radix callbacks for early init routines") Reviewed-by: Balbir Singh Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/radix.h | 15 +++++++++++++++ arch/powerpc/mm/mmu_context_book3s64.c | 2 +- arch/powerpc/mm/pgtable-radix.c | 9 +++------ 3 files changed, 19 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 937d4e247ac3..df294224e280 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -228,5 +228,20 @@ extern void radix__vmemmap_remove_mapping(unsigned long start, extern int radix__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, unsigned int psz); + +static inline unsigned long radix__get_tree_size(void) +{ + unsigned long rts_field; + /* + * we support 52 bits, hence 52-31 = 21, 0b10101 + * RTS encoding details + * bits 0 - 3 of rts -> bits 6 - 8 unsigned long + * bits 4 - 5 of rts -> bits 62 - 63 of unsigned long + */ + rts_field = (0x5UL << 5); /* 6 - 8 bits */ + rts_field |= (0x2UL << 61); + + return rts_field; +} #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 227b2a6c4544..196222227e82 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -65,7 +65,7 @@ static int radix__init_new_context(struct mm_struct *mm, int index) /* * set the process table entry, */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE); return 0; } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index c939e6e57a9e..e58707deef5c 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -160,9 +160,8 @@ redo: process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. - * we support 52 bits, hence 52-28 = 24, 11000 */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE); /* * Fill in the partition table. We are suppose to use effective address @@ -176,10 +175,8 @@ redo: static void __init radix_init_partition_table(void) { unsigned long rts_field; - /* - * we support 52 bits, hence 52-28 = 24, 11000 - */ - rts_field = 3ull << PPC_BITLSHIFT(2); + + rts_field = radix__get_tree_size(); BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); -- cgit v1.2.3-59-g8ed1b From a3aa256b7258b3d19f8b44557cc64525a993b941 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 17 Jun 2016 13:05:11 +1000 Subject: powerpc/eeh: Fix invalid cached PE primary bus The PE primary bus cannot be got from its child devices when having full hotplug in error recovery. The PE primary bus is cached, which is done in commit <05ba75f84864> ("powerpc/eeh: Fix stale cached primary bus"). In eeh_reset_device(), the flag (EEH_PE_PRI_BUS) is cleared before the PCI hot remove. eeh_pe_bus_get() then returns NULL as the PE primary bus in pnv_eeh_reset() and it crashes the kernel eventually. This fixes the issue by clearing the flag (EEH_PE_PRI_BUS) before the PCI hot add. With it, the PowerNV EEH reset backend (pnv_eeh_reset()) can get valid PE primary bus through eeh_pe_bus_get(). Fixes: 67086e32b564 ("powerpc/eeh: powerpc/eeh: Support error recovery for VF PE") Reported-by: Pridhiviraj Paidipeddi Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 2714a3b81d24..b5f73cb5eeb6 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -642,7 +642,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) { eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_lock_rescan_remove(); pci_hp_remove_devices(bus); pci_unlock_rescan_remove(); @@ -692,10 +691,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, */ edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - if (pe->type & EEH_PE_VF) + if (pe->type & EEH_PE_VF) { eeh_add_virt_device(edev, NULL); - else + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_hp_add_devices(bus); + } } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); -- cgit v1.2.3-59-g8ed1b From d279f7a7e95af6bb4b5eaea3527d1f85a28c5cf6 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 15 Jun 2016 11:45:28 +0530 Subject: ARM: dts: am437x-sk-evm: Reduce i2c0 bus speed for tps65218 Based on the latest timing specifications for the TPS65218 from the data sheet, http://www.ti.com/lit/ds/symlink/tps65218.pdf, document SLDS206 from November 2014, we must change the i2c bus speed to better fit within the minimum high SCL time required for proper i2c transfer. When running at 400khz, measurements show that SCL spends 0.8125 uS/1.666 uS high/low which violates the requirement for minimum high period of SCL provided in datasheet Table 7.6 which is 1 uS. Switching to 100khz gives us 5 uS/5 uS high/low which both fall above the minimum given values for 100 khz, 4.0 uS/4.7 uS high/low. Without this patch occasionally a voltage set operation from the kernel will appear to have worked but the actual voltage reflected on the PMIC will not have updated, causing problems especially with cpufreq that may update to a higher OPP without actually raising the voltage on DCDC2, leading to a hang. Signed-off-by: Dave Gerlach Signed-off-by: Nishanth Menon Signed-off-by: Franklin S Cooper Jr Signed-off-by: Aparna Balasubramanian Signed-off-by: Keerthy Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-sk-evm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index d82dd6e3f9b1..5687d6b4da60 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -418,7 +418,7 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins>; - clock-frequency = <400000>; + clock-frequency = <100000>; tps@24 { compatible = "ti,tps65218"; -- cgit v1.2.3-59-g8ed1b From 3a4955111ad46a022f05b51f91306d864f989625 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 27 May 2016 18:08:27 -0400 Subject: isa: Allow ISA-style drivers on modern systems Several modern devices, such as PC/104 cards, are expected to run on modern systems via an ISA bus interface. Since ISA is a legacy interface for most modern architectures, ISA support should remain disabled in general. Support for ISA-style drivers should be enabled on a per driver basis. To allow ISA-style drivers on modern systems, this patch introduces the ISA_BUS_API and ISA_BUS Kconfig options. The ISA bus driver will now build conditionally on the ISA_BUS_API Kconfig option, which defaults to the legacy ISA Kconfig option. The ISA_BUS Kconfig option allows the ISA_BUS_API Kconfig option to be selected on architectures which do not enable ISA (e.g. X86_64). The ISA_BUS Kconfig option is currently only implemented for X86 architectures. Other architectures may have their own ISA_BUS Kconfig options added as required. Reviewed-by: Guenter Roeck Signed-off-by: William Breathitt Gray Acked-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 3 +++ arch/x86/Kconfig | 9 +++++++++ drivers/base/Makefile | 2 +- include/linux/isa.h | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index d794384a0404..e9734796531f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -606,6 +606,9 @@ config HAVE_ARCH_HASH file which provides platform-specific implementations of some functions in or fs/namei.c. +config ISA_BUS_API + def_bool ISA + # # ABI hall of shame # diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a7b885964ba..d9a94da0c29f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2439,6 +2439,15 @@ config PCI_CNB20LE_QUIRK source "drivers/pci/Kconfig" +config ISA_BUS + bool "ISA-style bus support on modern systems" if EXPERT + select ISA_BUS_API + help + Enables ISA-style drivers on modern systems. This is necessary to + support PC/104 devices on X86_64 platforms. + + If unsure, say N. + # x86_64 have no ISA slots, but can have ISA-style DMA. config ISA_DMA_API bool "ISA-style DMA support" if (X86_64 && EXPERT) diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 6b2a84e7f2be..2609ba20b396 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_DMA_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o -obj-$(CONFIG_ISA) += isa.o +obj-$(CONFIG_ISA_BUS_API) += isa.o obj-$(CONFIG_FW_LOADER) += firmware_class.o obj-$(CONFIG_NUMA) += node.o obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o diff --git a/include/linux/isa.h b/include/linux/isa.h index 5ab85281230b..384ab9b7d79a 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -22,7 +22,7 @@ struct isa_driver { #define to_isa_driver(x) container_of((x), struct isa_driver, driver) -#ifdef CONFIG_ISA +#ifdef CONFIG_ISA_BUS_API int isa_register_driver(struct isa_driver *, unsigned int); void isa_unregister_driver(struct isa_driver *); #else -- cgit v1.2.3-59-g8ed1b From 0e289e534af1b20417a1940c8eba549588671cd8 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 17 Jun 2016 13:44:18 +0200 Subject: ARM: dts: STi: stih407-family: Disable reserved-memory co-processor nodes This patch fixes a non-booting issue in Mainline. When booting with a compressed kernel, we need to be careful how we populate memory close to DDR start. AUTO_ZRELADDR is enabled by default in multi-arch enabled configurations, which place some restrictions on where the kernel is placed and where it will be uncompressed to on boot. AUTO_ZRELADDR takes the decompressor code's start address and masks out the bottom 28 bits to obtain an address to uncompress the kernel to (thus a load address of 0x42000000 means that the kernel will be uncompressed to 0x40000000 i.e. DDR START on this platform). Even changing the load address to after the co-processor's shared memory won't render a booting platform, since the AUTO_ZRELADDR algorithm still ensures the kernel is uncompressed into memory shared with the first co-processor (0x40000000). Another option would be to move loading to 0x4A000000, since this will mean the decompressor will decompress the kernel to 0x48000000. However, this would mean a large chunk (0x44000000 => 0x48000000 (64MB)) of memory would essentially be wasted for no good reason. Until we can work with ST to find a suitable memory location to relocate co-processor shared memory, let's disable the shared memory nodes. This will ensure a working platform in the mean time. NB: The more observant of you will notice that we're leaving the DMU shared memory node enabled; this is because a) it is the only one in active use at the time of this writing and b) it is not affected by the current default behaviour which is causing issues. Fixes: fe135c6 (ARM: dts: STiH407: Move over to using the 'reserved-memory' API for obtaining DMA memory) Signed-off-by: Lee Jones Reviewed-by Peter Griffin Signed-off-by: Maxime Coquelin Signed-off-by: Olof Johansson --- arch/arm/boot/dts/stih407-family.dtsi | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi index ad8ba10764a3..d294e82447a2 100644 --- a/arch/arm/boot/dts/stih407-family.dtsi +++ b/arch/arm/boot/dts/stih407-family.dtsi @@ -24,18 +24,21 @@ compatible = "shared-dma-pool"; reg = <0x40000000 0x01000000>; no-map; + status = "disabled"; }; gp1_reserved: rproc@41000000 { compatible = "shared-dma-pool"; reg = <0x41000000 0x01000000>; no-map; + status = "disabled"; }; audio_reserved: rproc@42000000 { compatible = "shared-dma-pool"; reg = <0x42000000 0x01000000>; no-map; + status = "disabled"; }; dmu_reserved: rproc@43000000 { -- cgit v1.2.3-59-g8ed1b From f7e0efc9b50266f5317b50732efff98d40fc879a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 17 Jun 2016 18:33:00 +0100 Subject: arm64: update ASID limit During a rollover, we mark the active ASID on each CPU as reserved, before allocating a new ID for the task that caused the rollover. This means that with N CPUs, we can only guarantee the new task to obtain a valid ASID if we have at least N+1 ASIDs. Update this limit in the initcall check. Note that this restriction was introduced by commit 8e648066 on the arch/arm side, which disallow re-using the previously active ASID on the local CPU, as it would introduce a TLB race. In addition, we only dispose of NUM_USER_ASIDS-1, since ASID 0 is reserved. Add this restriction as well. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- arch/arm64/mm/context.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index b7b397802088..efcf1f7ef1e4 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -179,7 +179,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) &asid_generation); flush_context(cpu); - /* We have at least 1 ASID per CPU, so this will always succeed */ + /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); set_asid: @@ -227,8 +227,11 @@ switch_mm_fastpath: static int asids_init(void) { asid_bits = get_cpu_asid_bits(); - /* If we end up with more CPUs than ASIDs, expect things to crash */ - WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); + /* + * Expect allocation after rollover to fail if we don't have at least + * one more ASID than CPUs. ASID #0 is reserved for init_mm. + */ + WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus()); atomic64_set(&asid_generation, ASID_FIRST_VERSION); asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map), GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From 9ca4e58c20d5cb2a5bc378238188c71317aceac5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 21 Jun 2016 10:44:00 +0900 Subject: arm64: fix boot image dependencies to not generate invalid images I fixed boot image dependencies for arch/arm in commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images"). I see a similar problem for arch/arm64; "make -jN Image Image.gz" would sometimes end up generating bad images where N > 1. Fix the dependency in arch/arm64/Makefile to avoid the race between "make Image" and "make Image.*". Acked-by: Catalin Marinas Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7085e322dc42..648a32c89541 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -95,7 +95,7 @@ boot := arch/arm64/boot Image: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -Image.%: vmlinux +Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ zinstall install: -- cgit v1.2.3-59-g8ed1b From 20c27a4270c775d7ed661491af8ac03264d60fc6 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 21 Jun 2016 15:32:57 +0800 Subject: arm64: mm: remove page_mapping check in __sync_icache_dcache __sync_icache_dcache unconditionally skips the cache maintenance for anonymous pages, under the assumption that flushing is only required in the presence of D-side aliases [see 7249b79f6b4cc ("arm64: Do not flush the D-cache for anonymous pages")]. Unfortunately, this breaks migration of anonymous pages holding self-modifying code, where userspace cannot be reasonably expected to reissue maintenance instructions in response to a migration. This patch fixes the problem by removing the broken page_mapping(page) check from the cache syncing code, otherwise we may end up fetching and executing stale instructions from the PoU. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Reviewed-by: Catalin Marinas Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon --- arch/arm64/mm/flush.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index dbd12ea8ce68..43a76b07eb32 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -71,10 +71,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) { struct page *page = pte_page(pte); - /* no flushing needed for anonymous pages */ - if (!page_mapping(page)) - return; - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) sync_icache_aliases(page_address(page), PAGE_SIZE << compound_order(page)); -- cgit v1.2.3-59-g8ed1b From 5c492c3f5255bd34f7ff8867515ecf98dcba2a2e Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 Jun 2016 10:06:12 +0100 Subject: arm64: smp: Add function to determine if cpus are stuck in the kernel kernel/smp.c has a fancy counter that keeps track of the number of CPUs it marked as not-present and left in cpu_park_loop(). If there are any CPUs spinning in here, features like kexec or hibernate may release them by overwriting this memory. This problem also occurs on machines using spin-tables to release secondary cores. After commit 44dbcc93ab67 ("arm64: Fix behavior of maxcpus=N") we bring all known cpus into the secondary holding pen, meaning this memory can't be re-used by kexec or hibernate. Add a function cpus_are_stuck_in_kernel() to determine if either of these cases have occurred. Signed-off-by: James Morse Acked-by: Mark Rutland Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/smp.h | 12 ++++++++++++ arch/arm64/kernel/smp.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 433e50405274..022644704a93 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -124,6 +124,18 @@ static inline void cpu_panic_kernel(void) cpu_park_loop(); } +/* + * If a secondary CPU enters the kernel but fails to come online, + * (e.g. due to mismatched features), and cannot exit the kernel, + * we increment cpus_stuck_in_kernel and leave the CPU in a + * quiesecent loop within the kernel text. The memory containing + * this loop must not be re-used for anything else as the 'stuck' + * core is executing it. + * + * This function is used to inhibit features like kexec and hibernate. + */ +bool cpus_are_stuck_in_kernel(void); + #endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 678e0842cb3b..62ff3c0622e2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -909,3 +909,21 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +static bool have_cpu_die(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int any_cpu = raw_smp_processor_id(); + + if (cpu_ops[any_cpu]->cpu_die) + return true; +#endif + return false; +} + +bool cpus_are_stuck_in_kernel(void) +{ + bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die()); + + return !!cpus_stuck_in_kernel || smp_spin_tables; +} -- cgit v1.2.3-59-g8ed1b From d74b4e4f1a6dbe27acce723e071c86a6ed154bf2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 Jun 2016 10:06:13 +0100 Subject: arm64: hibernate: Don't hibernate on systems with stuck CPUs Hibernate relies on cpu hotplug to prevent secondary cores executing the kernel text while it is being restored. Add a call to cpus_are_stuck_in_kernel() to determine if there are CPUs not counted by 'num_online_cpus()', and prevent hibernate in this case. Fixes: 82869ac57b5 ("arm64: kernel: Add support for hibernate/suspend-to-disk") Acked-by: Mark Rutland Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index f8df75d740f4..21ab5df9fa76 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -236,6 +237,11 @@ int swsusp_arch_suspend(void) unsigned long flags; struct sleep_stack_data state; + if (cpus_are_stuck_in_kernel()) { + pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n"); + return -EBUSY; + } + local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { -- cgit v1.2.3-59-g8ed1b From 6e914ee629c411d9c6d160399ce7d3472d2c0ec7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 20 Jun 2016 19:23:43 +1000 Subject: powerpc: Fix faults caused by radix patching of SLB miss handler As part of the Radix MMU support we added some feature sections in the SLB miss handler. These are intended to catch the case that we incorrectly take an SLB miss when Radix is enabled, and instead of crashing weirdly they bail out to a well defined exit path and trigger an oops. However the way they were written meant the bailout case was enabled by default until we did CPU feature patching. On powermacs the early debug prints in setup_system() can cause an SLB miss, which happens before code patching, and so the SLB miss handler would incorrectly bailout and crash during boot. Fix it by inverting the sense of the feature section, so that the code which is in place at boot is correct for the hash case. Once we determine we are using Radix - which will never happen on a powermac - only then do we patch in the bailout case which unconditionally jumps. Fixes: caca285e5ab4 ("powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code") Reported-by: Denis Kirjanov Tested-by: Denis Kirjanov Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4c9440629128..8bcc1b457115 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1399,11 +1399,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ mtlr r10 -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) andi. r10,r12,MSR_RI /* check for unrecoverable exception */ +BEGIN_MMU_FTR_SECTION beq- 2f +FTR_SECTION_ELSE + b 2f +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX) .machine push .machine "power4" -- cgit v1.2.3-59-g8ed1b From 844e3be47693f92a108cb1fb3b0606bf25e9c7a6 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 22 Jun 2016 21:55:01 +0530 Subject: powerpc/bpf/jit: Disable classic BPF JIT on ppc64le Classic BPF JIT was never ported completely to work on little endian powerpc. However, it can be enabled and will crash the system when used. As such, disable use of BPF JIT on ppc64le. Fixes: 7c105b63bd98 ("powerpc: Add CONFIG_CPU_LITTLE_ENDIAN kernel config option.") Reported-by: Thadeu Lima de Souza Cascardo Signed-off-by: Naveen N. Rao Acked-by: Thadeu Lima de Souza Cascardo Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 01f7464d9fea..0a9d439bcda6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -128,7 +128,7 @@ config PPC select IRQ_FORCED_THREADING select HAVE_RCU_TABLE_FREE if SMP select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT + select HAVE_CBPF_JIT if CPU_BIG_ENDIAN select HAVE_ARCH_JUMP_LABEL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_HAS_GCOV_PROFILE_ALL -- cgit v1.2.3-59-g8ed1b From 1cf38741308c64d08553602b3374fb39224eeb5a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 23 Jun 2016 07:12:27 +0200 Subject: x86/xen: fix upper bound of pmd loop in xen_cleanhighmap() xen_cleanhighmap() is operating on level2_kernel_pgt only. The upper bound of the loop setting non-kernel-image entries to zero should not exceed the size of level2_kernel_pgt. Reported-by: Linus Torvalds Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 478a2de543a5..2693b7ed5a6f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1113,7 +1113,7 @@ static void __init xen_cleanhighmap(unsigned long vaddr, /* NOTE: The loop is more greedy than the cleanup_highmap variant. * We include the PMD passed in on _both_ boundaries. */ - for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); + for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD)); pmd++, vaddr += PMD_SIZE) { if (pmd_none(*pmd)) continue; -- cgit v1.2.3-59-g8ed1b From d6b186c1e2d852a92c43f090d0d8fad4704d51ef Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 17 May 2016 15:54:50 +0100 Subject: x86/xen: avoid m2p lookup when setting early page table entries When page tables entries are set using xen_set_pte_init() during early boot there is no page fault handler that could handle a fault when performing an M2P lookup. In 64 bit guests (usually dom0) early_ioremap() would fault in xen_set_pte_init() because an M2P lookup faults because the MFN is in MMIO space and not mapped in the M2P. This lookup is done to see if the PFN in in the range used for the initial page table pages, so that the PTE may be set as read-only. The M2P lookup can be avoided by moving the check (and clear of RW) earlier when the PFN is still available. Reported-by: Kevin Moraga Signed-off-by: David Vrabel Reviewed-by: Boris Ostrovsky Reviewed-by: Juergen Gross --- arch/x86/xen/mmu.c | 72 +++++++++++++++++++++++------------------------------- 1 file changed, 31 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 2693b7ed5a6f..67433714b791 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1551,41 +1551,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } -#ifdef CONFIG_X86_32 -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} -#else /* CONFIG_X86_64 */ -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - unsigned long pfn; - - if (xen_feature(XENFEAT_writable_page_tables) || - xen_feature(XENFEAT_auto_translated_physmap) || - xen_start_info->mfn_list >= __START_KERNEL_map) - return pte; - - /* - * Pages belonging to the initial p2m list mapped outside the default - * address range must be mapped read-only. This region contains the - * page tables for mapping the p2m list, too, and page tables MUST be - * mapped read-only. - */ - pfn = pte_pfn(pte); - if (pfn >= xen_start_info->first_p2m_pfn && - pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) - pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW); - - return pte; -} -#endif /* CONFIG_X86_64 */ - /* * Init-time set_pte while constructing initial pagetables, which * doesn't allow RO page table pages to be remapped RW. @@ -1600,13 +1565,37 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) * so always write the PTE directly and rely on Xen trapping and * emulating any updates as necessary. */ -static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) +__visible pte_t xen_make_pte_init(pteval_t pte) { - if (pte_mfn(pte) != INVALID_P2M_ENTRY) - pte = mask_rw_pte(ptep, pte); - else - pte = __pte_ma(0); +#ifdef CONFIG_X86_64 + unsigned long pfn; + + /* + * Pages belonging to the initial p2m list mapped outside the default + * address range must be mapped read-only. This region contains the + * page tables for mapping the p2m list, too, and page tables MUST be + * mapped read-only. + */ + pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT; + if (xen_start_info->mfn_list < __START_KERNEL_map && + pfn >= xen_start_info->first_p2m_pfn && + pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) + pte &= ~_PAGE_RW; +#endif + pte = pte_pfn_to_mfn(pte); + return native_make_pte(pte); +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init); +static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) +{ +#ifdef CONFIG_X86_32 + /* If there's an existing pte, then don't allow _PAGE_RW to be set */ + if (pte_mfn(pte) != INVALID_P2M_ENTRY + && pte_val_ma(*ptep) & _PAGE_PRESENT) + pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & + pte_val_ma(pte)); +#endif native_set_pte(ptep, pte); } @@ -2407,6 +2396,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.alloc_pud = xen_alloc_pud; pv_mmu_ops.release_pud = xen_release_pud; #endif + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte); #ifdef CONFIG_X86_64 pv_mmu_ops.write_cr3 = &xen_write_cr3; @@ -2455,7 +2445,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .pte_val = PV_CALLEE_SAVE(xen_pte_val), .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), - .make_pte = PV_CALLEE_SAVE(xen_make_pte), + .make_pte = PV_CALLEE_SAVE(xen_make_pte_init), .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), #ifdef CONFIG_X86_PAE -- cgit v1.2.3-59-g8ed1b From da01e18a37a57f360222d3a123b8f6994aa1ad14 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 23 Jun 2016 12:20:01 -0700 Subject: x86: avoid avoid passing around 'thread_info' in stack dumping code None of the code actually wants a thread_info, it all wants a task_struct, and it's just converting to a thread_info pointer much too early. No semantic change. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/stacktrace.h | 6 +++--- arch/x86/kernel/dumpstack.c | 22 ++++++++++------------ arch/x86/kernel/dumpstack_32.c | 4 +--- arch/x86/kernel/dumpstack_64.c | 8 +++----- 4 files changed, 17 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 7c247e7404be..0944218af9e2 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -14,7 +14,7 @@ extern int kstack_depth_to_print; struct thread_info; struct stacktrace_ops; -typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, +typedef unsigned long (*walk_stack_t)(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, @@ -23,13 +23,13 @@ typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, int *graph); extern unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); extern unsigned long -print_context_stack_bp(struct thread_info *tinfo, +print_context_stack_bp(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2bb25c3fe2e8..d6209f3a69cb 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -42,16 +42,14 @@ void printk_address(unsigned long address) static void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { - struct task_struct *task; unsigned long ret_addr; int index; if (addr != (unsigned long)return_to_handler) return; - task = tinfo->task; index = task->curr_ret_stack; if (!task->ret_stack || index < *graph) @@ -68,7 +66,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data, static inline void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { } #endif @@ -79,10 +77,10 @@ print_ftrace_graph_addr(unsigned long addr, void *data, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -static inline int valid_stack_ptr(struct thread_info *tinfo, +static inline int valid_stack_ptr(struct task_struct *task, void *p, unsigned int size, void *end) { - void *t = tinfo; + void *t = task_thread_info(task); if (end) { if (p < end && p >= (end-THREAD_SIZE)) return 1; @@ -93,14 +91,14 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, } unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) { struct stack_frame *frame = (struct stack_frame *)bp; - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + while (valid_stack_ptr(task, stack, sizeof(*stack), end)) { unsigned long addr; addr = *stack; @@ -112,7 +110,7 @@ print_context_stack(struct thread_info *tinfo, } else { ops->address(data, addr, 0); } - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } stack++; } @@ -121,7 +119,7 @@ print_context_stack(struct thread_info *tinfo, EXPORT_SYMBOL_GPL(print_context_stack); unsigned long -print_context_stack_bp(struct thread_info *tinfo, +print_context_stack_bp(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) @@ -129,7 +127,7 @@ print_context_stack_bp(struct thread_info *tinfo, struct stack_frame *frame = (struct stack_frame *)bp; unsigned long *ret_addr = &frame->return_address; - while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { + while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) { unsigned long addr = *ret_addr; if (!__kernel_text_address(addr)) @@ -139,7 +137,7 @@ print_context_stack_bp(struct thread_info *tinfo, break; frame = frame->next_frame; ret_addr = &frame->return_address; - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } return (unsigned long)frame; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 464ffd69b92e..fef917e79b9d 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -61,15 +61,13 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, bp = stack_frame(task, regs); for (;;) { - struct thread_info *context; void *end_stack; end_stack = is_hardirq_stack(stack, cpu); if (!end_stack) end_stack = is_softirq_stack(stack, cpu); - context = task_thread_info(task); - bp = ops->walk_stack(context, stack, bp, ops, data, + bp = ops->walk_stack(task, stack, bp, ops, data, end_stack, &graph); /* Stop if not on irq stack */ diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5f1c6266eb30..d558a8a49016 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -153,7 +153,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - struct thread_info *tinfo; unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned long dummy; unsigned used = 0; @@ -179,7 +178,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * current stack address. If the stacks consist of nested * exceptions */ - tinfo = task_thread_info(task); while (!done) { unsigned long *stack_end; enum stack_type stype; @@ -202,7 +200,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, id) < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, ops, + bp = ops->walk_stack(task, stack, bp, ops, data, stack_end, &graph); ops->stack(data, ""); /* @@ -218,7 +216,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, "IRQ") < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, + bp = ops->walk_stack(task, stack, bp, ops, data, stack_end, &graph); /* * We link to the next stack (which would be @@ -240,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, /* * This handles the process stack: */ - bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); + bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph); put_cpu(); } EXPORT_SYMBOL(dump_trace); -- cgit v1.2.3-59-g8ed1b From b235beea9e996a4d36fed6cfef4801a3e7d7a9a5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 15:09:37 -0700 Subject: Clarify naming of thread info/stack allocators We've had the thread info allocated together with the thread stack for most architectures for a long time (since the thread_info was split off from the task struct), but that is about to change. But the patches that move the thread info to be off-stack (and a part of the task struct instead) made it clear how confused the allocator and freeing functions are. Because the common case was that we share an allocation with the thread stack and the thread_info, the two pointers were identical. That identity then meant that we would have things like ti = alloc_thread_info_node(tsk, node); ... tsk->stack = ti; which certainly _worked_ (since stack and thread_info have the same value), but is rather confusing: why are we assigning a thread_info to the stack? And if we move the thread_info away, the "confusing" code just gets to be entirely bogus. So remove all this confusion, and make it clear that we are doing the stack allocation by renaming and clarifying the function names to be about the stack. The fact that the thread_info then shares the allocation is an implementation detail, and not really about the allocation itself. This is a pure renaming and type fix: we pass in the same pointer, it's just that we clarify what the pointer means. The ia64 code that actually only has one single allocation (for all of task_struct, thread_info and kernel thread stack) now looks a bit odd, but since "tsk->stack" is actually not even used there, that oddity doesn't matter. It would be a separate thing to clean that up, I intentionally left the ia64 changes as a pure brute-force renaming and type change. Acked-by: Andy Lutomirski Signed-off-by: Linus Torvalds --- arch/Kconfig | 4 +-- arch/ia64/Kconfig | 2 +- arch/ia64/include/asm/thread_info.h | 8 +++--- arch/mn10300/include/asm/thread_info.h | 2 +- arch/mn10300/kernel/kgdb.c | 3 +- arch/tile/include/asm/thread_info.h | 2 +- arch/tile/kernel/process.c | 3 +- include/linux/sched.h | 2 +- init/main.c | 4 +-- kernel/fork.c | 50 +++++++++++++++++----------------- 10 files changed, 41 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index e9734796531f..15996290fed4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -226,8 +226,8 @@ config ARCH_INIT_TASK config ARCH_TASK_STRUCT_ALLOCATOR bool -# Select if arch has its private alloc_thread_info() function -config ARCH_THREAD_INFO_ALLOCATOR +# Select if arch has its private alloc_thread_stack() function +config ARCH_THREAD_STACK_ALLOCATOR bool # Select if arch wants to size task_struct dynamically via arch_task_struct_size: diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index f80758cb7157..e109ee95e919 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -45,7 +45,7 @@ config IA64 select GENERIC_SMP_IDLE_THREAD select ARCH_INIT_TASK select ARCH_TASK_STRUCT_ALLOCATOR - select ARCH_THREAD_INFO_ALLOCATOR + select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA select GENERIC_TIME_VSYSCALL_OLD select SYSCTL_ARCH_UNALIGN_NO_WARN diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index aa995b67c3f5..d1212b84fb83 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -48,15 +48,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_info_node(tsk, node) \ - ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define alloc_thread_stack_node(tsk, node) \ + ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_info_node(tsk, node) ((struct thread_info *) 0) +#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_info(ti) /* nothing */ +#define free_thread_stack(ti) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index 4861a78c7160..f5f90bbf019d 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void) } #ifndef CONFIG_KGDB -void arch_release_thread_info(struct thread_info *ti); +void arch_release_thread_stack(unsigned long *stack); #endif #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c index 99770823451a..2d7986c386fe 100644 --- a/arch/mn10300/kernel/kgdb.c +++ b/arch/mn10300/kernel/kgdb.c @@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs) * single-step state is cleared. At this point the breakpoints should have * been removed by __switch_to(). */ -void arch_release_thread_info(struct thread_info *ti) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *ti = (void *)stack; if (kgdb_sstep_thread == ti) { kgdb_sstep_thread = NULL; diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 4b7cef9e94e0..c1467ac59ce6 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -78,7 +78,7 @@ struct thread_info { #ifndef __ASSEMBLY__ -void arch_release_thread_info(struct thread_info *info); +void arch_release_thread_stack(unsigned long *stack); /* How to get the thread information struct from C. */ register unsigned long stack_pointer __asm__("sp"); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 6b705ccc9cc1..a465d8372edd 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -73,8 +73,9 @@ void arch_cpu_idle(void) /* * Release a thread_info structure */ -void arch_release_thread_info(struct thread_info *info) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *info = (void *)stack; struct single_step_state *step_state = info->step_state; if (step_state) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e42ada26345..253538f29ade 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3007,7 +3007,7 @@ static inline int object_is_on_stack(void *obj) return (obj >= stack) && (obj < (stack + THREAD_SIZE)); } -extern void thread_info_cache_init(void); +extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE static inline unsigned long stack_not_used(struct task_struct *p) diff --git a/init/main.c b/init/main.c index 4c17fda5c2ff..826fd57fa3aa 100644 --- a/init/main.c +++ b/init/main.c @@ -453,7 +453,7 @@ void __init __weak smp_setup_processor_id(void) } # if THREAD_SIZE >= PAGE_SIZE -void __init __weak thread_info_cache_init(void) +void __init __weak thread_stack_cache_init(void) { } #endif @@ -627,7 +627,7 @@ asmlinkage __visible void __init start_kernel(void) /* Should be run before the first non-init thread is created */ init_espfix_bsp(); #endif - thread_info_cache_init(); + thread_stack_cache_init(); cred_init(); fork_init(); proc_caches_init(); diff --git a/kernel/fork.c b/kernel/fork.c index 5c2c355aa97f..37b9439b8c07 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -148,18 +148,18 @@ static inline void free_task_struct(struct task_struct *tsk) } #endif -void __weak arch_release_thread_info(struct thread_info *ti) +void __weak arch_release_thread_stack(unsigned long *stack) { } -#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR +#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a * kmemcache based allocator. */ # if THREAD_SIZE >= PAGE_SIZE -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) { struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP, @@ -172,33 +172,33 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, return page ? page_address(page) : NULL; } -static inline void free_thread_info(struct thread_info *ti) +static inline void free_thread_stack(unsigned long *stack) { - struct page *page = virt_to_page(ti); + struct page *page = virt_to_page(stack); memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK, -(1 << THREAD_SIZE_ORDER)); __free_kmem_pages(page, THREAD_SIZE_ORDER); } # else -static struct kmem_cache *thread_info_cache; +static struct kmem_cache *thread_stack_cache; -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static struct thread_info *alloc_thread_stack_node(struct task_struct *tsk, int node) { - return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node); + return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); } -static void free_thread_info(struct thread_info *ti) +static void free_stack(unsigned long *stack) { - kmem_cache_free(thread_info_cache, ti); + kmem_cache_free(thread_stack_cache, stack); } -void thread_info_cache_init(void) +void thread_stack_cache_init(void) { - thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, + thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE, THREAD_SIZE, 0, NULL); - BUG_ON(thread_info_cache == NULL); + BUG_ON(thread_stack_cache == NULL); } # endif #endif @@ -221,9 +221,9 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; -static void account_kernel_stack(struct thread_info *ti, int account) +static void account_kernel_stack(unsigned long *stack, int account) { - struct zone *zone = page_zone(virt_to_page(ti)); + struct zone *zone = page_zone(virt_to_page(stack)); mod_zone_page_state(zone, NR_KERNEL_STACK, account); } @@ -231,8 +231,8 @@ static void account_kernel_stack(struct thread_info *ti, int account) void free_task(struct task_struct *tsk) { account_kernel_stack(tsk->stack, -1); - arch_release_thread_info(tsk->stack); - free_thread_info(tsk->stack); + arch_release_thread_stack(tsk->stack); + free_thread_stack(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); put_seccomp_filter(tsk); @@ -343,7 +343,7 @@ void set_task_stack_end_magic(struct task_struct *tsk) static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; - struct thread_info *ti; + unsigned long *stack; int err; if (node == NUMA_NO_NODE) @@ -352,15 +352,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (!tsk) return NULL; - ti = alloc_thread_info_node(tsk, node); - if (!ti) + stack = alloc_thread_stack_node(tsk, node); + if (!stack) goto free_tsk; err = arch_dup_task_struct(tsk, orig); if (err) - goto free_ti; + goto free_stack; - tsk->stack = ti; + tsk->stack = stack; #ifdef CONFIG_SECCOMP /* * We must handle setting up seccomp filters once we're under @@ -392,14 +392,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; - account_kernel_stack(ti, 1); + account_kernel_stack(stack, 1); kcov_task_init(tsk); return tsk; -free_ti: - free_thread_info(ti); +free_stack: + free_thread_stack(stack); free_tsk: free_task_struct(tsk); return NULL; -- cgit v1.2.3-59-g8ed1b From aca9c293d098292579e345b2b39b394778d41526 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 16:55:53 -0700 Subject: x86: fix up a few misc stack pointer vs thread_info confusions As the actual pointer value is the same for the thread stack allocation and the thread_info, code that confused the two worked fine, but will break when the thread info is moved away from the stack allocation. It also looks very confusing. For example, the kprobe code wanted to know the current top of stack. To do that, it used this: (unsigned long)current_thread_info() + THREAD_SIZE which did indeed give the correct value. But it's not only a fairly nonsensical expression, it's also rather complex, especially since we actually have this: static inline unsigned long current_top_of_stack(void) which not only gives us the value we are interested in, but happens to be how "current_thread_info()" is currently defined as: (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); so using current_thread_info() to figure out the top of the stack really is a very round-about thing to do. The other cases are just simpler confusion about task_thread_info() vs task_stack_page(), which currently return the same pointer - but if you want the stack page, you really should be using the latter one. And there was one entirely unused assignment of the current stack to a thread_info pointer. All cleaned up to make more sense today, and make it easier to move the thread_info away from the stack in the future. No semantic changes. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/kprobes.h | 11 +++++------ arch/x86/kernel/dumpstack.c | 2 +- arch/x86/kernel/irq_32.c | 2 -- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4421b5da409d..d1d1e5094c28 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -38,12 +38,11 @@ typedef u8 kprobe_opcode_t; #define RELATIVECALL_OPCODE 0xe8 #define RELATIVE_ADDR_SIZE 4 #define MAX_STACK_SIZE 64 -#define MIN_STACK_SIZE(ADDR) \ - (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) \ - ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) +#define CUR_STACK_SIZE(ADDR) \ + (current_top_of_stack() - (unsigned long)(ADDR)) +#define MIN_STACK_SIZE(ADDR) \ + (MAX_STACK_SIZE < CUR_STACK_SIZE(ADDR) ? \ + MAX_STACK_SIZE : CUR_STACK_SIZE(ADDR)) #define flush_insn_slot(p) do { } while (0) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index d6209f3a69cb..ef8017ca5ba9 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -80,7 +80,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data, static inline int valid_stack_ptr(struct task_struct *task, void *p, unsigned int size, void *end) { - void *t = task_thread_info(task); + void *t = task_stack_page(task); if (end) { if (p < end && p >= (end-THREAD_SIZE)) return 1; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 38da8f29a9c8..c627bf8d98ad 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -130,11 +130,9 @@ void irq_ctx_init(int cpu) void do_softirq_own_stack(void) { - struct thread_info *curstk; struct irq_stack *irqstk; u32 *isp, *prev_esp; - curstk = current_stack(); irqstk = __this_cpu_read(softirq_stack); /* build the stack frame on the softirq stack */ -- cgit v1.2.3-59-g8ed1b From 7f1a00b6fcd0e3c19beba2e92d157dc0c2cf3494 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 17:07:33 -0700 Subject: fix up initial thread stack pointer vs thread_info confusion The INIT_TASK() initializer was similarly confused about the stack vs thread_info allocation that the allocators had, and that were fixed in commit b235beea9e99 ("Clarify naming of thread info/stack allocators"). The task ->stack pointer only incidentally ends up having the same value as the thread_info, and in fact that will change. So fix the initial task struct initializer to point to 'init_stack' instead of 'init_thread_info', and make sure the ia64 definition for that exists. This actually makes the ia64 tsk->stack pointer be sensible for the initial task, but not for any other task. As mentioned in commit b235beea9e99, that whole pointer isn't actually used on ia64, since task_stack_page() there just points to the (single) allocation. All the other architectures seem to have copied the 'init_stack' definition, even if it tended to be generally unusued. Signed-off-by: Linus Torvalds --- arch/ia64/kernel/init_task.c | 1 + include/linux/init_task.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index f9efe9739d3f..0eaa89f3defd 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -26,6 +26,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * handled. This is done by having a special ".data..init_task" section... */ #define init_thread_info init_task_mem.s.thread_info +#define init_stack init_task_mem.stack union { struct { diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f2cb8d45513d..f8834f820ec2 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -190,7 +190,7 @@ extern struct task_group root_task_group; #define INIT_TASK(tsk) \ { \ .state = 0, \ - .stack = &init_thread_info, \ + .stack = init_stack, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ .prio = MAX_PRIO-20, \ -- cgit v1.2.3-59-g8ed1b From 32d6bd9059f265f617f6502c68dfbcae7e515add Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:47 -0700 Subject: tree wide: get rid of __GFP_REPEAT for order-0 allocations part I This is the third version of the patchset previously sent [1]. I have basically only rebased it on top of 4.7-rc1 tree and dropped "dm: get rid of superfluous gfp flags" which went through dm tree. I am sending it now because it is tree wide and chances for conflicts are reduced considerably when we want to target rc2. I plan to send the next step and rename the flag and move to a better semantic later during this release cycle so we will have a new semantic ready for 4.8 merge window hopefully. Motivation: While working on something unrelated I've checked the current usage of __GFP_REPEAT in the tree. It seems that a majority of the usage is and always has been bogus because __GFP_REPEAT has always been about costly high order allocations while we are using it for order-0 or very small orders very often. It seems that a big pile of them is just a copy&paste when a code has been adopted from one arch to another. I think it makes some sense to get rid of them because they are just making the semantic more unclear. Please note that GFP_REPEAT is documented as * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt * _might_ fail. This depends upon the particular VM implementation. while !costly requests have basically nofail semantic. So one could reasonably expect that order-0 request with __GFP_REPEAT will not loop for ever. This is not implemented right now though. I would like to move on with __GFP_REPEAT and define a better semantic for it. $ git grep __GFP_REPEAT origin/master | wc -l 111 $ git grep __GFP_REPEAT | wc -l 36 So we are down to the third after this patch series. The remaining places really seem to be relying on __GFP_REPEAT due to large allocation requests. This still needs some double checking which I will do later after all the simple ones are sorted out. I am touching a lot of arch specific code here and I hope I got it right but as a matter of fact I even didn't compile test for some archs as I do not have cross compiler for them. Patches should be quite trivial to review for stupid compile mistakes though. The tricky parts are usually hidden by macro definitions and thats where I would appreciate help from arch maintainers. [1] http://lkml.kernel.org/r/1461849846-27209-1-git-send-email-mhocko@kernel.org This patch (of 19): __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. Yet we have the full kernel tree with its usage for apparently order-0 allocations. This is really confusing because __GFP_REPEAT is explicitly documented to allow allocation failures which is a weaker semantic than the current order-0 has (basically nofail). Let's simply drop __GFP_REPEAT from those places. This would allow to identify place which really need allocator to retry harder and formulate a more specific semantic for what the flag is supposed to do actually. Link: http://lkml.kernel.org/r/1464599699-30131-2-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: "Theodore Ts'o" Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Metcalf [for tile] Cc: Guan Xuetao Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Jan Kara Cc: John Crispin Cc: Lennox Wu Cc: Ley Foon Tan Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Ralf Baechle Cc: Rich Felker Cc: Russell King Cc: Thomas Gleixner Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgalloc.h | 4 ++-- arch/arm/include/asm/pgalloc.h | 2 +- arch/avr32/include/asm/pgalloc.h | 6 +++--- arch/cris/include/asm/pgalloc.h | 4 ++-- arch/frv/mm/pgalloc.c | 6 +++--- arch/hexagon/include/asm/pgalloc.h | 4 ++-- arch/m68k/include/asm/mcf_pgalloc.h | 4 ++-- arch/m68k/include/asm/motorola_pgalloc.h | 4 ++-- arch/m68k/include/asm/sun3_pgalloc.h | 4 ++-- arch/metag/include/asm/pgalloc.h | 5 ++--- arch/microblaze/include/asm/pgalloc.h | 4 ++-- arch/microblaze/mm/pgtable.c | 3 +-- arch/mn10300/mm/pgtable.c | 6 +++--- arch/openrisc/include/asm/pgalloc.h | 2 +- arch/openrisc/mm/ioremap.c | 2 +- arch/parisc/include/asm/pgalloc.h | 4 ++-- arch/powerpc/include/asm/book3s/64/pgalloc.h | 2 +- arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +- arch/powerpc/mm/pgtable_32.c | 4 ++-- arch/powerpc/mm/pgtable_64.c | 3 +-- arch/sh/include/asm/pgalloc.h | 4 ++-- arch/sparc/mm/init_64.c | 6 ++---- arch/um/kernel/mem.c | 4 ++-- arch/x86/include/asm/pgalloc.h | 4 ++-- arch/x86/xen/p2m.c | 2 +- arch/xtensa/include/asm/pgalloc.h | 2 +- drivers/block/aoe/aoecmd.c | 2 +- 27 files changed, 47 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index aab14a019c20..c2ebb6f36c9d 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -40,7 +40,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return ret; } @@ -53,7 +53,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 19cfab526d13..20febb368844 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -29,7 +29,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + return (pmd_t *)get_zeroed_page(GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/avr32/include/asm/pgalloc.h b/arch/avr32/include/asm/pgalloc.h index 1aba19d68c5e..db039cb368be 100644 --- a/arch/avr32/include/asm/pgalloc.h +++ b/arch/avr32/include/asm/pgalloc.h @@ -43,7 +43,7 @@ static inline void pgd_ctor(void *x) */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return quicklist_alloc(QUICK_PGD, GFP_KERNEL | __GFP_REPEAT, pgd_ctor); + return quicklist_alloc(QUICK_PGD, GFP_KERNEL, pgd_ctor); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -54,7 +54,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -63,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; void *pg; - pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); if (!pg) return NULL; diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h index 235ece437ddd..42f1affb9c2d 100644 --- a/arch/cris/include/asm/pgalloc.h +++ b/arch/cris/include/asm/pgalloc.h @@ -24,14 +24,14 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index 41907d25ed38..c9ed14f6c67d 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -22,7 +22,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE))); pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); if (pte) clear_page(pte); return pte; @@ -33,9 +33,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *page; #ifdef CONFIG_HIGHPTE - page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); + page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0); #else - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + page = alloc_pages(GFP_KERNEL, 0); #endif if (!page) return NULL; diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 77da3b0ae3c2..eeebf862c46c 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -64,7 +64,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + pte = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { @@ -78,7 +78,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO; return (pte_t *) __get_free_page(flags); } diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index f9924fbcfe42..fb95aed5f428 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -14,7 +14,7 @@ extern const char bad_pmd_string[]; extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_DMA|__GFP_REPEAT); + unsigned long page = __get_free_page(GFP_DMA); if (!page) return NULL; @@ -51,7 +51,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page, static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_DMA|__GFP_REPEAT, 0); + struct page *page = alloc_pages(GFP_DMA, 0); pte_t *pte; if (!page) diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index 24bcba496c75..c895b987202c 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -11,7 +11,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad { pte_t *pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); if (pte) { __flush_page_to_ram(pte); flush_tlb_kernel_page(pte); @@ -32,7 +32,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addres struct page *page; pte_t *pte; - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + page = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0); if(!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 0931388de47f..1901f61f926f 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -37,7 +37,7 @@ do { \ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_KERNEL|__GFP_REPEAT); + unsigned long page = __get_free_page(GFP_KERNEL); if (!page) return NULL; @@ -49,7 +49,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + struct page *page = alloc_pages(GFP_KERNEL, 0); if (page == NULL) return NULL; diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h index 3104df0a4822..c2caa1ee4360 100644 --- a/arch/metag/include/asm/pgalloc.h +++ b/arch/metag/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | - __GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); return pte; } @@ -51,7 +50,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index 61436d69775c..7c89390c0c13 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -116,9 +116,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *ptepage; #ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; + int flags = GFP_KERNEL | __GFP_HIGHMEM; #else - int flags = GFP_KERNEL | __GFP_REPEAT; + int flags = GFP_KERNEL; #endif ptepage = alloc_pages(flags, 0); diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 4f4520e779a5..eb99fcc76088 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -239,8 +239,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; if (mem_init_done) { - pte = (pte_t *)__get_free_page(GFP_KERNEL | - __GFP_REPEAT | __GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } else { pte = (pte_t *)early_get_page(); if (pte) diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c index e77a7c728081..9577cf768875 100644 --- a/arch/mn10300/mm/pgtable.c +++ b/arch/mn10300/mm/pgtable.c @@ -63,7 +63,7 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); if (pte) clear_page(pte); return pte; @@ -74,9 +74,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *pte; #ifdef CONFIG_HIGHPTE - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0); #else - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL, 0); #endif if (!pte) return NULL; diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 21484e5b9e9a..87eebd185089 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -77,7 +77,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL, 0); if (!pte) return NULL; clear_page(page_address(pte)); diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 62b08ef392be..5b2a95116e8f 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -122,7 +122,7 @@ pte_t __init_refok *pte_alloc_one_kernel(struct mm_struct *mm, pte_t *pte; if (likely(mem_init_done)) { - pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT); + pte = (pte_t *) __get_free_page(GFP_KERNEL); } else { pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); #if 0 diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index f2fd327dce2e..52c3defb40c9 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -124,7 +124,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!page) return NULL; if (!pgtable_page_ctor(page)) { @@ -137,7 +137,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 488279edb1f0..049b80359db6 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -151,7 +151,7 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 069369f6414b..8a8a7d991249 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -88,7 +88,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index bf7bf32b54f8..7f922f557936 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -84,7 +84,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add pte_t *pte; if (slab_is_available()) { - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); } else { pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) @@ -97,7 +97,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *ptepage; - gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO; ptepage = alloc_pages(flags, 0); if (!ptepage) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e009e0604a8a..f5e8d4edb808 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -350,8 +350,7 @@ static pte_t *get_from_cache(struct mm_struct *mm) static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) { void *ret = NULL; - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); if (!page) return NULL; if (!kernel && !pgtable_page_ctor(page)) { diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index a33673b3687d..f3f42c84c40f 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -34,7 +34,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -43,7 +43,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; void *pg; - pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); if (!pg) return NULL; page = virt_to_page(pg); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 14bb0d5ed3c6..aec508e37490 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2704,8 +2704,7 @@ void __flush_tlb_all(void) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pte_t *pte = NULL; if (page) @@ -2717,8 +2716,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); if (!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index b2a2dff50b4e..e7437ec62710 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -204,7 +204,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } @@ -212,7 +212,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index bf7f8b55b0f9..574c23cf761a 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -81,7 +81,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { struct page *page; - page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); if (!page) return NULL; if (!pgtable_pmd_page_ctor(page)) { @@ -125,7 +125,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + return (pud_t *)get_zeroed_page(GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index cab9f766bb06..dd2a49a8aacc 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -182,7 +182,7 @@ static void * __ref alloc_p2m_page(void) if (unlikely(!slab_is_available())) return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); - return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); + return (void *)__get_free_page(GFP_KERNEL); } static void __ref free_p2m_page(void *p) diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index d38eb9237e64..1065bc8bcae5 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -44,7 +44,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pte_t *ptep; int i; - ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + ptep = (pte_t *)__get_free_page(GFP_KERNEL); if (!ptep) return NULL; for (i = 0; i < 1024; i++) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index d597e432e195..ab19adb07a12 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1750,7 +1750,7 @@ aoecmd_init(void) int ret; /* get_zeroed_page returns page with ref count 1 */ - p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + p = (void *) get_zeroed_page(GFP_KERNEL); if (!p) return -ENOMEM; empty_page = virt_to_page(p); -- cgit v1.2.3-59-g8ed1b From a3a9a59d206779dc0c4ca5a6de6a2ff40382732b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:50 -0700 Subject: x86: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. PGALLOC_GFP uses __GFP_REPEAT but none of the allocation which uses this flag is for more than order-0. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-3-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/espfix_64.c | 2 +- arch/x86/mm/pgtable.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 4d38416e2a7f..04f89caef9c4 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -57,7 +57,7 @@ # error "Need more than one PGD for the ESPFIX hack" #endif -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) /* This contains the *bottom* address of the espfix stack */ DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 4eb287e25043..aa0ff4b02a96 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -6,7 +6,7 @@ #include #include -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO #ifdef CONFIG_HIGHPTE #define PGALLOC_USER_GFP __GFP_HIGHMEM -- cgit v1.2.3-59-g8ed1b From f58f230a832ba8220a64f44aaafcce4b7358d826 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:53 -0700 Subject: x86/efi: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. efi_alloc_page_tables uses __GFP_REPEAT but it allocates an order-0 page. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-4-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/platform/efi/efi_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6e7242be1c87..b226b3f497f1 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -139,7 +139,7 @@ int __init efi_alloc_page_tables(void) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; + gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_page(gfp_mask); if (!efi_pgd) return -ENOMEM; -- cgit v1.2.3-59-g8ed1b From f3610a6aff7dd70b788364255c0cbc128488ef72 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:56 -0700 Subject: arm64: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. {pte,pmd,pud}_alloc_one{_kernel}, late_pgtable_alloc use PGALLOC_GFP for __get_free_page (aka order-0). pgd_alloc is slightly more complex because it allocates from pgd_cache if PGD_SIZE != PAGE_SIZE and PGD_SIZE depends on the configuration (CONFIG_ARM64_VA_BITS, PAGE_SHIFT and CONFIG_PGTABLE_LEVELS). As per config PGTABLE_LEVELS int default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36 default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47 default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48 we should have the following options CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:4 PAGE_SIZE:4k size:4096 pages:1 CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:4 PAGE_SIZE:16k size:16 pages:1 CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:64k size:512 pages:1 CONFIG_ARM64_VA_BITS:47 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:16k size:16384 pages:1 CONFIG_ARM64_VA_BITS:42 CONFIG_PGTABLE_LEVELS:2 PAGE_SIZE:64k size:65536 pages:1 CONFIG_ARM64_VA_BITS:39 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:4k size:4096 pages:1 CONFIG_ARM64_VA_BITS:36 CONFIG_PGTABLE_LEVELS:2 PAGE_SIZE:16k size:16384 pages:1 All of them fit into a single page (aka order-0). This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-6-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Will Deacon Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index ff98585d085a..d25f4f137c2a 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,7 +26,7 @@ #define check_pgt_cache() do { } while (0) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 -- cgit v1.2.3-59-g8ed1b From 54d87d600adbe9889bccaff38420cec02250993b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:58 -0700 Subject: arc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one_kernel uses __get_order_pte but this is obviously always zero because BITS_FOR_PTE is not larger than 9 yet the page size is always larger than 4K. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-7-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/include/asm/pgalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 86ed671286df..3749234b7419 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -95,7 +95,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, + pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, __get_order_pte()); return pte; @@ -107,7 +107,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) pgtable_t pte_pg; struct page *page; - pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); + pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte()); if (!pte_pg) return 0; memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); -- cgit v1.2.3-59-g8ed1b From 65f84656ff7c24177c43652bc88cc2a06f9a48b1 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:01 -0700 Subject: mips: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one{_kernel}, pmd_alloc_one allocate PTE_ORDER resp. PMD_ORDER but both are not larger than 1. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-8-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: John Crispin Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/asm/pgalloc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index b336037e8768..93c079a1cfc8 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -69,7 +69,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -79,7 +79,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (!pte) return NULL; clear_highpage(pte); @@ -113,7 +113,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; - pmd = (pmd_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PMD_ORDER); + pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ORDER); if (pmd) pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); return pmd; -- cgit v1.2.3-59-g8ed1b From 565299d03363c71d5bcf7edabb41b2b36a9ea36e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:04 -0700 Subject: nios2: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one{_kernel} allocate PTE_ORDER which is 0. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-9-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Ley Foon Tan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/nios2/include/asm/pgalloc.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 6e2985e0a7b9..bb47d08c8ef7 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, - PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -53,7 +52,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (pte) { if (!pgtable_page_ctor(pte)) { __free_page(pte); -- cgit v1.2.3-59-g8ed1b From aade311a50b0be5d5ee93bac7ebc2da9a16556d7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:06 -0700 Subject: parisc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pmd_alloc_one allocate PMD_ORDER which is 1. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-10-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: "James E.J. Bottomley" Cc: Helge Deller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/include/asm/pgalloc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index 52c3defb40c9..f08dda3f0995 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -63,8 +63,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, - PMD_ORDER); + pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL, PMD_ORDER); if (pmd) memset(pmd, 0, PAGE_SIZE< Date: Fri, 24 Jun 2016 14:49:09 -0700 Subject: score: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one{_kernel} allocate PTE_ORDER which is 0. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-11-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/score/include/asm/pgalloc.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/score/include/asm/pgalloc.h b/arch/score/include/asm/pgalloc.h index 2e067657db98..49b012d78c1a 100644 --- a/arch/score/include/asm/pgalloc.h +++ b/arch/score/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, - PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -53,7 +52,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (!pte) return NULL; clear_highpage(pte); -- cgit v1.2.3-59-g8ed1b From 2379a23e34b58520dfc8f4909f116a08393138e4 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:12 -0700 Subject: powerpc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. {pud,pmd}_alloc_one are allocating from {PGT,PUD}_CACHE initialized in pgtable_cache_init which doesn't have larger than sizeof(void *) << 12 size and that fits into !costly allocation request size. PGALLOC_GFP is used only in radix__pgd_alloc which uses either order-0 or order-4 requests. The first one doesn't need the flag while the second does. Drop __GFP_REPEAT from PGALLOC_GFP and add it for the order-4 one. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-12-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 10 ++++------ arch/powerpc/include/asm/nohash/64/pgalloc.h | 6 ++---- arch/powerpc/mm/hugetlbpage.c | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 049b80359db6..d14fcf82c00c 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -41,7 +41,7 @@ extern struct kmem_cache *pgtable_cache[]; pgtable_cache[(shift) - 1]; \ }) -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int); extern void pte_fragment_free(unsigned long *, int); @@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) return (pgd_t *)__get_free_page(PGALLOC_GFP); #else struct page *page; - page = alloc_pages(PGALLOC_GFP, 4); + page = alloc_pages(PGALLOC_GFP | __GFP_REPEAT, 4); if (!page) return NULL; return (pgd_t *) page_address(page); @@ -93,8 +93,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -115,8 +114,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 8a8a7d991249..897d2e1c8a9b 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -57,8 +57,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -190,8 +189,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 5aac1a3f86cd..119d18611500 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -73,7 +73,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, cachep = PGT_CACHE(pdshift - pshift); #endif - new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT); + new = kmem_cache_zalloc(cachep, GFP_KERNEL); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); -- cgit v1.2.3-59-g8ed1b From 45eeff260d40ff02af3d5b8e2919033ee59f9ff6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:14 -0700 Subject: sparc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. {pud,pmd}_alloc_one is using __GFP_REPEAT but it always allocates from pgtable_cache which is initialzed to PAGE_SIZE objects. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-13-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pgalloc_64.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 5e3187185b4a..3529f1378cd8 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -41,8 +41,7 @@ static inline void __pud_populate(pud_t *pud, pmd_t *pmd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(pgtable_cache, - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -52,8 +51,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(pgtable_cache, - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) -- cgit v1.2.3-59-g8ed1b From 10d58bf297e2cba0cfa2cd143d4f0df26e129040 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:17 -0700 Subject: s390: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. page_table_alloc then uses the flag for a single page allocation. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-14-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/pgalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index e8b5962ac12a..e2565d2d0c32 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -169,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) return table; } /* Allocate a fresh page */ - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + page = alloc_page(GFP_KERNEL); if (!page) return NULL; if (!pgtable_page_ctor(page)) { -- cgit v1.2.3-59-g8ed1b From 884ed4cb8aa19ccff32f5c5586257c56e56f91a4 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:20 -0700 Subject: sh: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. PGALLOC_GFP uses __GFP_REPEAT but {pgd,pmd}_alloc allocate from {pgd,pmd}_cache but both caches are allocating up to PAGE_SIZE objects. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-15-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c index 26e03a1f7ca4..a62bd8696779 100644 --- a/arch/sh/mm/pgtable.c +++ b/arch/sh/mm/pgtable.c @@ -1,7 +1,7 @@ #include #include -#define PGALLOC_GFP GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_ZERO static struct kmem_cache *pgd_cachep; #if PAGETABLE_LEVELS > 2 -- cgit v1.2.3-59-g8ed1b From f45eebc25e78991ef6a6d784ab54151d3003cfdf Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:22 -0700 Subject: tile: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pgtable_alloc_one uses __GFP_REPEAT flag for L2_USER_PGTABLE_ORDER but the order is either 0 or 3 if L2_KERNEL_PGTABLE_SHIFT for HPAGE_SHIFT. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-16-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Chris Metcalf [for tile] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 7bf2491a9c1f..c4d5bf841a7f 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -231,7 +231,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, int order) { - gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; + gfp_t flags = GFP_KERNEL|__GFP_ZERO; struct page *p; int i; -- cgit v1.2.3-59-g8ed1b From a830627b01b26452a13abb7e7b37d39365be4b05 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:25 -0700 Subject: unicore32: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. PGALLOC_GFP uses __GFP_REPEAT but it is only used in pte_alloc_one, pte_alloc_one_kernel which does order-0 request. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-17-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/unicore32/include/asm/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index 2e02d1356fdf..26775793c204 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h @@ -28,7 +28,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); #define pgd_alloc(mm) get_pgd_slow(mm) #define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) /* * Allocate one PTE table. -- cgit v1.2.3-59-g8ed1b From 8e96a87c5431c256feb65bcfc5aec92d9f7839b6 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Fri, 17 Jun 2016 14:58:34 +1000 Subject: powerpc/tm: Always reclaim in start_thread() for exec() class syscalls Userspace can quite legitimately perform an exec() syscall with a suspended transaction. exec() does not return to the old process, rather it load a new one and starts that, the expectation therefore is that the new process starts not in a transaction. Currently exec() is not treated any differently to any other syscall which creates problems. Firstly it could allow a new process to start with a suspended transaction for a binary that no longer exists. This means that the checkpointed state won't be valid and if the suspended transaction were ever to be resumed and subsequently aborted (a possibility which is exceedingly likely as exec()ing will likely doom the transaction) the new process will jump to invalid state. Secondly the incorrect attempt to keep the transactional state while still zeroing state for the new process creates at least two TM Bad Things. The first triggers on the rfid to return to userspace as start_thread() has given the new process a 'clean' MSR but the suspend will still be set in the hardware MSR. The second TM Bad Thing triggers in __switch_to() as the processor is still transactionally suspended but __switch_to() wants to zero the TM sprs for the new process. This is an example of the outcome of calling exec() with a suspended transaction. Note the first 700 is likely the first TM bad thing decsribed earlier only the kernel can't report it as we've loaded userspace registers. c000000000009980 is the rfid in fast_exception_return() Bad kernel stack pointer 3fffcfa1a370 at c000000000009980 Oops: Bad kernel stack pointer, sig: 6 [#1] CPU: 0 PID: 2006 Comm: tm-execed Not tainted NIP: c000000000009980 LR: 0000000000000000 CTR: 0000000000000000 REGS: c00000003ffefd40 TRAP: 0700 Not tainted MSR: 8000000300201031 CR: 00000000 XER: 00000000 CFAR: c0000000000098b4 SOFTE: 0 PACATMSCRATCH: b00000010000d033 GPR00: 0000000000000000 00003fffcfa1a370 0000000000000000 0000000000000000 GPR04: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR12: 00003fff966611c0 0000000000000000 0000000000000000 0000000000000000 NIP [c000000000009980] fast_exception_return+0xb0/0xb8 LR [0000000000000000] (null) Call Trace: Instruction dump: f84d0278 e9a100d8 7c7b03a6 e84101a0 7c4ff120 e8410170 7c5a03a6 e8010070 e8410080 e8610088 e8810090 e8210078 <4c000024> 48000000 e8610178 88ed023b Kernel BUG at c000000000043e80 [verbose debug info unavailable] Unexpected TM Bad Thing exception at c000000000043e80 (msr 0x201033) Oops: Unrecoverable exception, sig: 6 [#2] CPU: 0 PID: 2006 Comm: tm-execed Tainted: G D task: c0000000fbea6d80 ti: c00000003ffec000 task.ti: c0000000fb7ec000 NIP: c000000000043e80 LR: c000000000015a24 CTR: 0000000000000000 REGS: c00000003ffef7e0 TRAP: 0700 Tainted: G D MSR: 8000000300201033 CR: 28002828 XER: 00000000 CFAR: c000000000015a20 SOFTE: 0 PACATMSCRATCH: b00000010000d033 GPR00: 0000000000000000 c00000003ffefa60 c000000000db5500 c0000000fbead000 GPR04: 8000000300001033 2222222222222222 2222222222222222 00000000ff160000 GPR08: 0000000000000000 800000010000d033 c0000000fb7e3ea0 c00000000fe00004 GPR12: 0000000000002200 c00000000fe00000 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 c0000000fbea7410 00000000ff160000 GPR24: c0000000ffe1f600 c0000000fbea8700 c0000000fbea8700 c0000000fbead000 GPR28: c000000000e20198 c0000000fbea6d80 c0000000fbeab680 c0000000fbea6d80 NIP [c000000000043e80] tm_restore_sprs+0xc/0x1c LR [c000000000015a24] __switch_to+0x1f4/0x420 Call Trace: Instruction dump: 7c800164 4e800020 7c0022a6 f80304a8 7c0222a6 f80304b0 7c0122a6 f80304b8 4e800020 e80304a8 7c0023a6 e80304b0 <7c0223a6> e80304b8 7c0123a6 4e800020 This fixes CVE-2016-5828. Fixes: bc2a9408fa65 ("powerpc: Hook in new transactional memory code") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e2f12cbcade9..0b93893424f5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1505,6 +1505,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.regs = regs - 1; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Clear any transactional state, we're exec()ing. The cause is + * not important as there will never be a recheckpoint so it's not + * user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); +#endif + memset(regs->gpr, 0, sizeof(regs->gpr)); regs->ctr = 0; regs->link = 0; -- cgit v1.2.3-59-g8ed1b From 591d215afcc2f94e8e2c69a63c924c044677eb31 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jun 2016 17:24:45 +0100 Subject: KVM: arm/arm64: Stop leaking vcpu pid references kvm provides kvm_vcpu_uninit(), which amongst other things, releases the last reference to the struct pid of the task that was last running the vcpu. On arm64 built with CONFIG_DEBUG_KMEMLEAK, starting a guest with kvmtool, then killing it with SIGKILL results (after some considerable time) in: > cat /sys/kernel/debug/kmemleak > unreferenced object 0xffff80007d5ea080 (size 128): > comm "lkvm", pid 2025, jiffies 4294942645 (age 1107.776s) > hex dump (first 32 bytes): > 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ > 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ > backtrace: > [] create_object+0xfc/0x278 > [] kmemleak_alloc+0x34/0x70 > [] kmem_cache_alloc+0x16c/0x1d8 > [] alloc_pid+0x34/0x4d0 > [] copy_process.isra.6+0x79c/0x1338 > [] _do_fork+0x74/0x320 > [] SyS_clone+0x18/0x20 > [] el0_svc_naked+0x24/0x28 > [] 0xffffffffffffffff On x86 kvm_vcpu_uninit() is called on the path from kvm_arch_destroy_vm(), on arm no equivalent call is made. Add the call to kvm_arch_vcpu_free(). Signed-off-by: James Morse Fixes: 749cf76c5a36 ("KVM: ARM: Initial skeleton to compile KVM support") Cc: # 3.10+ Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 893941ec98dc..f1bde7c4e736 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -263,6 +263,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_timer_vcpu_terminate(vcpu); kvm_vgic_vcpu_destroy(vcpu); kvm_pmu_vcpu_destroy(vcpu); + kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } -- cgit v1.2.3-59-g8ed1b From 749d088b8e7f4b9826ede02b9a043e417fa84aa1 Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Fri, 27 May 2016 14:17:10 +0800 Subject: pvclock: Add CPU barriers to get correct version value Protocol for the "version" fields is: hypervisor raises it (making it uneven) before it starts updating the fields and raises it again (making it even) when it is done. Thus the guest can make sure the time values it got are consistent by checking the version before and after reading them. Add CPU barries after getting version value just like what function vread_pvclock does, because all of callees in this function is inline. Fixes: 502dfeff239e8313bfbe906ca0a1a6827ac8481b Cc: stable@vger.kernel.org Signed-off-by: Minfei Huang Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/pvclock.h | 2 ++ arch/x86/kernel/pvclock.c | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index fdcc04020636..538ae944855e 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -85,6 +85,8 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, u8 ret_flags; version = src->version; + /* Make the latest version visible */ + smp_rmb(); offset = pvclock_get_nsec_offset(src); ret = src->system_time + offset; diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 99bfc025111d..7f82fe0a6807 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -66,6 +66,8 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) do { version = __pvclock_read_cycles(src, &ret, &flags); + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); return flags & valid_flags; @@ -80,6 +82,8 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) do { version = __pvclock_read_cycles(src, &ret, &flags); + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { -- cgit v1.2.3-59-g8ed1b From f7550d076d55c1b5f02f95012e3a5a84d264c47d Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Fri, 27 May 2016 14:17:11 +0800 Subject: pvclock: Cleanup to remove function pvclock_get_nsec_offset Function __pvclock_read_cycles is short enough, so there is no need to have another function pvclock_get_nsec_offset to calculate tsc delta. It's better to combine it into function __pvclock_read_cycles. Remove useless variables in function __pvclock_read_cycles. Signed-off-by: Minfei Huang Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/pvclock.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 538ae944855e..7c1c89598688 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -68,32 +68,23 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) return product; } -static __always_inline -u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) -{ - u64 delta = rdtsc_ordered() - src->tsc_timestamp; - return pvclock_scale_delta(delta, src->tsc_to_system_mul, - src->tsc_shift); -} - static __always_inline unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, cycle_t *cycles, u8 *flags) { unsigned version; - cycle_t ret, offset; - u8 ret_flags; + cycle_t offset; + u64 delta; version = src->version; /* Make the latest version visible */ smp_rmb(); - offset = pvclock_get_nsec_offset(src); - ret = src->system_time + offset; - ret_flags = src->flags; - - *cycles = ret; - *flags = ret_flags; + delta = rdtsc_ordered() - src->tsc_timestamp; + offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + *cycles = src->system_time + offset; + *flags = src->flags; return version; } -- cgit v1.2.3-59-g8ed1b From ed911b43adb889c37a37fa57a995f0b460c633b6 Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Sat, 28 May 2016 20:27:43 +0800 Subject: pvclock: Get rid of __pvclock_read_cycles in function pvclock_read_flags There is a generic function __pvclock_read_cycles to be used to get both flags and cycles. For function pvclock_read_flags, it's useless to get cycles value. To make this function be more effective, get this variable flags directly in function. Signed-off-by: Minfei Huang Signed-off-by: Paolo Bonzini --- arch/x86/kernel/pvclock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 7f82fe0a6807..06c58ce46762 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -61,11 +61,14 @@ void pvclock_resume(void) u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) { unsigned version; - cycle_t ret; u8 flags; do { - version = __pvclock_read_cycles(src, &ret, &flags); + version = src->version; + /* Make the latest version visible */ + smp_rmb(); + + flags = src->flags; /* Make sure that the version double-check is last. */ smp_rmb(); } while ((src->version & 1) || version != src->version); -- cgit v1.2.3-59-g8ed1b From 8d93c874ac899bfdf0ad3787baef684a0c878c2c Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 20 Jun 2016 22:28:02 -0300 Subject: KVM: x86: move nsec_to_cycles from x86.c to x86.h Move the inline function nsec_to_cycles from x86.c to x86.h, as the next patch uses it from lapic.c. Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 ------ arch/x86/kvm/x86.h | 7 +++++++ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 902d9da12392..7da5dd2057a9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1244,12 +1244,6 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); static unsigned long max_tsc_khz; -static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) -{ - return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, - vcpu->arch.virtual_tsc_shift); -} - static u32 adjust_tsc_khz(u32 khz, s32 ppm) { u64 v = (u64)khz * (1000000 + ppm); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 7ce3634ab5fe..a82ca466b62e 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -2,6 +2,7 @@ #define ARCH_X86_KVM_X86_H #include +#include #include "kvm_cache_regs.h" #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL @@ -195,6 +196,12 @@ extern unsigned int lapic_timer_advance_ns; extern struct static_key kvm_no_apic_vcpu; +static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) +{ + return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, + vcpu->arch.virtual_tsc_shift); +} + /* Same "calling convention" as do_div: * - divide (n << 32) by base * - put result in n -- cgit v1.2.3-59-g8ed1b From b606f189c7d5bf9b875bba168162fe05287880fe Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 20 Jun 2016 22:33:48 -0300 Subject: KVM: LAPIC: cap __delay at lapic_timer_advance_ns The host timer which emulates the guest LAPIC TSC deadline timer has its expiration diminished by lapic_timer_advance_ns nanoseconds. Therefore if, at wait_lapic_expire, a difference larger than lapic_timer_advance_ns is encountered, delay at most lapic_timer_advance_ns. This fixes a problem where the guest can cause the host to delay for large amounts of time. Reported-by: Alan Jenkins Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bbb5b283ff63..a397200281c1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1310,7 +1310,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ if (guest_tsc < tsc_deadline) - __delay(tsc_deadline - guest_tsc); + __delay(min(tsc_deadline - guest_tsc, + nsec_to_cycles(vcpu, lapic_timer_advance_ns))); } static void start_apic_timer(struct kvm_lapic *apic) -- cgit v1.2.3-59-g8ed1b From ff30ef40deca4658e27b0c596e7baf39115e858f Mon Sep 17 00:00:00 2001 From: Quentin Casasnovas Date: Sat, 18 Jun 2016 11:01:05 +0200 Subject: KVM: nVMX: VMX instructions: fix segment checks when L1 is in long mode. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I couldn't get Xen to boot a L2 HVM when it was nested under KVM - it was getting a GP(0) on a rather unspecial vmread from Xen: (XEN) ----[ Xen-4.7.0-rc x86_64 debug=n Not tainted ]---- (XEN) CPU: 1 (XEN) RIP: e008:[] vmx_get_segment_register+0x14e/0x450 (XEN) RFLAGS: 0000000000010202 CONTEXT: hypervisor (d1v0) (XEN) rax: ffff82d0801e6288 rbx: ffff83003ffbfb7c rcx: fffffffffffab928 (XEN) rdx: 0000000000000000 rsi: 0000000000000000 rdi: ffff83000bdd0000 (XEN) rbp: ffff83000bdd0000 rsp: ffff83003ffbfab0 r8: ffff830038813910 (XEN) r9: ffff83003faf3958 r10: 0000000a3b9f7640 r11: ffff83003f82d418 (XEN) r12: 0000000000000000 r13: ffff83003ffbffff r14: 0000000000004802 (XEN) r15: 0000000000000008 cr0: 0000000080050033 cr4: 00000000001526e0 (XEN) cr3: 000000003fc79000 cr2: 0000000000000000 (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: 0000 cs: e008 (XEN) Xen code around (vmx_get_segment_register+0x14e/0x450): (XEN) 00 00 41 be 02 48 00 00 <44> 0f 78 74 24 08 0f 86 38 56 00 00 b8 08 68 00 (XEN) Xen stack trace from rsp=ffff83003ffbfab0: ... (XEN) Xen call trace: (XEN) [] vmx_get_segment_register+0x14e/0x450 (XEN) [] get_page_from_gfn_p2m+0x165/0x300 (XEN) [] hvmemul_get_seg_reg+0x52/0x60 (XEN) [] hvm_emulate_prepare+0x53/0x70 (XEN) [] handle_mmio+0x2b/0xd0 (XEN) [] emulate.c#_hvm_emulate_one+0x111/0x2c0 (XEN) [] handle_hvm_io_completion+0x274/0x2a0 (XEN) [] __get_gfn_type_access+0xfa/0x270 (XEN) [] timer.c#add_entry+0x4b/0xb0 (XEN) [] timer.c#remove_entry+0x7c/0x90 (XEN) [] hvm_do_resume+0x23/0x140 (XEN) [] vmx_do_resume+0xa7/0x140 (XEN) [] context_switch+0x13b/0xe40 (XEN) [] schedule.c#schedule+0x22e/0x570 (XEN) [] softirq.c#__do_softirq+0x5c/0x90 (XEN) [] domain.c#idle_loop+0x25/0x50 (XEN) (XEN) (XEN) **************************************** (XEN) Panic on CPU 1: (XEN) GENERAL PROTECTION FAULT (XEN) [error_code=0000] (XEN) **************************************** Tracing my host KVM showed it was the one injecting the GP(0) when emulating the VMREAD and checking the destination segment permissions in get_vmx_mem_address(): 3) | vmx_handle_exit() { 3) | handle_vmread() { 3) | nested_vmx_check_permission() { 3) | vmx_get_segment() { 3) 0.074 us | vmx_read_guest_seg_base(); 3) 0.065 us | vmx_read_guest_seg_selector(); 3) 0.066 us | vmx_read_guest_seg_ar(); 3) 1.636 us | } 3) 0.058 us | vmx_get_rflags(); 3) 0.062 us | vmx_read_guest_seg_ar(); 3) 3.469 us | } 3) | vmx_get_cs_db_l_bits() { 3) 0.058 us | vmx_read_guest_seg_ar(); 3) 0.662 us | } 3) | get_vmx_mem_address() { 3) 0.068 us | vmx_cache_reg(); 3) | vmx_get_segment() { 3) 0.074 us | vmx_read_guest_seg_base(); 3) 0.068 us | vmx_read_guest_seg_selector(); 3) 0.071 us | vmx_read_guest_seg_ar(); 3) 1.756 us | } 3) | kvm_queue_exception_e() { 3) 0.066 us | kvm_multiple_exception(); 3) 0.684 us | } 3) 4.085 us | } 3) 9.833 us | } 3) + 10.366 us | } Cross-checking the KVM/VMX VMREAD emulation code with the Intel Software Developper Manual Volume 3C - "VMREAD - Read Field from Virtual-Machine Control Structure", I found that we're enforcing that the destination operand is NOT located in a read-only data segment or any code segment when the L1 is in long mode - BUT that check should only happen when it is in protected mode. Shuffling the code a bit to make our emulation follow the specification allows me to boot a Xen dom0 in a nested KVM and start HVM L2 guests without problems. Fixes: f9eb4af67c9d ("KVM: nVMX: VMX instructions: add checks for #GP/#SS exceptions") Signed-off-by: Quentin Casasnovas Cc: Eugene Korenevsky Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: linux-stable Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 003618e324ce..64a79f271276 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6671,7 +6671,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Checks for #GP/#SS exceptions. */ exn = false; - if (is_protmode(vcpu)) { + if (is_long_mode(vcpu)) { + /* Long mode: #GP(0)/#SS(0) if the memory address is in a + * non-canonical form. This is the only check on the memory + * destination for long mode! + */ + exn = is_noncanonical_address(*ret); + } else if (is_protmode(vcpu)) { /* Protected mode: apply checks for segment validity in the * following order: * - segment type check (#GP(0) may be thrown) @@ -6688,17 +6694,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, * execute-only code segment */ exn = ((s.type & 0xa) == 8); - } - if (exn) { - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; - } - if (is_long_mode(vcpu)) { - /* Long mode: #GP(0)/#SS(0) if the memory address is in a - * non-canonical form. This is an only check for long mode. - */ - exn = is_noncanonical_address(*ret); - } else if (is_protmode(vcpu)) { + if (exn) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. */ exn = (s.unusable != 0); -- cgit v1.2.3-59-g8ed1b From f52e126cc7476196f44f3c313b7d9f0699a881fc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 28 Jun 2016 09:42:25 +0530 Subject: ARC: unwind: ensure that .debug_frame is generated (vs. .eh_frame) With recent binutils update to support dwarf CFI pseudo-ops in gas, we now get .eh_frame vs. .debug_frame. Although the call frame info is exactly the same in both, the CIE differs, which the current kernel unwinder can't cope with. This broke both the kernel unwinder as well as loadable modules (latter because of a new unhandled relo R_ARC_32_PCREL from .rela.eh_frame in the module loader) The ideal solution would be to switch unwinder to .eh_frame. For now however we can make do by just ensureing .debug_frame is generated by removing -fasynchronous-unwind-tables .eh_frame generated with -gdwarf-2 -fasynchronous-unwind-tables .debug_frame generated with -gdwarf-2 Fixes STAR 9001058196 Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/Makefile b/arch/arc/Makefile index d4df6be66d58..85814e74677d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -66,8 +66,6 @@ endif endif -cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables - # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok ifeq ($(atleast_gcc48),y) cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2 -- cgit v1.2.3-59-g8ed1b From 9bd54517ee86cb164c734f72ea95aeba4804f10b Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 23 Jun 2016 11:00:39 +0300 Subject: arc: unwind: warn only once if DW2_UNWIND is disabled If CONFIG_ARC_DW2_UNWIND is disabled every time arc_unwind_core() gets called following message gets printed in debug console: ----------------->8--------------- CONFIG_ARC_DW2_UNWIND needs to be enabled ----------------->8--------------- That message makes sense if user indeed wants to see a backtrace or get nice function call-graphs in perf but what if user disabled unwinder for the purpose? Why pollute his debug console? So instead we'll warn user about possibly missing feature once and let him decide if that was what he or she really wanted. Signed-off-by: Alexey Brodkin Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index e0efff15a5ae..b9192a653b7e 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, * prelogue is setup (callee regs saved and then fp set and not other * way around */ - pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); + pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); return 0; #endif -- cgit v1.2.3-59-g8ed1b From 5419447e2142d6ed68c9f5c1a28630b3a290a845 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 13 Jun 2016 17:03:48 +0200 Subject: Revert "s390/kdump: Clear subchannel ID to signal non-CCW/SCSI IPL" This reverts commit 852ffd0f4e23248b47531058e531066a988434b5. There are use cases where an intermediate boot kernel (1) uses kexec to boot the final production kernel (2). For this scenario we should provide the original boot information to the production kernel (2). Therefore clearing the boot information during kexec() should not be done. Cc: stable@vger.kernel.org # v3.17+ Reported-by: Steffen Maier Signed-off-by: Michael Holzheu Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f20abdb5630a..d14069d4b88d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2064,12 +2064,5 @@ void s390_reset_system(void) S390_lowcore.program_new_psw.addr = (unsigned long) s390_base_pgm_handler; - /* - * Clear subchannel ID and number to signal new kernel that no CCW or - * SCSI IPL has been done (for kexec and kdump) - */ - S390_lowcore.subchannel_id = 0; - S390_lowcore.subchannel_nr = 0; - do_reset_calls(); } -- cgit v1.2.3-59-g8ed1b From bcf4dd5f9ee096bd1510f838dd4750c35df4e38b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 27 Jun 2016 17:06:45 +0200 Subject: s390: fix test_fp_ctl inline assembly contraints The test_fp_ctl function is used to test if a given value is a valid floating-point control. The inline assembly in test_fp_ctl uses an incorrect constraint for the 'orig_fpc' variable. If the compiler chooses the same register for 'fpc' and 'orig_fpc' the test_fp_ctl() function always returns true. This allows user space to trigger kernel oopses with invalid floating-point control values on the signal stack. This problem has been introduced with git commit 4725c86055f5bbdcdf "s390: fix save and restore of the floating-point-control register" Cc: stable@vger.kernel.org # v3.13+ Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/fpu/api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 5e04f3cbd320..8ae236b0f80b 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -22,7 +22,7 @@ static inline int test_fp_ctl(u32 fpc) " la %0,0\n" "1:\n" EX_TABLE(0b,1b) - : "=d" (rc), "=d" (orig_fpc) + : "=d" (rc), "=&d" (orig_fpc) : "d" (fpc), "0" (-EINVAL)); return rc; } -- cgit v1.2.3-59-g8ed1b From cca0e542e02e48cce541a49c4046ec094ec27c1e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 24 Jun 2016 14:49:02 +1000 Subject: powerpc/eeh: Fix wrong argument passed to eeh_rmv_device() When calling eeh_rmv_device() in eeh_reset_device() for partial hotplug case, @rmv_data instead of its address is the proper argument. Otherwise, the stack frame is corrupted when writing to @rmv_data (actually its address) in eeh_rmv_device(). It results in kernel crash as observed. This fixes the issue by passing @rmv_data, not its address to eeh_rmv_device() in eeh_reset_device(). Fixes: 67086e32b564 ("powerpc/eeh: powerpc/eeh: Support error recovery for VF PE") Reported-by: Pridhiviraj Paidipeddi Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index b5f73cb5eeb6..d70101e1e25c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -647,7 +647,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, pci_unlock_rescan_remove(); } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); + eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } /* -- cgit v1.2.3-59-g8ed1b From 190ce8693c23eae09ba5f303a83bf2fbeb6478b1 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 28 Jun 2016 13:01:04 +1000 Subject: powerpc/tm: Avoid SLB faults in treclaim/trecheckpoint when RI=0 Currently we have 2 segments that are bolted for the kernel linear mapping (ie 0xc000... addresses). This is 0 to 1TB and also the kernel stacks. Anything accessed outside of these regions may need to be faulted in. (In practice machines with TM always have 1T segments) If a machine has < 2TB of memory we never fault on the kernel linear mapping as these two segments cover all physical memory. If a machine has > 2TB of memory, there may be structures outside of these two segments that need to be faulted in. This faulting can occur when running as a guest as the hypervisor may remove any SLB that's not bolted. When we treclaim and trecheckpoint we have a window where we need to run with the userspace GPRs. This means that we no longer have a valid stack pointer in r1. For this window we therefore clear MSR RI to indicate that any exceptions taken at this point won't be able to be handled. This means that we can't take segment misses in this RI=0 window. In this RI=0 region, we currently access the thread_struct for the process being context switched to or from. This thread_struct access may cause a segment fault since it's not guaranteed to be covered by the two bolted segment entries described above. We've seen this with a crash when running as a guest with > 2TB of memory on PowerVM: Unrecoverable exception 4100 at c00000000004f138 Oops: Unrecoverable exception, sig: 6 [#1] SMP NR_CPUS=2048 NUMA pSeries CPU: 1280 PID: 7755 Comm: kworker/1280:1 Tainted: G X 4.4.13-46-default #1 task: c000189001df4210 ti: c000189001d5c000 task.ti: c000189001d5c000 NIP: c00000000004f138 LR: 0000000010003a24 CTR: 0000000010001b20 REGS: c000189001d5f730 TRAP: 4100 Tainted: G X (4.4.13-46-default) MSR: 8000000100001031 CR: 24000048 XER: 00000000 CFAR: c00000000004ed18 SOFTE: 0 GPR00: ffffffffc58d7b60 c000189001d5f9b0 00000000100d7d00 000000003a738288 GPR04: 0000000000002781 0000000000000006 0000000000000000 c0000d1f4d889620 GPR08: 000000000000c350 00000000000008ab 00000000000008ab 00000000100d7af0 GPR12: 00000000100d7ae8 00003ffe787e67a0 0000000000000000 0000000000000211 GPR16: 0000000010001b20 0000000000000000 0000000000800000 00003ffe787df110 GPR20: 0000000000000001 00000000100d1e10 0000000000000000 00003ffe787df050 GPR24: 0000000000000003 0000000000010000 0000000000000000 00003fffe79e2e30 GPR28: 00003fffe79e2e68 00000000003d0f00 00003ffe787e67a0 00003ffe787de680 NIP [c00000000004f138] restore_gprs+0xd0/0x16c LR [0000000010003a24] 0x10003a24 Call Trace: [c000189001d5f9b0] [c000189001d5f9f0] 0xc000189001d5f9f0 (unreliable) [c000189001d5fb90] [c00000000001583c] tm_recheckpoint+0x6c/0xa0 [c000189001d5fbd0] [c000000000015c40] __switch_to+0x2c0/0x350 [c000189001d5fc30] [c0000000007e647c] __schedule+0x32c/0x9c0 [c000189001d5fcb0] [c0000000007e6b58] schedule+0x48/0xc0 [c000189001d5fce0] [c0000000000deabc] worker_thread+0x22c/0x5b0 [c000189001d5fd80] [c0000000000e7000] kthread+0x110/0x130 [c000189001d5fe30] [c000000000009538] ret_from_kernel_thread+0x5c/0xa4 Instruction dump: 7cb103a6 7cc0e3a6 7ca222a6 78a58402 38c00800 7cc62838 08860000 7cc000a6 38a00006 78c60022 7cc62838 0b060000 7ccff120 e8270078 e8a70098 ---[ end trace 602126d0a1dedd54 ]--- This fixes this by copying the required data from the thread_struct to the stack before we clear MSR RI. Then once we clear RI, we only access the stack, guaranteeing there's no segment miss. We also tighten the region over which we set RI=0 on the treclaim() path. This may have a slight performance impact since we're adding an mtmsr instruction. Fixes: 090b9284d725 ("powerpc/tm: Clear MSR RI in non-recoverable TM code") Signed-off-by: Michael Neuling Reviewed-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/tm.S | 61 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index bf8f34a58670..b7019b559ddb 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim) std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) - /* We need to setup MSR for VSX register save instructions. Here we - * also clear the MSR RI since when we do the treclaim, we won't have a - * valid kernel pointer for a while. We clear RI here as it avoids - * adding another mtmsr closer to the treclaim. This makes the region - * maked as non-recoverable wider than it needs to be but it saves on - * inserting another mtmsrd later. - */ + /* We need to setup MSR for VSX register save instructions. */ mfmsr r14 mr r15, r14 ori r15, r15, MSR_FP - li r16, MSR_RI + li r16, 0 ori r16, r16, MSR_EE /* IRQs hard off */ andc r15, r15, r16 oris r15, r15, MSR_VEC@h @@ -176,7 +170,17 @@ dont_backup_fp: 1: tdeqi r6, 0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 - /* The moment we treclaim, ALL of our GPRs will switch + /* Clear MSR RI since we are about to change r1, EE is already off. */ + li r4, 0 + mtmsrd r4, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + * + * The moment we treclaim, ALL of our GPRs will switch * to user register state. (FPRs, CCR etc. also!) * Use an sprg and a tm_scratch in the PACA to shuffle. */ @@ -197,6 +201,11 @@ dont_backup_fp: /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + + /* Reset MSR RI so we can take SLB faults again */ + li r11, MSR_RI + mtmsrd r11, 1 + mfspr r11, SPRN_PPR HMT_MEDIUM @@ -397,11 +406,6 @@ restore_gprs: ld r5, THREAD_TM_DSCR(r3) ld r6, THREAD_TM_PPR(r3) - /* Clear the MSR RI since we are about to change R1. EE is already off - */ - li r4, 0 - mtmsrd r4, 1 - REST_GPR(0, r7) /* GPR0 */ REST_2GPRS(2, r7) /* GPR2-3 */ REST_GPR(4, r7) /* GPR4 */ @@ -439,10 +443,33 @@ restore_gprs: ld r6, _CCR(r7) mtcr r6 - REST_GPR(1, r7) /* GPR1 */ - REST_GPR(5, r7) /* GPR5-7 */ REST_GPR(6, r7) - ld r7, GPR7(r7) + + /* + * Store r1 and r5 on the stack so that we can access them + * after we clear MSR RI. + */ + + REST_GPR(5, r7) + std r5, -8(r1) + ld r5, GPR1(r7) + std r5, -16(r1) + + REST_GPR(7, r7) + + /* Clear MSR RI since we are about to change r1. EE is already off */ + li r5, 0 + mtmsrd r5, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + */ + + ld r5, -8(r1) + ld r1, -16(r1) /* Commit register state as checkpointed state: */ TRECHKPT -- cgit v1.2.3-59-g8ed1b From bfa37087aa04e45f56c41142dfceecb79b8e6ef9 Mon Sep 17 00:00:00 2001 From: Darren Stevens Date: Wed, 29 Jun 2016 21:06:28 +0100 Subject: powerpc: Initialise pci_io_base as early as possible Commit d6a9996e84ac ("powerpc/mm: vmalloc abstraction in preparation for radix") turned kernel memory and IO addresses from #defined constants to variables initialised at runtime. On PA6T (pasemi) systems the setup_arch() machine call initialises the onboard PCI-e root-ports, and uses pci_io_base to do this, which is now before its value has been set, resulting in a panic early in boot before console IO is initialised. Move the pci_io_base initialisation to the same place as vmalloc ranges are set (hash__early_init_mmu()/radix__early_init_mmu()) - this is the earliest possible place we can initialise it. Fixes: d6a9996e84ac ("powerpc/mm: vmalloc abstraction in preparation for radix") Reported-by: Christian Zigotzky Signed-off-by: Darren Stevens Reviewed-by: Aneesh Kumar K.V [mpe: Add #ifdef CONFIG_PCI, massage change log slightly] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + arch/powerpc/kernel/pci_64.c | 1 - arch/powerpc/mm/hash_utils_64.c | 4 ++++ arch/powerpc/mm/pgtable-radix.c | 5 +++++ 4 files changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 88a5ecaa157b..ab84c89c9e98 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -230,6 +230,7 @@ extern unsigned long __kernel_virt_size; #define KERN_VIRT_SIZE __kernel_virt_size extern struct page *vmemmap; extern unsigned long ioremap_bot; +extern unsigned long pci_io_base; #endif /* __ASSEMBLY__ */ #include diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 3759df52bd67..a5ae49a2dcc4 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -47,7 +47,6 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); - pci_io_base = ISA_IO_BASE; /* For now, override phys_mem_access_prot. If we need it,g * later, we may move that initialization to each ppc_md */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 5b22ba0b58bc..2971ea18c768 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -922,6 +922,10 @@ void __init hash__early_init_mmu(void) vmemmap = (struct page *)H_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index e58707deef5c..7931e1496f0d 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -328,6 +328,11 @@ void __init radix__early_init_mmu(void) __vmalloc_end = RADIX_VMALLOC_END; vmemmap = (struct page *)RADIX_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; + +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* * For now radix also use the same frag size */ -- cgit v1.2.3-59-g8ed1b From 6d037de90a1fd7b4879b48d4dd5c4839b271be98 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 1 Jul 2016 15:01:01 +0200 Subject: MIPS: Fix possible corruption of cache mode by mprotect. The following testcase may result in a page table entries with a invalid CCA field being generated: static void *bindstack; static int sysrqfd; static void protect_low(int protect) { mprotect(bindstack, BINDSTACK_SIZE, protect); } static void sigbus_handler(int signal, siginfo_t * info, void *context) { void *addr = info->si_addr; write(sysrqfd, "x", 1); printf("sigbus, fault address %p (should not happen, but might)\n", addr); abort(); } static void run_bind_test(void) { unsigned int *p = bindstack; p[0] = 0xf001f001; write(sysrqfd, "x", 1); /* Set trap on access to p[0] */ protect_low(PROT_NONE); write(sysrqfd, "x", 1); /* Clear trap on access to p[0] */ protect_low(PROT_READ | PROT_WRITE | PROT_EXEC); write(sysrqfd, "x", 1); /* Check the contents of p[0] */ if (p[0] != 0xf001f001) { write(sysrqfd, "x", 1); /* Reached, but shouldn't be */ printf("badness, shouldn't happen but does\n"); abort(); } } int main(void) { struct sigaction sa; sysrqfd = open("/proc/sysrq-trigger", O_WRONLY); if (sigprocmask(SIG_BLOCK, NULL, &sa.sa_mask)) { perror("sigprocmask"); return 0; } sa.sa_sigaction = sigbus_handler; sa.sa_flags = SA_SIGINFO | SA_NODEFER | SA_RESTART; if (sigaction(SIGBUS, &sa, NULL)) { perror("sigaction"); return 0; } bindstack = mmap(NULL, BINDSTACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (bindstack == MAP_FAILED) { perror("mmap bindstack"); return 0; } printf("bindstack: %p\n", bindstack); run_bind_test(); printf("done\n"); return 0; } There are multiple ingredients for this: 1) PAGE_NONE is defined to _CACHE_CACHABLE_NONCOHERENT, which is CCA 3 on all platforms except SB1 where it's CCA 5. 2) _page_cachable_default must have bits set which are not set _CACHE_CACHABLE_NONCOHERENT. 3) Either the defective version of pte_modify for XPA or the standard version must be in used. However pte_modify for the 36 bit address space support is no affected. In that case additional bits in the final CCA mode may generate an invalid value for the CCA field. On the R10000 system where this was tracked down for example a CCA 7 has been observed, which is Uncached Accelerated. Fixed by: 1) Using the proper CCA mode for PAGE_NONE just like for all the other PAGE_* pte/pmd bits. 2) Fix the two affected variants of pte_modify. Further code inspection also shows the same issue to exist in pmd_modify which would affect huge page systems. Issue in pte_modify tracked down by Alastair Bridgewater, PAGE_NONE and pmd_modify issue found by me. The history of this goes back beyond Linus' git history. Chris Dearman's commit 351336929ccf222ae38ff0cb7a8dd5fd5c6236a0 ("[MIPS] Allow setting of the cache attribute at run time.") missed the opportunity to fix this but it was originally introduced in lmo commit d523832cf12007b3242e50bb77d0c9e63e0b6518 ("Missing from last commit.") and 32cc38229ac7538f2346918a09e75413e8861f87 ("New configuration option CONFIG_MIPS_UNCACHED.") Signed-off-by: Ralf Baechle Reported-by: Alastair Bridgewater --- arch/mips/include/asm/pgtable.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index a6b611f1da43..f53816744d60 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -24,7 +24,7 @@ struct mm_struct; struct vm_area_struct; #define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \ - _CACHE_CACHABLE_NONCOHERENT) + _page_cachable_default) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \ _page_cachable_default) #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \ @@ -476,7 +476,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) pte.pte_low &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK); pte.pte_high &= (_PFN_MASK | _CACHE_MASK); pte.pte_low |= pgprot_val(newprot) & ~_PFNX_MASK; - pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK; + pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK); return pte; } #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) @@ -491,7 +491,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #else static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); } #endif @@ -632,7 +633,8 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot); + pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK); return pmd; } -- cgit v1.2.3-59-g8ed1b From 06ee6d571f0e350253a8fc3492316b2be007fae2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 17:39:24 +0900 Subject: genirq: Add affinity hint to irq allocation Add an extra argument to the irq(domain) allocation functions, so we can hand down affinity hints to the allocator. Thats necessary to implement proper support for multiqueue devices. Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: linux-block@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: linux-nvme@lists.infradead.org Cc: axboe@fb.com Cc: agordeev@redhat.com Link: http://lkml.kernel.org/r/1467621574-8277-4-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- arch/sparc/kernel/irq_64.c | 2 +- arch/x86/kernel/apic/io_apic.c | 5 +++-- include/linux/irq.h | 4 ++-- include/linux/irqdomain.h | 9 ++++++--- kernel/irq/ipi.c | 2 +- kernel/irq/irqdesc.c | 12 ++++++++---- kernel/irq/irqdomain.c | 22 ++++++++++++++-------- kernel/irq/manage.c | 7 ++++--- kernel/irq/msi.c | 3 ++- 9 files changed, 41 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index e22416ce56ea..34a7930b76ef 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -242,7 +242,7 @@ unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino) { int irq; - irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL); + irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL, NULL); if (irq <= 0) goto out; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 446702ed99dc..7c4f90dd4c2a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -981,7 +981,7 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, return __irq_domain_alloc_irqs(domain, irq, 1, ioapic_alloc_attr_node(info), - info, legacy); + info, legacy, NULL); } /* @@ -1014,7 +1014,8 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, info->ioapic_pin)) return -ENOMEM; } else { - irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true); + irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, + NULL); if (irq >= 0) { irq_data = irq_domain_get_irq_data(domain, irq); data = irq_data->chip_data; diff --git a/include/linux/irq.h b/include/linux/irq.h index f6074813688d..39ce46ac5c18 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -708,11 +708,11 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, - struct module *owner); + struct module *owner, const struct cpumask *affinity); /* use macros to avoid needing export.h for THIS_MODULE */ #define irq_alloc_descs(irq, from, cnt, node) \ - __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE) + __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE, NULL) #define irq_alloc_desc(node) \ irq_alloc_descs(-1, 0, 1, node) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index f1f36e04d885..1aee0fbe900e 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -39,6 +39,7 @@ struct irq_domain; struct of_device_id; struct irq_chip; struct irq_data; +struct cpumask; /* Number of irqs reserved for a legacy isa controller */ #define NUM_ISA_INTERRUPTS 16 @@ -217,7 +218,8 @@ extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, - irq_hw_number_t hwirq, int node); + irq_hw_number_t hwirq, int node, + const struct cpumask *affinity); static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) { @@ -389,7 +391,7 @@ static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *par extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, - bool realloc); + bool realloc, const struct cpumask *affinity); extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); extern void irq_domain_activate_irq(struct irq_data *irq_data); extern void irq_domain_deactivate_irq(struct irq_data *irq_data); @@ -397,7 +399,8 @@ extern void irq_domain_deactivate_irq(struct irq_data *irq_data); static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) { - return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false); + return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, + NULL); } extern int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index 89b49f6773f0..4fd23510d5f2 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -76,7 +76,7 @@ int irq_reserve_ipi(struct irq_domain *domain, } } - virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE); + virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE, NULL); if (virq <= 0) { pr_warn("Can't reserve IPI, failed to alloc descs\n"); return -ENOMEM; diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 8731e1c5d1e7..b8df4fcdbb5f 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -223,7 +223,7 @@ static void free_desc(unsigned int irq) } static int alloc_descs(unsigned int start, unsigned int cnt, int node, - struct module *owner) + const struct cpumask *affinity, struct module *owner) { struct irq_desc *desc; int i; @@ -333,6 +333,7 @@ static void free_desc(unsigned int irq) } static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, + const struct cpumask *affinity, struct module *owner) { u32 i; @@ -453,12 +454,15 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * @cnt: Number of consecutive irqs to allocate. * @node: Preferred node on which the irq descriptor should be allocated * @owner: Owning module (can be NULL) + * @affinity: Optional pointer to an affinity mask which hints where the + * irq descriptors should be allocated and which default + * affinities to use * * Returns the first irq number or error code */ int __ref __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, - struct module *owner) + struct module *owner, const struct cpumask *affinity) { int start, ret; @@ -494,7 +498,7 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, bitmap_set(allocated_irqs, start, cnt); mutex_unlock(&sparse_irq_lock); - return alloc_descs(start, cnt, node, owner); + return alloc_descs(start, cnt, node, affinity, owner); err: mutex_unlock(&sparse_irq_lock); @@ -512,7 +516,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs); */ unsigned int irq_alloc_hwirqs(int cnt, int node) { - int i, irq = __irq_alloc_descs(-1, 0, cnt, node, NULL); + int i, irq = __irq_alloc_descs(-1, 0, cnt, node, NULL, NULL); if (irq < 0) return 0; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8798b6c9e945..79459b732dc9 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -481,7 +481,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain, } /* Allocate a virtual interrupt number */ - virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node)); + virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), NULL); if (virq <= 0) { pr_debug("-> virq allocation failed\n"); return 0; @@ -835,19 +835,23 @@ const struct irq_domain_ops irq_domain_simple_ops = { EXPORT_SYMBOL_GPL(irq_domain_simple_ops); int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, - int node) + int node, const struct cpumask *affinity) { unsigned int hint; if (virq >= 0) { - virq = irq_alloc_descs(virq, virq, cnt, node); + virq = __irq_alloc_descs(virq, virq, cnt, node, THIS_MODULE, + affinity); } else { hint = hwirq % nr_irqs; if (hint == 0) hint++; - virq = irq_alloc_descs_from(hint, cnt, node); - if (virq <= 0 && hint > 1) - virq = irq_alloc_descs_from(1, cnt, node); + virq = __irq_alloc_descs(-1, hint, cnt, node, THIS_MODULE, + affinity); + if (virq <= 0 && hint > 1) { + virq = __irq_alloc_descs(-1, 1, cnt, node, THIS_MODULE, + affinity); + } } return virq; @@ -1160,6 +1164,7 @@ int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, * @node: NUMA node id for memory allocation * @arg: domain specific argument * @realloc: IRQ descriptors have already been allocated if true + * @affinity: Optional irq affinity mask for multiqueue devices * * Allocate IRQ numbers and initialized all data structures to support * hierarchy IRQ domains. @@ -1175,7 +1180,7 @@ int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, */ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, - bool realloc) + bool realloc, const struct cpumask *affinity) { int i, ret, virq; @@ -1193,7 +1198,8 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, if (realloc && irq_base >= 0) { virq = irq_base; } else { - virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node); + virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node, + affinity); if (virq < 0) { pr_debug("cannot allocate IRQ(base %d, count %d)\n", irq_base, nr_irqs); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 30658e9827f0..ad0aac6d1248 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -353,10 +353,11 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) return 0; /* - * Preserve an userspace affinity setup, but make sure that - * one of the targets is online. + * Preserve the managed affinity setting and an userspace affinity + * setup, but make sure that one of the targets is online. */ - if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { + if (irqd_affinity_is_managed(&desc->irq_data) || + irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { if (cpumask_intersects(desc->irq_common_data.affinity, cpu_online_mask)) set = desc->irq_common_data.affinity; diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index eb5bf2b50b07..58dbbacc6fbb 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -334,7 +334,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, ops->set_desc(&arg, desc); virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used, - dev_to_node(dev), &arg, false); + dev_to_node(dev), &arg, false, + NULL); if (virq < 0) { ret = -ENOSPC; if (ops->handle_error) -- cgit v1.2.3-59-g8ed1b