aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt212
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml384
-rw-r--r--Documentation/power/energy-model.rst24
-rw-r--r--arch/arm64/kernel/smp.c1
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/kernel/acpi/boot.c2
-rw-r--r--drivers/acpi/bus.c34
-rw-r--r--drivers/acpi/cppc_acpi.c44
-rw-r--r--drivers/base/power/common.c8
-rw-r--r--drivers/base/power/domain.c278
-rw-r--r--drivers/base/power/domain_governor.c65
-rw-r--r--drivers/base/power/runtime.c53
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c211
-rw-r--r--drivers/cpufreq/cpufreq.c112
-rw-r--r--drivers/cpufreq/cpufreq_governor.c20
-rw-r--r--drivers/cpufreq/cpufreq_governor.h1
-rw-r--r--drivers/cpufreq/intel_pstate.c2
-rw-r--r--drivers/cpufreq/mediatek-cpufreq-hw.c4
-rw-r--r--drivers/cpufreq/pasemi-cpufreq.c1
-rw-r--r--drivers/cpufreq/pmac32-cpufreq.c2
-rw-r--r--drivers/cpufreq/pmac64-cpufreq.c2
-rw-r--r--drivers/cpufreq/ppc_cbe_cpufreq.c1
-rw-r--r--drivers/cpufreq/ppc_cbe_cpufreq_pmi.c2
-rw-r--r--drivers/cpufreq/scmi-cpufreq.c4
-rw-r--r--drivers/cpuidle/cpuidle-psci-domain.c4
-rw-r--r--drivers/cpuidle/cpuidle-psci.c46
-rw-r--r--drivers/cpuidle/cpuidle-riscv-sbi.c4
-rw-r--r--drivers/devfreq/devfreq.c20
-rw-r--r--drivers/devfreq/governor.h27
-rw-r--r--drivers/devfreq/governor_passive.c403
-rw-r--r--drivers/devfreq/rk3399_dmc.c312
-rw-r--r--drivers/idle/intel_idle.c133
-rw-r--r--drivers/iio/chemical/scd30.h5
-rw-r--r--drivers/iio/chemical/scd30_core.c10
-rw-r--r--drivers/iio/chemical/scd30_i2c.c3
-rw-r--r--drivers/iio/chemical/scd30_serial.c3
-rw-r--r--drivers/opp/of.c6
-rw-r--r--drivers/powercap/dtpm_cpu.c2
-rw-r--r--drivers/powercap/intel_rapl_common.c4
-rw-r--r--drivers/powercap/intel_rapl_msr.c1
-rw-r--r--drivers/soc/rockchip/pm_domains.c118
-rw-r--r--drivers/thermal/cpufreq_cooling.c2
-rw-r--r--drivers/thermal/devfreq_cooling.c8
-rw-r--r--include/acpi/cppc_acpi.h5
-rw-r--r--include/linux/acpi.h2
-rw-r--r--include/linux/devfreq.h17
-rw-r--r--include/linux/energy_model.h35
-rw-r--r--include/linux/pm.h14
-rw-r--r--include/linux/pm_domain.h24
-rw-r--r--include/linux/pm_runtime.h10
-rw-r--r--include/linux/suspend.h44
-rw-r--r--include/soc/rockchip/pm_domains.h25
-rw-r--r--kernel/power/Makefile6
-rw-r--r--kernel/power/energy_model.c65
-rw-r--r--kernel/power/main.c29
-rw-r--r--kernel/power/process.c3
-rw-r--r--kernel/power/snapshot.c12
-rw-r--r--tools/power/x86/turbostat/Makefile2
-rw-r--r--tools/power/x86/turbostat/turbostat.82
-rw-r--r--tools/power/x86/turbostat/turbostat.c594
60 files changed, 2463 insertions, 1005 deletions
diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
deleted file mode 100644
index 58fc8a6cebc7..000000000000
--- a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-* Rockchip rk3399 DMC (Dynamic Memory Controller) device
-
-Required properties:
-- compatible: Must be "rockchip,rk3399-dmc".
-- devfreq-events: Node to get DDR loading, Refer to
- Documentation/devicetree/bindings/devfreq/event/
- rockchip-dfi.txt
-- clocks: Phandles for clock specified in "clock-names" property
-- clock-names : The name of clock used by the DFI, must be
- "pclk_ddr_mon";
-- operating-points-v2: Refer to Documentation/devicetree/bindings/opp/opp-v2.yaml
- for details.
-- center-supply: DMC supply node.
-- status: Marks the node enabled/disabled.
-- rockchip,pmu: Phandle to the syscon managing the "PMU general register
- files".
-
-Optional properties:
-- interrupts: The CPU interrupt number. The interrupt specifier
- format depends on the interrupt controller.
- It should be a DCF interrupt. When DDR DVFS finishes
- a DCF interrupt is triggered.
-- rockchip,pmu: Phandle to the syscon managing the "PMU general register
- files".
-
-Following properties relate to DDR timing:
-
-- rockchip,dram_speed_bin : Value reference include/dt-bindings/clock/rk3399-ddr.h,
- it selects the DDR3 cl-trp-trcd type. It must be
- set according to "Speed Bin" in DDR3 datasheet,
- DO NOT use a smaller "Speed Bin" than specified
- for the DDR3 being used.
-
-- rockchip,pd_idle : Configure the PD_IDLE value. Defines the
- power-down idle period in which memories are
- placed into power-down mode if bus is idle
- for PD_IDLE DFI clock cycles.
-
-- rockchip,sr_idle : Configure the SR_IDLE value. Defines the
- self-refresh idle period in which memories are
- placed into self-refresh mode if bus is idle
- for SR_IDLE * 1024 DFI clock cycles (DFI
- clocks freq is half of DRAM clock), default
- value is "0".
-
-- rockchip,sr_mc_gate_idle : Defines the memory self-refresh and controller
- clock gating idle period. Memories are placed
- into self-refresh mode and memory controller
- clock arg gating started if bus is idle for
- sr_mc_gate_idle*1024 DFI clock cycles.
-
-- rockchip,srpd_lite_idle : Defines the self-refresh power down idle
- period in which memories are placed into
- self-refresh power down mode if bus is idle
- for srpd_lite_idle * 1024 DFI clock cycles.
- This parameter is for LPDDR4 only.
-
-- rockchip,standby_idle : Defines the standby idle period in which
- memories are placed into self-refresh mode.
- The controller, pi, PHY and DRAM clock will
- be gated if bus is idle for standby_idle * DFI
- clock cycles.
-
-- rockchip,dram_dll_dis_freq : Defines the DDR3 DLL bypass frequency in MHz.
- When DDR frequency is less than DRAM_DLL_DISB_FREQ,
- DDR3 DLL will be bypassed. Note: if DLL was bypassed,
- the odt will also stop working.
-
-- rockchip,phy_dll_dis_freq : Defines the PHY dll bypass frequency in
- MHz (Mega Hz). When DDR frequency is less than
- DRAM_DLL_DISB_FREQ, PHY DLL will be bypassed.
- Note: PHY DLL and PHY ODT are independent.
-
-- rockchip,ddr3_odt_dis_freq : When the DRAM type is DDR3, this parameter defines
- the ODT disable frequency in MHz (Mega Hz).
- when the DDR frequency is less then ddr3_odt_dis_freq,
- the ODT on the DRAM side and controller side are
- both disabled.
-
-- rockchip,ddr3_drv : When the DRAM type is DDR3, this parameter defines
- the DRAM side driver strength in ohms. Default
- value is 40.
-
-- rockchip,ddr3_odt : When the DRAM type is DDR3, this parameter defines
- the DRAM side ODT strength in ohms. Default value
- is 120.
-
-- rockchip,phy_ddr3_ca_drv : When the DRAM type is DDR3, this parameter defines
- the phy side CA line (incluing command line,
- address line and clock line) driver strength.
- Default value is 40.
-
-- rockchip,phy_ddr3_dq_drv : When the DRAM type is DDR3, this parameter defines
- the PHY side DQ line (including DQS/DQ/DM line)
- driver strength. Default value is 40.
-
-- rockchip,phy_ddr3_odt : When the DRAM type is DDR3, this parameter defines
- the PHY side ODT strength. Default value is 240.
-
-- rockchip,lpddr3_odt_dis_freq : When the DRAM type is LPDDR3, this parameter defines
- then ODT disable frequency in MHz (Mega Hz).
- When DDR frequency is less then ddr3_odt_dis_freq,
- the ODT on the DRAM side and controller side are
- both disabled.
-
-- rockchip,lpddr3_drv : When the DRAM type is LPDDR3, this parameter defines
- the DRAM side driver strength in ohms. Default
- value is 34.
-
-- rockchip,lpddr3_odt : When the DRAM type is LPDDR3, this parameter defines
- the DRAM side ODT strength in ohms. Default value
- is 240.
-
-- rockchip,phy_lpddr3_ca_drv : When the DRAM type is LPDDR3, this parameter defines
- the PHY side CA line (including command line,
- address line and clock line) driver strength.
- Default value is 40.
-
-- rockchip,phy_lpddr3_dq_drv : When the DRAM type is LPDDR3, this parameter defines
- the PHY side DQ line (including DQS/DQ/DM line)
- driver strength. Default value is 40.
-
-- rockchip,phy_lpddr3_odt : When dram type is LPDDR3, this parameter define
- the phy side odt strength, default value is 240.
-
-- rockchip,lpddr4_odt_dis_freq : When the DRAM type is LPDDR4, this parameter
- defines the ODT disable frequency in
- MHz (Mega Hz). When the DDR frequency is less then
- ddr3_odt_dis_freq, the ODT on the DRAM side and
- controller side are both disabled.
-
-- rockchip,lpddr4_drv : When the DRAM type is LPDDR4, this parameter defines
- the DRAM side driver strength in ohms. Default
- value is 60.
-
-- rockchip,lpddr4_dq_odt : When the DRAM type is LPDDR4, this parameter defines
- the DRAM side ODT on DQS/DQ line strength in ohms.
- Default value is 40.
-
-- rockchip,lpddr4_ca_odt : When the DRAM type is LPDDR4, this parameter defines
- the DRAM side ODT on CA line strength in ohms.
- Default value is 40.
-
-- rockchip,phy_lpddr4_ca_drv : When the DRAM type is LPDDR4, this parameter defines
- the PHY side CA line (including command address
- line) driver strength. Default value is 40.
-
-- rockchip,phy_lpddr4_ck_cs_drv : When the DRAM type is LPDDR4, this parameter defines
- the PHY side clock line and CS line driver
- strength. Default value is 80.
-
-- rockchip,phy_lpddr4_dq_drv : When the DRAM type is LPDDR4, this parameter defines
- the PHY side DQ line (including DQS/DQ/DM line)
- driver strength. Default value is 80.
-
-- rockchip,phy_lpddr4_odt : When the DRAM type is LPDDR4, this parameter defines
- the PHY side ODT strength. Default value is 60.
-
-Example:
- dmc_opp_table: dmc_opp_table {
- compatible = "operating-points-v2";
-
- opp00 {
- opp-hz = /bits/ 64 <300000000>;
- opp-microvolt = <900000>;
- };
- opp01 {
- opp-hz = /bits/ 64 <666000000>;
- opp-microvolt = <900000>;
- };
- };
-
- dmc: dmc {
- compatible = "rockchip,rk3399-dmc";
- devfreq-events = <&dfi>;
- interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cru SCLK_DDRC>;
- clock-names = "dmc_clk";
- operating-points-v2 = <&dmc_opp_table>;
- center-supply = <&ppvar_centerlogic>;
- upthreshold = <15>;
- downdifferential = <10>;
- rockchip,ddr3_speed_bin = <21>;
- rockchip,pd_idle = <0x40>;
- rockchip,sr_idle = <0x2>;
- rockchip,sr_mc_gate_idle = <0x3>;
- rockchip,srpd_lite_idle = <0x4>;
- rockchip,standby_idle = <0x2000>;
- rockchip,dram_dll_dis_freq = <300>;
- rockchip,phy_dll_dis_freq = <125>;
- rockchip,auto_pd_dis_freq = <666>;
- rockchip,ddr3_odt_dis_freq = <333>;
- rockchip,ddr3_drv = <40>;
- rockchip,ddr3_odt = <120>;
- rockchip,phy_ddr3_ca_drv = <40>;
- rockchip,phy_ddr3_dq_drv = <40>;
- rockchip,phy_ddr3_odt = <240>;
- rockchip,lpddr3_odt_dis_freq = <333>;
- rockchip,lpddr3_drv = <34>;
- rockchip,lpddr3_odt = <240>;
- rockchip,phy_lpddr3_ca_drv = <40>;
- rockchip,phy_lpddr3_dq_drv = <40>;
- rockchip,phy_lpddr3_odt = <240>;
- rockchip,lpddr4_odt_dis_freq = <333>;
- rockchip,lpddr4_drv = <60>;
- rockchip,lpddr4_dq_odt = <40>;
- rockchip,lpddr4_ca_odt = <40>;
- rockchip,phy_lpddr4_ca_drv = <40>;
- rockchip,phy_lpddr4_ck_cs_drv = <80>;
- rockchip,phy_lpddr4_dq_drv = <80>;
- rockchip,phy_lpddr4_odt = <60>;
- };
diff --git a/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml b/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml
new file mode 100644
index 000000000000..fb4920397d08
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml
@@ -0,0 +1,384 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# %YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/rockchip,rk3399-dmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip rk3399 DMC (Dynamic Memory Controller) device
+
+maintainers:
+ - Brian Norris <briannorris@chromium.org>
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3399-dmc
+
+ devfreq-events:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Node to get DDR loading. Refer to
+ Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt.
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: dmc_clk
+
+ operating-points-v2: true
+
+ center-supply:
+ description:
+ DMC regulator supply.
+
+ rockchip,pmu:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon managing the "PMU general register files".
+
+ interrupts:
+ maxItems: 1
+ description:
+ The CPU interrupt number. It should be a DCF interrupt. When DDR DVFS
+ finishes, a DCF interrupt is triggered.
+
+ rockchip,ddr3_speed_bin:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ For values, reference include/dt-bindings/clock/rk3399-ddr.h. Selects the
+ DDR3 cl-trp-trcd type. It must be set according to "Speed Bin" in DDR3
+ datasheet; DO NOT use a smaller "Speed Bin" than specified for the DDR3
+ being used.
+
+ rockchip,pd_idle:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Configure the PD_IDLE value. Defines the power-down idle period in which
+ memories are placed into power-down mode if bus is idle for PD_IDLE DFI
+ clock cycles.
+ See also rockchip,pd-idle-ns.
+
+ rockchip,sr_idle:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Configure the SR_IDLE value. Defines the self-refresh idle period in
+ which memories are placed into self-refresh mode if bus is idle for
+ SR_IDLE * 1024 DFI clock cycles (DFI clocks freq is half of DRAM clock).
+ See also rockchip,sr-idle-ns.
+ default: 0
+
+ rockchip,sr_mc_gate_idle:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Defines the memory self-refresh and controller clock gating idle period.
+ Memories are placed into self-refresh mode and memory controller clock
+ arg gating started if bus is idle for sr_mc_gate_idle*1024 DFI clock
+ cycles.
+ See also rockchip,sr-mc-gate-idle-ns.
+
+ rockchip,srpd_lite_idle:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Defines the self-refresh power down idle period in which memories are
+ placed into self-refresh power down mode if bus is idle for
+ srpd_lite_idle * 1024 DFI clock cycles. This parameter is for LPDDR4
+ only.
+ See also rockchip,srpd-lite-idle-ns.
+
+ rockchip,standby_idle:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Defines the standby idle period in which memories are placed into
+ self-refresh mode. The controller, pi, PHY and DRAM clock will be gated
+ if bus is idle for standby_idle * DFI clock cycles.
+ See also rockchip,standby-idle-ns.
+
+ rockchip,dram_dll_dis_freq:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ Defines the DDR3 DLL bypass frequency in MHz. When DDR frequency is less
+ than DRAM_DLL_DISB_FREQ, DDR3 DLL will be bypassed.
+ Note: if DLL was bypassed, the odt will also stop working.
+
+ rockchip,phy_dll_dis_freq:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ Defines the PHY dll bypass frequency in MHz (Mega Hz). When DDR frequency
+ is less than DRAM_DLL_DISB_FREQ, PHY DLL will be bypassed.
+ Note: PHY DLL and PHY ODT are independent.
+
+ rockchip,auto_pd_dis_freq:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ Defines the auto PD disable frequency in MHz.
+
+ rockchip,ddr3_odt_dis_freq:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1000000 # In case anyone thought this was MHz.
+ description:
+ When the DRAM type is DDR3, this parameter defines the ODT disable
+ frequency in Hz. When the DDR frequency is less then ddr3_odt_dis_freq,
+ the ODT on the DRAM side and controller side are both disabled.
+
+ rockchip,ddr3_drv:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is DDR3, this parameter defines the DRAM side drive
+ strength in ohms.
+ default: 40
+
+ rockchip,ddr3_odt:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is DDR3, this parameter defines the DRAM side ODT
+ strength in ohms.
+ default: 120
+
+ rockchip,phy_ddr3_ca_drv:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is DDR3, this parameter defines the phy side CA line
+ (incluing command line, address line and clock line) drive strength.
+ default: 40
+
+ rockchip,phy_ddr3_dq_drv:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is DDR3, this parameter defines the PHY side DQ line
+ (including DQS/DQ/DM line) drive strength.
+ default: 40
+
+ rockchip,phy_ddr3_odt:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is DDR3, this parameter defines the PHY side ODT
+ strength.
+ default: 240
+
+ rockchip,lpddr3_odt_dis_freq:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1000000 # In case anyone thought this was MHz.
+ description:
+ When the DRAM type is LPDDR3, this parameter defines then ODT disable
+ frequency in Hz. When DDR frequency is less then ddr3_odt_dis_freq, the
+ ODT on the DRAM side and controller side are both disabled.
+
+ rockchip,lpddr3_drv:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is LPDDR3, this parameter defines the DRAM side drive
+ strength in ohms.
+ default: 34
+
+ rockchip,lpddr3_odt:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is LPDDR3, this parameter defines the DRAM side ODT
+ strength in ohms.
+ default: 240
+
+ rockchip,phy_lpddr3_ca_drv:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is LPDDR3, this parameter defines the PHY side CA line
+ (including command line, address line and clock line) drive strength.
+ default: 40
+
+ rockchip,phy_lpddr3_dq_drv:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is LPDDR3, this parameter defines the PHY side DQ line
+ (including DQS/DQ/DM line) drive strength.
+ default: 40
+
+ rockchip,phy_lpddr3_odt:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When dram type is LPDDR3, this parameter define the phy side odt
+ strength, default value is 240.
+
+ rockchip,lpddr4_odt_dis_freq:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1000000 # In case anyone thought this was MHz.
+ description:
+ When the DRAM type is LPDDR4, this parameter defines the ODT disable
+ frequency in Hz. When the DDR frequency is less then ddr3_odt_dis_freq,
+ the ODT on the DRAM side and controller side are both disabled.
+
+ rockchip,lpddr4_drv:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is LPDDR4, this parameter defines the DRAM side drive
+ strength in ohms.
+ default: 60
+
+ rockchip,lpddr4_dq_odt:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is LPDDR4, this parameter defines the DRAM side ODT on
+ DQS/DQ line strength in ohms.
+ default: 40
+
+ rockchip,lpddr4_ca_odt:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is LPDDR4, this parameter defines the DRAM side ODT on
+ CA line strength in ohms.
+ default: 40
+
+ rockchip,phy_lpddr4_ca_drv:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is LPDDR4, this parameter defines the PHY side CA line
+ (including command address line) drive strength.
+ default: 40
+
+ rockchip,phy_lpddr4_ck_cs_drv:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is LPDDR4, this parameter defines the PHY side clock
+ line and CS line drive strength.
+ default: 80
+
+ rockchip,phy_lpddr4_dq_drv:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is LPDDR4, this parameter defines the PHY side DQ line
+ (including DQS/DQ/DM line) drive strength.
+ default: 80
+
+ rockchip,phy_lpddr4_odt:
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the DRAM type is LPDDR4, this parameter defines the PHY side ODT
+ strength.
+ default: 60
+
+ rockchip,pd-idle-ns:
+ description:
+ Configure the PD_IDLE value in nanoseconds. Defines the power-down idle
+ period in which memories are placed into power-down mode if bus is idle
+ for PD_IDLE nanoseconds.
+
+ rockchip,sr-idle-ns:
+ description:
+ Configure the SR_IDLE value in nanoseconds. Defines the self-refresh idle
+ period in which memories are placed into self-refresh mode if bus is idle
+ for SR_IDLE nanoseconds.
+ default: 0
+
+ rockchip,sr-mc-gate-idle-ns:
+ description:
+ Defines the memory self-refresh and controller clock gating idle period in nanoseconds.
+ Memories are placed into self-refresh mode and memory controller clock
+ arg gating started if bus is idle for sr_mc_gate_idle nanoseconds.
+
+ rockchip,srpd-lite-idle-ns:
+ description:
+ Defines the self-refresh power down idle period in which memories are
+ placed into self-refresh power down mode if bus is idle for
+ srpd_lite_idle nanoseonds. This parameter is for LPDDR4 only.
+
+ rockchip,standby-idle-ns:
+ description:
+ Defines the standby idle period in which memories are placed into
+ self-refresh mode. The controller, pi, PHY and DRAM clock will be gated
+ if bus is idle for standby_idle nanoseconds.
+
+ rockchip,pd-idle-dis-freq-hz:
+ description:
+ Defines the power-down idle disable frequency in Hz. When the DDR
+ frequency is greater than pd-idle-dis-freq, power-down idle is disabled.
+ See also rockchip,pd-idle-ns.
+
+ rockchip,sr-idle-dis-freq-hz:
+ description:
+ Defines the self-refresh idle disable frequency in Hz. When the DDR
+ frequency is greater than sr-idle-dis-freq, self-refresh idle is
+ disabled. See also rockchip,sr-idle-ns.
+
+ rockchip,sr-mc-gate-idle-dis-freq-hz:
+ description:
+ Defines the self-refresh and memory-controller clock gating disable
+ frequency in Hz. When the DDR frequency is greater than
+ sr-mc-gate-idle-dis-freq, the clock will not be gated when idle. See also
+ rockchip,sr-mc-gate-idle-ns.
+
+ rockchip,srpd-lite-idle-dis-freq-hz:
+ description:
+ Defines the self-refresh power down idle disable frequency in Hz. When
+ the DDR frequency is greater than srpd-lite-idle-dis-freq, memory will
+ not be placed into self-refresh power down mode when idle. See also
+ rockchip,srpd-lite-idle-ns.
+
+ rockchip,standby-idle-dis-freq-hz:
+ description:
+ Defines the standby idle disable frequency in Hz. When the DDR frequency
+ is greater than standby-idle-dis-freq, standby idle is disabled. See also
+ rockchip,standby-idle-ns.
+
+required:
+ - compatible
+ - devfreq-events
+ - clocks
+ - clock-names
+ - operating-points-v2
+ - center-supply
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/rk3399-cru.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ memory-controller {
+ compatible = "rockchip,rk3399-dmc";
+ devfreq-events = <&dfi>;
+ rockchip,pmu = <&pmu>;
+ interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru SCLK_DDRC>;
+ clock-names = "dmc_clk";
+ operating-points-v2 = <&dmc_opp_table>;
+ center-supply = <&ppvar_centerlogic>;
+ rockchip,pd-idle-ns = <160>;
+ rockchip,sr-idle-ns = <10240>;
+ rockchip,sr-mc-gate-idle-ns = <40960>;
+ rockchip,srpd-lite-idle-ns = <61440>;
+ rockchip,standby-idle-ns = <81920>;
+ rockchip,ddr3_odt_dis_freq = <333000000>;
+ rockchip,lpddr3_odt_dis_freq = <333000000>;
+ rockchip,lpddr4_odt_dis_freq = <333000000>;
+ rockchip,pd-idle-dis-freq-hz = <1000000000>;
+ rockchip,sr-idle-dis-freq-hz = <1000000000>;
+ rockchip,sr-mc-gate-idle-dis-freq-hz = <1000000000>;
+ rockchip,srpd-lite-idle-dis-freq-hz = <0>;
+ rockchip,standby-idle-dis-freq-hz = <928000000>;
+ };
diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst
index 49549aab41b4..feb257b7f350 100644
--- a/Documentation/power/energy-model.rst
+++ b/Documentation/power/energy-model.rst
@@ -123,6 +123,26 @@ allows a platform to register EM power values which are reflecting total power
(static + dynamic). These power values might be coming directly from
experiments and measurements.
+Registration of 'artificial' EM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There is an option to provide a custom callback for drivers missing detailed
+knowledge about power value for each performance state. The callback
+.get_cost() is optional and provides the 'cost' values used by the EAS.
+This is useful for platforms that only provide information on relative
+efficiency between CPU types, where one could use the information to
+create an abstract power model. But even an abstract power model can
+sometimes be hard to fit in, given the input power value size restrictions.
+The .get_cost() allows to provide the 'cost' values which reflect the
+efficiency of the CPUs. This would allow to provide EAS information which
+has different relation than what would be forced by the EM internal
+formulas calculating 'cost' values. To register an EM for such platform, the
+driver must set the flag 'milliwatts' to 0, provide .get_power() callback
+and provide .get_cost() callback. The EM framework would handle such platform
+properly during registration. A flag EM_PERF_DOMAIN_ARTIFICIAL is set for such
+platform. Special care should be taken by other frameworks which are using EM
+to test and treat this flag properly.
+
Registration of 'simple' EM
~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -181,8 +201,8 @@ EM framework::
-> drivers/cpufreq/foo_cpufreq.c
- 01 static int est_power(unsigned long *mW, unsigned long *KHz,
- 02 struct device *dev)
+ 01 static int est_power(struct device *dev, unsigned long *mW,
+ 02 unsigned long *KHz)
03 {
04 long freq, power;
05
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3b46041f2b97..62ed361a4376 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -512,6 +512,7 @@ struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
{
return &cpu_madt_gicc[cpu];
}
+EXPORT_SYMBOL_GPL(acpi_cpu_get_madt_gicc);
/*
* acpi_map_gic_cpu_interface - parse processor MADT entry
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 14154b8b1720..403e83b4adc8 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -319,6 +319,7 @@
/* Run Time Average Power Limiting (RAPL) Interface */
+#define MSR_VR_CURRENT_CONFIG 0x00000601
#define MSR_RAPL_POWER_UNIT 0x00000606
#define MSR_PKG_POWER_LIMIT 0x00000610
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 058e7b924189..907cc98b1938 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1862,7 +1862,7 @@ int __acpi_release_global_lock(unsigned int *lock)
void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
{
- e820__range_add(addr, size, E820_TYPE_ACPI);
+ e820__range_add(addr, size, E820_TYPE_NVS);
e820__update_table_print();
}
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index fe0000eb7cae..b67d2ee77cd1 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -279,6 +279,20 @@ bool osc_pc_lpi_support_confirmed;
EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed);
/*
+ * ACPI 6.2 Section 6.2.11.2 'Platform-Wide OSPM Capabilities':
+ * Starting with ACPI Specification 6.2, all _CPC registers can be in
+ * PCC, System Memory, System IO, or Functional Fixed Hardware address
+ * spaces. OSPM support for this more flexible register space scheme is
+ * indicated by the “Flexible Address Space for CPPC Registers” _OSC bit.
+ *
+ * Otherwise (cf ACPI 6.1, s8.4.7.1.1.X), _CPC registers must be in:
+ * - PCC or Functional Fixed Hardware address space if defined
+ * - SystemMemory address space (NULL register) if not defined
+ */
+bool osc_cpc_flexible_adr_space_confirmed;
+EXPORT_SYMBOL_GPL(osc_cpc_flexible_adr_space_confirmed);
+
+/*
* ACPI 6.4 Operating System Capabilities for USB.
*/
bool osc_sb_native_usb4_support_confirmed;
@@ -315,12 +329,15 @@ static void acpi_bus_osc_negotiate_platform_control(void)
#endif
#ifdef CONFIG_X86
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT;
- if (boot_cpu_has(X86_FEATURE_HWP)) {
- capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT;
- capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT;
- }
#endif
+#ifdef CONFIG_ACPI_CPPC_LIB
+ capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT;
+ capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT;
+#endif
+
+ capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_FLEXIBLE_ADR_SPACE;
+
if (IS_ENABLED(CONFIG_SCHED_MC_PRIO))
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_DIVERSE_HIGH_SUPPORT;
@@ -341,10 +358,9 @@ static void acpi_bus_osc_negotiate_platform_control(void)
return;
}
-#ifdef CONFIG_X86
- if (boot_cpu_has(X86_FEATURE_HWP))
- osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
- (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
+#ifdef CONFIG_ACPI_CPPC_LIB
+ osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
+ (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
#endif
/*
@@ -366,6 +382,8 @@ static void acpi_bus_osc_negotiate_platform_control(void)
capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
osc_sb_native_usb4_support_confirmed =
capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
+ osc_cpc_flexible_adr_space_confirmed =
+ capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPC_FLEXIBLE_ADR_SPACE;
}
kfree(context.ret.pointer);
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index bc1454789a06..3b299b28a8af 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -100,6 +100,16 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
(cpc)->cpc_entry.reg.space_id == \
ACPI_ADR_SPACE_PLATFORM_COMM)
+/* Check if a CPC register is in SystemMemory */
+#define CPC_IN_SYSTEM_MEMORY(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \
+ (cpc)->cpc_entry.reg.space_id == \
+ ACPI_ADR_SPACE_SYSTEM_MEMORY)
+
+/* Check if a CPC register is in SystemIo */
+#define CPC_IN_SYSTEM_IO(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \
+ (cpc)->cpc_entry.reg.space_id == \
+ ACPI_ADR_SPACE_SYSTEM_IO)
+
/* Evaluates to True if reg is a NULL register descriptor */
#define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \
(reg)->address == 0 && \
@@ -424,6 +434,24 @@ bool acpi_cpc_valid(void)
}
EXPORT_SYMBOL_GPL(acpi_cpc_valid);
+bool cppc_allow_fast_switch(void)
+{
+ struct cpc_register_resource *desired_reg;
+ struct cpc_desc *cpc_ptr;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
+ desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF];
+ if (!CPC_IN_SYSTEM_MEMORY(desired_reg) &&
+ !CPC_IN_SYSTEM_IO(desired_reg))
+ return false;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(cppc_allow_fast_switch);
+
/**
* acpi_get_psd_map - Map the CPUs in the freq domain of a given cpu
* @cpu: Find all CPUs that share a domain with cpu.
@@ -736,6 +764,11 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
if (gas_t->address) {
void __iomem *addr;
+ if (!osc_cpc_flexible_adr_space_confirmed) {
+ pr_debug("Flexible address space capability not supported\n");
+ goto out_free;
+ }
+
addr = ioremap(gas_t->address, gas_t->bit_width/8);
if (!addr)
goto out_free;
@@ -758,6 +791,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
gas_t->address);
goto out_free;
}
+ if (!osc_cpc_flexible_adr_space_confirmed) {
+ pr_debug("Flexible address space capability not supported\n");
+ goto out_free;
+ }
} else {
if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) {
/* Support only PCC, SystemMemory, SystemIO, and FFH type regs. */
@@ -1447,6 +1484,9 @@ EXPORT_SYMBOL_GPL(cppc_set_perf);
* transition latency for performance change requests. The closest we have
* is the timing information from the PCCT tables which provides the info
* on the number and frequency of PCC commands the platform can handle.
+ *
+ * If desired_reg is in the SystemMemory or SystemIo ACPI address space,
+ * then assume there is no latency.
*/
unsigned int cppc_get_transition_latency(int cpu_num)
{
@@ -1472,7 +1512,9 @@ unsigned int cppc_get_transition_latency(int cpu_num)
return CPUFREQ_ETERNAL;
desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
- if (!CPC_IN_PCC(desired_reg))
+ if (CPC_IN_SYSTEM_MEMORY(desired_reg) || CPC_IN_SYSTEM_IO(desired_reg))
+ return 0;
+ else if (!CPC_IN_PCC(desired_reg))
return CPUFREQ_ETERNAL;
if (pcc_ss_id < 0)
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index bbddb267c2e6..72115917e0bd 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -172,10 +172,10 @@ EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_name);
* @dev: Device to detach.
* @power_off: Used to indicate whether we should power off the device.
*
- * This functions will reverse the actions from dev_pm_domain_attach() and
- * dev_pm_domain_attach_by_id(), thus it detaches @dev from its PM domain.
- * Typically it should be invoked during the remove phase, either from
- * subsystem level code or from drivers.
+ * This functions will reverse the actions from dev_pm_domain_attach(),
+ * dev_pm_domain_attach_by_id() and dev_pm_domain_attach_by_name(), thus it
+ * detaches @dev from its PM domain. Typically it should be invoked during the
+ * remove phase, either from subsystem level code or from drivers.
*
* Callers must ensure proper synchronization of this function with power
* management callbacks.
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 1ee878d126fd..739e52cd4aba 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -131,7 +131,7 @@ static const struct genpd_lock_ops genpd_spin_ops = {
#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN)
#define genpd_is_rpm_always_on(genpd) (genpd->flags & GENPD_FLAG_RPM_ALWAYS_ON)
-static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
+static inline bool irq_safe_dev_in_sleep_domain(struct device *dev,
const struct generic_pm_domain *genpd)
{
bool ret;
@@ -139,11 +139,14 @@ static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd);
/*
- * Warn once if an IRQ safe device is attached to a no sleep domain, as
- * to indicate a suboptimal configuration for PM. For an always on
- * domain this isn't case, thus don't warn.
+ * Warn once if an IRQ safe device is attached to a domain, which
+ * callbacks are allowed to sleep. This indicates a suboptimal
+ * configuration for PM, but it doesn't matter for an always on domain.
*/
- if (ret && !genpd_is_always_on(genpd))
+ if (genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd))
+ return ret;
+
+ if (ret)
dev_warn_once(dev, "PM domain %s will not be powered off\n",
genpd->name);
@@ -225,24 +228,23 @@ static void genpd_debug_remove(struct generic_pm_domain *genpd)
static void genpd_update_accounting(struct generic_pm_domain *genpd)
{
- ktime_t delta, now;
+ u64 delta, now;
- now = ktime_get();
- delta = ktime_sub(now, genpd->accounting_time);
+ now = ktime_get_mono_fast_ns();
+ if (now <= genpd->accounting_time)
+ return;
+
+ delta = now - genpd->accounting_time;
/*
* If genpd->status is active, it means we are just
* out of off and so update the idle time and vice
* versa.
*/
- if (genpd->status == GENPD_STATE_ON) {
- int state_idx = genpd->state_idx;
-
- genpd->states[state_idx].idle_time =
- ktime_add(genpd->states[state_idx].idle_time, delta);
- } else {
- genpd->on_time = ktime_add(genpd->on_time, delta);
- }
+ if (genpd->status == GENPD_STATE_ON)
+ genpd->states[genpd->state_idx].idle_time += delta;
+ else
+ genpd->on_time += delta;
genpd->accounting_time = now;
}
@@ -476,15 +478,16 @@ EXPORT_SYMBOL_GPL(dev_pm_genpd_set_performance_state);
*/
void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next)
{
- struct generic_pm_domain_data *gpd_data;
struct generic_pm_domain *genpd;
+ struct gpd_timing_data *td;
genpd = dev_to_genpd_safe(dev);
if (!genpd)
return;
- gpd_data = to_gpd_data(dev->power.subsys_data->domain_data);
- gpd_data->next_wakeup = next;
+ td = to_gpd_data(dev->power.subsys_data->domain_data)->td;
+ if (td)
+ td->next_wakeup = next;
}
EXPORT_SYMBOL_GPL(dev_pm_genpd_set_next_wakeup);
@@ -506,6 +509,7 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
if (!genpd->power_on)
goto out;
+ timed = timed && genpd->gd && !genpd->states[state_idx].fwnode;
if (!timed) {
ret = genpd->power_on(genpd);
if (ret)
@@ -524,7 +528,7 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
goto out;
genpd->states[state_idx].power_on_latency_ns = elapsed_ns;
- genpd->max_off_time_changed = true;
+ genpd->gd->max_off_time_changed = true;
pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n",
genpd->name, "on", elapsed_ns);
@@ -555,6 +559,7 @@ static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed)
if (!genpd->power_off)
goto out;
+ timed = timed && genpd->gd && !genpd->states[state_idx].fwnode;
if (!timed) {
ret = genpd->power_off(genpd);
if (ret)
@@ -573,7 +578,7 @@ static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed)
goto out;
genpd->states[state_idx].power_off_latency_ns = elapsed_ns;
- genpd->max_off_time_changed = true;
+ genpd->gd->max_off_time_changed = true;
pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n",
genpd->name, "off", elapsed_ns);
@@ -649,18 +654,12 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
}
list_for_each_entry(pdd, &genpd->dev_list, list_node) {
- enum pm_qos_flags_status stat;
-
- stat = dev_pm_qos_flags(pdd->dev, PM_QOS_FLAG_NO_POWER_OFF);
- if (stat > PM_QOS_FLAGS_NONE)
- return -EBUSY;
-
/*
* Do not allow PM domain to be powered off, when an IRQ safe
* device is part of a non-IRQ safe domain.
*/
if (!pm_runtime_suspended(pdd->dev) ||
- irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd))
+ irq_safe_dev_in_sleep_domain(pdd->dev, genpd))
not_suspended++;
}
@@ -775,25 +774,27 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
dev = gpd_data->base.dev;
for (;;) {
- struct generic_pm_domain *genpd;
+ struct generic_pm_domain *genpd = ERR_PTR(-ENODATA);
struct pm_domain_data *pdd;
+ struct gpd_timing_data *td;
spin_lock_irq(&dev->power.lock);
pdd = dev->power.subsys_data ?
dev->power.subsys_data->domain_data : NULL;
if (pdd) {
- to_gpd_data(pdd)->td.constraint_changed = true;
- genpd = dev_to_genpd(dev);
- } else {
- genpd = ERR_PTR(-ENODATA);
+ td = to_gpd_data(pdd)->td;
+ if (td) {
+ td->constraint_changed = true;
+ genpd = dev_to_genpd(dev);
+ }
}
spin_unlock_irq(&dev->power.lock);
if (!IS_ERR(genpd)) {
genpd_lock(genpd);
- genpd->max_off_time_changed = true;
+ genpd->gd->max_off_time_changed = true;
genpd_unlock(genpd);
}
@@ -879,9 +880,9 @@ static int genpd_runtime_suspend(struct device *dev)
struct generic_pm_domain *genpd;
bool (*suspend_ok)(struct device *__dev);
struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
- struct gpd_timing_data *td = &gpd_data->td;
+ struct gpd_timing_data *td = gpd_data->td;
bool runtime_pm = pm_runtime_enabled(dev);
- ktime_t time_start;
+ ktime_t time_start = 0;
s64 elapsed_ns;
int ret;
@@ -902,8 +903,7 @@ static int genpd_runtime_suspend(struct device *dev)
return -EBUSY;
/* Measure suspend latency. */
- time_start = 0;
- if (runtime_pm)
+ if (td && runtime_pm)
time_start = ktime_get();
ret = __genpd_runtime_suspend(dev);
@@ -917,13 +917,13 @@ static int genpd_runtime_suspend(struct device *dev)
}
/* Update suspend latency value if the measured time exceeds it. */
- if (runtime_pm) {
+ if (td && runtime_pm) {
elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
if (elapsed_ns > td->suspend_latency_ns) {
td->suspend_latency_ns = elapsed_ns;
dev_dbg(dev, "suspend latency exceeded, %lld ns\n",
elapsed_ns);
- genpd->max_off_time_changed = true;
+ genpd->gd->max_off_time_changed = true;
td->constraint_changed = true;
}
}
@@ -932,7 +932,7 @@ static int genpd_runtime_suspend(struct device *dev)
* If power.irq_safe is set, this routine may be run with
* IRQs disabled, so suspend only if the PM domain also is irq_safe.
*/
- if (irq_safe_dev_in_no_sleep_domain(dev, genpd))
+ if (irq_safe_dev_in_sleep_domain(dev, genpd))
return 0;
genpd_lock(genpd);
@@ -955,12 +955,11 @@ static int genpd_runtime_resume(struct device *dev)
{
struct generic_pm_domain *genpd;
struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
- struct gpd_timing_data *td = &gpd_data->td;
- bool runtime_pm = pm_runtime_enabled(dev);
- ktime_t time_start;
+ struct gpd_timing_data *td = gpd_data->td;
+ bool timed = td && pm_runtime_enabled(dev);
+ ktime_t time_start = 0;
s64 elapsed_ns;
int ret;
- bool timed = true;
dev_dbg(dev, "%s()\n", __func__);
@@ -972,10 +971,8 @@ static int genpd_runtime_resume(struct device *dev)
* As we don't power off a non IRQ safe domain, which holds
* an IRQ safe device, we don't need to restore power to it.
*/
- if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) {
- timed = false;
+ if (irq_safe_dev_in_sleep_domain(dev, genpd))
goto out;
- }
genpd_lock(genpd);
ret = genpd_power_on(genpd, 0);
@@ -988,8 +985,7 @@ static int genpd_runtime_resume(struct device *dev)
out:
/* Measure resume latency. */
- time_start = 0;
- if (timed && runtime_pm)
+ if (timed)
time_start = ktime_get();
ret = genpd_start_dev(genpd, dev);
@@ -1001,13 +997,13 @@ static int genpd_runtime_resume(struct device *dev)
goto err_stop;
/* Update resume latency value if the measured time exceeds it. */
- if (timed && runtime_pm) {
+ if (timed) {
elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
if (elapsed_ns > td->resume_latency_ns) {
td->resume_latency_ns = elapsed_ns;
dev_dbg(dev, "resume latency exceeded, %lld ns\n",
elapsed_ns);
- genpd->max_off_time_changed = true;
+ genpd->gd->max_off_time_changed = true;
td->constraint_changed = true;
}
}
@@ -1500,9 +1496,11 @@ EXPORT_SYMBOL_GPL(dev_pm_genpd_resume);
#endif /* CONFIG_PM_SLEEP */
-static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev)
+static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev,
+ bool has_governor)
{
struct generic_pm_domain_data *gpd_data;
+ struct gpd_timing_data *td;
int ret;
ret = dev_pm_get_subsys_data(dev);
@@ -1516,26 +1514,38 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev)
}
gpd_data->base.dev = dev;
- gpd_data->td.constraint_changed = true;
- gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS;
gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier;
- gpd_data->next_wakeup = KTIME_MAX;
- spin_lock_irq(&dev->power.lock);
+ /* Allocate data used by a governor. */
+ if (has_governor) {
+ td = kzalloc(sizeof(*td), GFP_KERNEL);
+ if (!td) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
- if (dev->power.subsys_data->domain_data) {
- ret = -EINVAL;
- goto err_free;
+ td->constraint_changed = true;
+ td->effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS;
+ td->next_wakeup = KTIME_MAX;
+ gpd_data->td = td;
}
- dev->power.subsys_data->domain_data = &gpd_data->base;
+ spin_lock_irq(&dev->power.lock);
+
+ if (dev->power.subsys_data->domain_data)
+ ret = -EINVAL;
+ else
+ dev->power.subsys_data->domain_data = &gpd_data->base;
spin_unlock_irq(&dev->power.lock);
+ if (ret)
+ goto err_free;
+
return gpd_data;
err_free:
- spin_unlock_irq(&dev->power.lock);
+ kfree(gpd_data->td);
kfree(gpd_data);
err_put:
dev_pm_put_subsys_data(dev);
@@ -1551,6 +1561,7 @@ static void genpd_free_dev_data(struct device *dev,
spin_unlock_irq(&dev->power.lock);
+ kfree(gpd_data->td);
kfree(gpd_data);
dev_pm_put_subsys_data(dev);
}
@@ -1607,6 +1618,7 @@ static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev)
static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
struct device *base_dev)
{
+ struct genpd_governor_data *gd = genpd->gd;
struct generic_pm_domain_data *gpd_data;
int ret;
@@ -1615,7 +1627,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev))
return -EINVAL;
- gpd_data = genpd_alloc_dev_data(dev);
+ gpd_data = genpd_alloc_dev_data(dev, gd);
if (IS_ERR(gpd_data))
return PTR_ERR(gpd_data);
@@ -1631,7 +1643,8 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
dev_pm_domain_set(dev, &genpd->domain);
genpd->device_count++;
- genpd->max_off_time_changed = true;
+ if (gd)
+ gd->max_off_time_changed = true;
list_add_tail(&gpd_data->base.list_node, &genpd->dev_list);
@@ -1685,7 +1698,8 @@ static int genpd_remove_device(struct generic_pm_domain *genpd,
}
genpd->device_count--;
- genpd->max_off_time_changed = true;
+ if (genpd->gd)
+ genpd->gd->max_off_time_changed = true;
genpd_clear_cpumask(genpd, gpd_data->cpu);
dev_pm_domain_set(dev, NULL);
@@ -1958,6 +1972,53 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd)
return 0;
}
+static int genpd_alloc_data(struct generic_pm_domain *genpd)
+{
+ struct genpd_governor_data *gd = NULL;
+ int ret;
+
+ if (genpd_is_cpu_domain(genpd) &&
+ !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (genpd->gov) {
+ gd = kzalloc(sizeof(*gd), GFP_KERNEL);
+ if (!gd) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ gd->max_off_time_ns = -1;
+ gd->max_off_time_changed = true;
+ gd->next_wakeup = KTIME_MAX;
+ }
+
+ /* Use only one "off" state if there were no states declared */
+ if (genpd->state_count == 0) {
+ ret = genpd_set_default_power_state(genpd);
+ if (ret)
+ goto free;
+ }
+
+ genpd->gd = gd;
+ return 0;
+
+free:
+ if (genpd_is_cpu_domain(genpd))
+ free_cpumask_var(genpd->cpus);
+ kfree(gd);
+ return ret;
+}
+
+static void genpd_free_data(struct generic_pm_domain *genpd)
+{
+ if (genpd_is_cpu_domain(genpd))
+ free_cpumask_var(genpd->cpus);
+ if (genpd->free_states)
+ genpd->free_states(genpd->states, genpd->state_count);
+ kfree(genpd->gd);
+}
+
static void genpd_lock_init(struct generic_pm_domain *genpd)
{
if (genpd->flags & GENPD_FLAG_IRQ_SAFE) {
@@ -1995,11 +2056,9 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
atomic_set(&genpd->sd_count, 0);
genpd->status = is_off ? GENPD_STATE_OFF : GENPD_STATE_ON;
genpd->device_count = 0;
- genpd->max_off_time_ns = -1;
- genpd->max_off_time_changed = true;
genpd->provider = NULL;
genpd->has_provider = false;
- genpd->accounting_time = ktime_get();
+ genpd->accounting_time = ktime_get_mono_fast_ns();
genpd->domain.ops.runtime_suspend = genpd_runtime_suspend;
genpd->domain.ops.runtime_resume = genpd_runtime_resume;
genpd->domain.ops.prepare = genpd_prepare;
@@ -2017,26 +2076,22 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
genpd->dev_ops.start = pm_clk_resume;
}
+ /* The always-on governor works better with the corresponding flag. */
+ if (gov == &pm_domain_always_on_gov)
+ genpd->flags |= GENPD_FLAG_RPM_ALWAYS_ON;
+
/* Always-on domains must be powered on at initialization. */
if ((genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd)) &&
!genpd_status_on(genpd))
return -EINVAL;
- if (genpd_is_cpu_domain(genpd) &&
- !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL))
- return -ENOMEM;
-
- /* Use only one "off" state if there were no states declared */
- if (genpd->state_count == 0) {
- ret = genpd_set_default_power_state(genpd);
- if (ret) {
- if (genpd_is_cpu_domain(genpd))
- free_cpumask_var(genpd->cpus);
- return ret;
- }
- } else if (!gov && genpd->state_count > 1) {
+ /* Multiple states but no governor doesn't make sense. */
+ if (!gov && genpd->state_count > 1)
pr_warn("%s: no governor for states\n", genpd->name);
- }
+
+ ret = genpd_alloc_data(genpd);
+ if (ret)
+ return ret;
device_initialize(&genpd->dev);
dev_set_name(&genpd->dev, "%s", genpd->name);
@@ -2081,10 +2136,7 @@ static int genpd_remove(struct generic_pm_domain *genpd)
genpd_unlock(genpd);
genpd_debug_remove(genpd);
cancel_work_sync(&genpd->power_off_work);
- if (genpd_is_cpu_domain(genpd))
- free_cpumask_var(genpd->cpus);
- if (genpd->free_states)
- genpd->free_states(genpd->states, genpd->state_count);
+ genpd_free_data(genpd);
pr_debug("%s: removed %s\n", __func__, genpd->name);
@@ -3163,6 +3215,7 @@ static int sub_domains_show(struct seq_file *s, void *data)
static int idle_states_show(struct seq_file *s, void *data)
{
struct generic_pm_domain *genpd = s->private;
+ u64 now, delta, idle_time = 0;
unsigned int i;
int ret = 0;
@@ -3173,17 +3226,19 @@ static int idle_states_show(struct seq_file *s, void *data)
seq_puts(s, "State Time Spent(ms) Usage Rejected\n");
for (i = 0; i < genpd->state_count; i++) {
- ktime_t delta = 0;
- s64 msecs;
+ idle_time += genpd->states[i].idle_time;
- if ((genpd->status == GENPD_STATE_OFF) &&
- (genpd->state_idx == i))
- delta = ktime_sub(ktime_get(), genpd->accounting_time);
+ if (genpd->status == GENPD_STATE_OFF && genpd->state_idx == i) {
+ now = ktime_get_mono_fast_ns();
+ if (now > genpd->accounting_time) {
+ delta = now - genpd->accounting_time;
+ idle_time += delta;
+ }
+ }
- msecs = ktime_to_ms(
- ktime_add(genpd->states[i].idle_time, delta));
- seq_printf(s, "S%-13i %-14lld %-14llu %llu\n", i, msecs,
- genpd->states[i].usage, genpd->states[i].rejected);
+ do_div(idle_time, NSEC_PER_MSEC);
+ seq_printf(s, "S%-13i %-14llu %-14llu %llu\n", i, idle_time,
+ genpd->states[i].usage, genpd->states[i].rejected);
}
genpd_unlock(genpd);
@@ -3193,18 +3248,22 @@ static int idle_states_show(struct seq_file *s, void *data)
static int active_time_show(struct seq_file *s, void *data)
{
struct generic_pm_domain *genpd = s->private;
- ktime_t delta = 0;
+ u64 now, on_time, delta = 0;
int ret = 0;
ret = genpd_lock_interruptible(genpd);
if (ret)
return -ERESTARTSYS;
- if (genpd->status == GENPD_STATE_ON)
- delta = ktime_sub(ktime_get(), genpd->accounting_time);
+ if (genpd->status == GENPD_STATE_ON) {
+ now = ktime_get_mono_fast_ns();
+ if (now > genpd->accounting_time)
+ delta = now - genpd->accounting_time;
+ }
- seq_printf(s, "%lld ms\n", ktime_to_ms(
- ktime_add(genpd->on_time, delta)));
+ on_time = genpd->on_time + delta;
+ do_div(on_time, NSEC_PER_MSEC);
+ seq_printf(s, "%llu ms\n", on_time);
genpd_unlock(genpd);
return ret;
@@ -3213,7 +3272,7 @@ static int active_time_show(struct seq_file *s, void *data)
static int total_idle_time_show(struct seq_file *s, void *data)
{
struct generic_pm_domain *genpd = s->private;
- ktime_t delta = 0, total = 0;
+ u64 now, delta, total = 0;
unsigned int i;
int ret = 0;
@@ -3222,16 +3281,19 @@ static int total_idle_time_show(struct seq_file *s, void *data)
return -ERESTARTSYS;
for (i = 0; i < genpd->state_count; i++) {
+ total += genpd->states[i].idle_time;
- if ((genpd->status == GENPD_STATE_OFF) &&
- (genpd->state_idx == i))
- delta = ktime_sub(ktime_get(), genpd->accounting_time);
-
- total = ktime_add(total, genpd->states[i].idle_time);
+ if (genpd->status == GENPD_STATE_OFF && genpd->state_idx == i) {
+ now = ktime_get_mono_fast_ns();
+ if (now > genpd->accounting_time) {
+ delta = now - genpd->accounting_time;
+ total += delta;
+ }
+ }
}
- total = ktime_add(total, delta);
- seq_printf(s, "%lld ms\n", ktime_to_ms(total));
+ do_div(total, NSEC_PER_MSEC);
+ seq_printf(s, "%llu ms\n", total);
genpd_unlock(genpd);
return ret;
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index cd08c5885190..282a3a135827 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -18,6 +18,8 @@ static int dev_update_qos_constraint(struct device *dev, void *data)
s64 constraint_ns;
if (dev->power.subsys_data && dev->power.subsys_data->domain_data) {
+ struct gpd_timing_data *td = dev_gpd_data(dev)->td;
+
/*
* Only take suspend-time QoS constraints of devices into
* account, because constraints updated after the device has
@@ -25,7 +27,8 @@ static int dev_update_qos_constraint(struct device *dev, void *data)
* anyway. In order for them to take effect, the device has to
* be resumed and suspended again.
*/
- constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns;
+ constraint_ns = td ? td->effective_constraint_ns :
+ PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS;
} else {
/*
* The child is not in a domain and there's no info on its
@@ -49,7 +52,7 @@ static int dev_update_qos_constraint(struct device *dev, void *data)
*/
static bool default_suspend_ok(struct device *dev)
{
- struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+ struct gpd_timing_data *td = dev_gpd_data(dev)->td;
unsigned long flags;
s64 constraint_ns;
@@ -136,26 +139,28 @@ static void update_domain_next_wakeup(struct generic_pm_domain *genpd, ktime_t n
* is able to enter its optimal idle state.
*/
list_for_each_entry(pdd, &genpd->dev_list, list_node) {
- next_wakeup = to_gpd_data(pdd)->next_wakeup;
+ next_wakeup = to_gpd_data(pdd)->td->next_wakeup;
if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now))
if (ktime_before(next_wakeup, domain_wakeup))
domain_wakeup = next_wakeup;
}
list_for_each_entry(link, &genpd->parent_links, parent_node) {
- next_wakeup = link->child->next_wakeup;
+ struct genpd_governor_data *cgd = link->child->gd;
+
+ next_wakeup = cgd ? cgd->next_wakeup : KTIME_MAX;
if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now))
if (ktime_before(next_wakeup, domain_wakeup))
domain_wakeup = next_wakeup;
}
- genpd->next_wakeup = domain_wakeup;
+ genpd->gd->next_wakeup = domain_wakeup;
}
static bool next_wakeup_allows_state(struct generic_pm_domain *genpd,
unsigned int state, ktime_t now)
{
- ktime_t domain_wakeup = genpd->next_wakeup;
+ ktime_t domain_wakeup = genpd->gd->next_wakeup;
s64 idle_time_ns, min_sleep_ns;
min_sleep_ns = genpd->states[state].power_off_latency_ns +
@@ -185,8 +190,9 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
* All subdomains have been powered off already at this point.
*/
list_for_each_entry(link, &genpd->parent_links, parent_node) {
- struct generic_pm_domain *sd = link->child;
- s64 sd_max_off_ns = sd->max_off_time_ns;
+ struct genpd_governor_data *cgd = link->child->gd;
+
+ s64 sd_max_off_ns = cgd ? cgd->max_off_time_ns : -1;
if (sd_max_off_ns < 0)
continue;
@@ -215,7 +221,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
* domain to turn off and on (that's how much time it will
* have to wait worst case).
*/
- td = &to_gpd_data(pdd)->td;
+ td = to_gpd_data(pdd)->td;
constraint_ns = td->effective_constraint_ns;
/*
* Zero means "no suspend at all" and this runs only when all
@@ -244,7 +250,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
* time and the time needed to turn the domain on is the maximum
* theoretical time this domain can spend in the "off" state.
*/
- genpd->max_off_time_ns = min_off_time_ns -
+ genpd->gd->max_off_time_ns = min_off_time_ns -
genpd->states[state].power_on_latency_ns;
return true;
}
@@ -259,6 +265,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
{
struct generic_pm_domain *genpd = pd_to_genpd(pd);
+ struct genpd_governor_data *gd = genpd->gd;
int state_idx = genpd->state_count - 1;
struct gpd_link *link;
@@ -269,11 +276,11 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
* cannot be met.
*/
update_domain_next_wakeup(genpd, now);
- if ((genpd->flags & GENPD_FLAG_MIN_RESIDENCY) && (genpd->next_wakeup != KTIME_MAX)) {
+ if ((genpd->flags & GENPD_FLAG_MIN_RESIDENCY) && (gd->next_wakeup != KTIME_MAX)) {
/* Let's find out the deepest domain idle state, the devices prefer */
while (state_idx >= 0) {
if (next_wakeup_allows_state(genpd, state_idx, now)) {
- genpd->max_off_time_changed = true;
+ gd->max_off_time_changed = true;
break;
}
state_idx--;
@@ -281,14 +288,14 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
if (state_idx < 0) {
state_idx = 0;
- genpd->cached_power_down_ok = false;
+ gd->cached_power_down_ok = false;
goto done;
}
}
- if (!genpd->max_off_time_changed) {
- genpd->state_idx = genpd->cached_power_down_state_idx;
- return genpd->cached_power_down_ok;
+ if (!gd->max_off_time_changed) {
+ genpd->state_idx = gd->cached_power_down_state_idx;
+ return gd->cached_power_down_ok;
}
/*
@@ -297,12 +304,16 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
* going to be called for any parent until this instance
* returns.
*/
- list_for_each_entry(link, &genpd->child_links, child_node)
- link->parent->max_off_time_changed = true;
+ list_for_each_entry(link, &genpd->child_links, child_node) {
+ struct genpd_governor_data *pgd = link->parent->gd;
+
+ if (pgd)
+ pgd->max_off_time_changed = true;
+ }
- genpd->max_off_time_ns = -1;
- genpd->max_off_time_changed = false;
- genpd->cached_power_down_ok = true;
+ gd->max_off_time_ns = -1;
+ gd->max_off_time_changed = false;
+ gd->cached_power_down_ok = true;
/*
* Find a state to power down to, starting from the state
@@ -310,7 +321,7 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
*/
while (!__default_power_down_ok(pd, state_idx)) {
if (state_idx == 0) {
- genpd->cached_power_down_ok = false;
+ gd->cached_power_down_ok = false;
break;
}
state_idx--;
@@ -318,8 +329,8 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
done:
genpd->state_idx = state_idx;
- genpd->cached_power_down_state_idx = genpd->state_idx;
- return genpd->cached_power_down_ok;
+ gd->cached_power_down_state_idx = genpd->state_idx;
+ return gd->cached_power_down_ok;
}
static bool default_power_down_ok(struct dev_pm_domain *pd)
@@ -327,11 +338,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd)
return _default_power_down_ok(pd, ktime_get());
}
-static bool always_on_power_down_ok(struct dev_pm_domain *domain)
-{
- return false;
-}
-
#ifdef CONFIG_CPU_IDLE
static bool cpu_power_down_ok(struct dev_pm_domain *pd)
{
@@ -401,6 +407,5 @@ struct dev_power_governor simple_qos_governor = {
* pm_genpd_gov_always_on - A governor implementing an always-on policy
*/
struct dev_power_governor pm_domain_always_on_gov = {
- .power_down_ok = always_on_power_down_ok,
.suspend_ok = default_suspend_ok,
};
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index d4059e6ffeae..676dc72d912d 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -263,7 +263,7 @@ static int rpm_check_suspend_allowed(struct device *dev)
retval = -EINVAL;
else if (dev->power.disable_depth > 0)
retval = -EACCES;
- else if (atomic_read(&dev->power.usage_count) > 0)
+ else if (atomic_read(&dev->power.usage_count))
retval = -EAGAIN;
else if (!dev->power.ignore_children &&
atomic_read(&dev->power.child_count))
@@ -1039,13 +1039,33 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay)
}
EXPORT_SYMBOL_GPL(pm_schedule_suspend);
+static int rpm_drop_usage_count(struct device *dev)
+{
+ int ret;
+
+ ret = atomic_sub_return(1, &dev->power.usage_count);
+ if (ret >= 0)
+ return ret;
+
+ /*
+ * Because rpm_resume() does not check the usage counter, it will resume
+ * the device even if the usage counter is 0 or negative, so it is
+ * sufficient to increment the usage counter here to reverse the change
+ * made above.
+ */
+ atomic_inc(&dev->power.usage_count);
+ dev_warn(dev, "Runtime PM usage count underflow!\n");
+ return -EINVAL;
+}
+
/**
* __pm_runtime_idle - Entry point for runtime idle operations.
* @dev: Device to send idle notification for.
* @rpmflags: Flag bits.
*
* If the RPM_GET_PUT flag is set, decrement the device's usage count and
- * return immediately if it is larger than zero. Then carry out an idle
+ * return immediately if it is larger than zero (if it becomes negative, log a
+ * warning, increment it, and return an error). Then carry out an idle
* notification, either synchronous or asynchronous.
*
* This routine may be called in atomic context if the RPM_ASYNC flag is set,
@@ -1057,7 +1077,10 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
int retval;
if (rpmflags & RPM_GET_PUT) {
- if (!atomic_dec_and_test(&dev->power.usage_count)) {
+ retval = rpm_drop_usage_count(dev);
+ if (retval < 0) {
+ return retval;
+ } else if (retval > 0) {
trace_rpm_usage_rcuidle(dev, rpmflags);
return 0;
}
@@ -1079,7 +1102,8 @@ EXPORT_SYMBOL_GPL(__pm_runtime_idle);
* @rpmflags: Flag bits.
*
* If the RPM_GET_PUT flag is set, decrement the device's usage count and
- * return immediately if it is larger than zero. Then carry out a suspend,
+ * return immediately if it is larger than zero (if it becomes negative, log a
+ * warning, increment it, and return an error). Then carry out a suspend,
* either synchronous or asynchronous.
*
* This routine may be called in atomic context if the RPM_ASYNC flag is set,
@@ -1091,7 +1115,10 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags)
int retval;
if (rpmflags & RPM_GET_PUT) {
- if (!atomic_dec_and_test(&dev->power.usage_count)) {
+ retval = rpm_drop_usage_count(dev);
+ if (retval < 0) {
+ return retval;
+ } else if (retval > 0) {
trace_rpm_usage_rcuidle(dev, rpmflags);
return 0;
}
@@ -1210,12 +1237,13 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
{
struct device *parent = dev->parent;
bool notify_parent = false;
+ unsigned long flags;
int error = 0;
if (status != RPM_ACTIVE && status != RPM_SUSPENDED)
return -EINVAL;
- spin_lock_irq(&dev->power.lock);
+ spin_lock_irqsave(&dev->power.lock, flags);
/*
* Prevent PM-runtime from being enabled for the device or return an
@@ -1226,7 +1254,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
else
error = -EAGAIN;
- spin_unlock_irq(&dev->power.lock);
+ spin_unlock_irqrestore(&dev->power.lock, flags);
if (error)
return error;
@@ -1247,7 +1275,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
device_links_read_unlock(idx);
}
- spin_lock_irq(&dev->power.lock);
+ spin_lock_irqsave(&dev->power.lock, flags);
if (dev->power.runtime_status == status || !parent)
goto out_set;
@@ -1288,7 +1316,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
dev->power.runtime_error = 0;
out:
- spin_unlock_irq(&dev->power.lock);
+ spin_unlock_irqrestore(&dev->power.lock, flags);
if (notify_parent)
pm_request_idle(parent);
@@ -1527,14 +1555,17 @@ EXPORT_SYMBOL_GPL(pm_runtime_forbid);
*/
void pm_runtime_allow(struct device *dev)
{
+ int ret;
+
spin_lock_irq(&dev->power.lock);
if (dev->power.runtime_auto)
goto out;
dev->power.runtime_auto = true;
- if (atomic_dec_and_test(&dev->power.usage_count))
+ ret = rpm_drop_usage_count(dev);
+ if (ret == 0)
rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
- else
+ else if (ret > 0)
trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
out:
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 82d370ae6a4a..d092c9bb4ba3 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -389,6 +389,27 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
return ret;
}
+static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ struct cppc_cpudata *cpu_data = policy->driver_data;
+ unsigned int cpu = policy->cpu;
+ u32 desired_perf;
+ int ret;
+
+ desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq);
+ cpu_data->perf_ctrls.desired_perf = desired_perf;
+ ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
+
+ if (ret) {
+ pr_debug("Failed to set target on CPU:%d. ret:%d\n",
+ cpu, ret);
+ return 0;
+ }
+
+ return target_freq;
+}
+
static int cppc_verify_policy(struct cpufreq_policy_data *policy)
{
cpufreq_verify_within_cpu_limits(policy);
@@ -420,12 +441,197 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
}
+static DEFINE_PER_CPU(unsigned int, efficiency_class);
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy);
+
+/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */
+#define CPPC_EM_CAP_STEP (20)
+/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */
+#define CPPC_EM_COST_STEP (1)
+/* Add a cost gap correspnding to the energy of 4 CPUs. */
+#define CPPC_EM_COST_GAP (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \
+ / CPPC_EM_CAP_STEP)
+
+static unsigned int get_perf_level_count(struct cpufreq_policy *policy)
+{
+ struct cppc_perf_caps *perf_caps;
+ unsigned int min_cap, max_cap;
+ struct cppc_cpudata *cpu_data;
+ int cpu = policy->cpu;
+
+ cpu_data = policy->driver_data;
+ perf_caps = &cpu_data->perf_caps;
+ max_cap = arch_scale_cpu_capacity(cpu);
+ min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf);
+ if ((min_cap == 0) || (max_cap < min_cap))
+ return 0;
+ return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP;
+}
+
+/*
+ * The cost is defined as:
+ * cost = power * max_frequency / frequency
+ */
+static inline unsigned long compute_cost(int cpu, int step)
+{
+ return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) +
+ step * CPPC_EM_COST_STEP;
+}
+
+static int cppc_get_cpu_power(struct device *cpu_dev,
+ unsigned long *power, unsigned long *KHz)
+{
+ unsigned long perf_step, perf_prev, perf, perf_check;
+ unsigned int min_step, max_step, step, step_check;
+ unsigned long prev_freq = *KHz;
+ unsigned int min_cap, max_cap;
+ struct cpufreq_policy *policy;
+
+ struct cppc_perf_caps *perf_caps;
+ struct cppc_cpudata *cpu_data;
+
+ policy = cpufreq_cpu_get_raw(cpu_dev->id);
+ cpu_data = policy->driver_data;
+ perf_caps = &cpu_data->perf_caps;
+ max_cap = arch_scale_cpu_capacity(cpu_dev->id);
+ min_cap = div_u64(max_cap * perf_caps->lowest_perf,
+ perf_caps->highest_perf);
+
+ perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
+ min_step = min_cap / CPPC_EM_CAP_STEP;
+ max_step = max_cap / CPPC_EM_CAP_STEP;
+
+ perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+ step = perf_prev / perf_step;
+
+ if (step > max_step)
+ return -EINVAL;
+
+ if (min_step == max_step) {
+ step = max_step;
+ perf = perf_caps->highest_perf;
+ } else if (step < min_step) {
+ step = min_step;
+ perf = perf_caps->lowest_perf;
+ } else {
+ step++;
+ if (step == max_step)
+ perf = perf_caps->highest_perf;
+ else
+ perf = step * perf_step;
+ }
+
+ *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
+ perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+ step_check = perf_check / perf_step;
+
+ /*
+ * To avoid bad integer approximation, check that new frequency value
+ * increased and that the new frequency will be converted to the
+ * desired step value.
+ */
+ while ((*KHz == prev_freq) || (step_check != step)) {
+ perf++;
+ *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
+ perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+ step_check = perf_check / perf_step;
+ }
+
+ /*
+ * With an artificial EM, only the cost value is used. Still the power
+ * is populated such as 0 < power < EM_MAX_POWER. This allows to add
+ * more sense to the artificial performance states.
+ */
+ *power = compute_cost(cpu_dev->id, step);
+
+ return 0;
+}
+
+static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
+ unsigned long *cost)
+{
+ unsigned long perf_step, perf_prev;
+ struct cppc_perf_caps *perf_caps;
+ struct cpufreq_policy *policy;
+ struct cppc_cpudata *cpu_data;
+ unsigned int max_cap;
+ int step;
+
+ policy = cpufreq_cpu_get_raw(cpu_dev->id);
+ cpu_data = policy->driver_data;
+ perf_caps = &cpu_data->perf_caps;
+ max_cap = arch_scale_cpu_capacity(cpu_dev->id);
+
+ perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz);
+ perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
+ step = perf_prev / perf_step;
+
+ *cost = compute_cost(cpu_dev->id, step);
+
+ return 0;
+}
+
+static int populate_efficiency_class(void)
+{
+ struct acpi_madt_generic_interrupt *gicc;
+ DECLARE_BITMAP(used_classes, 256) = {};
+ int class, cpu, index;
+
+ for_each_possible_cpu(cpu) {
+ gicc = acpi_cpu_get_madt_gicc(cpu);
+ class = gicc->efficiency_class;
+ bitmap_set(used_classes, class, 1);
+ }
+
+ if (bitmap_weight(used_classes, 256) <= 1) {
+ pr_debug("Efficiency classes are all equal (=%d). "
+ "No EM registered", class);
+ return -EINVAL;
+ }
+
+ /*
+ * Squeeze efficiency class values on [0:#efficiency_class-1].
+ * Values are per spec in [0:255].
+ */
+ index = 0;
+ for_each_set_bit(class, used_classes, 256) {
+ for_each_possible_cpu(cpu) {
+ gicc = acpi_cpu_get_madt_gicc(cpu);
+ if (gicc->efficiency_class == class)
+ per_cpu(efficiency_class, cpu) = index;
+ }
+ index++;
+ }
+ cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em;
+
+ return 0;
+}
+
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
+{
+ struct cppc_cpudata *cpu_data;
+ struct em_data_callback em_cb =
+ EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost);
+
+ cpu_data = policy->driver_data;
+ em_dev_register_perf_domain(get_cpu_device(policy->cpu),
+ get_perf_level_count(policy), &em_cb,
+ cpu_data->shared_cpu_map, 0);
+}
+
#else
static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
{
return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
}
+static int populate_efficiency_class(void)
+{
+ return 0;
+}
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
+{
+}
#endif
@@ -536,6 +742,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
goto out;
}
+ policy->fast_switch_possible = cppc_allow_fast_switch();
+ policy->dvfs_possible_from_any_cpu = true;
+
/*
* If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost
* is supported.
@@ -681,6 +890,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = {
.verify = cppc_verify_policy,
.target = cppc_cpufreq_set_target,
.get = cppc_cpufreq_get_rate,
+ .fast_switch = cppc_cpufreq_fast_switch,
.init = cppc_cpufreq_cpu_init,
.exit = cppc_cpufreq_cpu_exit,
.set_boost = cppc_cpufreq_set_boost,
@@ -742,6 +952,7 @@ static int __init cppc_cpufreq_init(void)
cppc_check_hisi_workaround();
cppc_freq_invariance_init();
+ populate_efficiency_class();
ret = cpufreq_register_driver(&cppc_cpufreq_driver);
if (ret)
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 80f535cc8a75..2cad42774164 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -28,6 +28,7 @@
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/tick.h>
+#include <linux/units.h>
#include <trace/events/power.h>
static LIST_HEAD(cpufreq_policy_list);
@@ -947,13 +948,14 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
struct cpufreq_policy *policy = to_policy(kobj);
struct freq_attr *fattr = to_attr(attr);
- ssize_t ret;
+ ssize_t ret = -EBUSY;
if (!fattr->show)
return -EIO;
down_read(&policy->rwsem);
- ret = fattr->show(policy, buf);
+ if (likely(!policy_is_inactive(policy)))
+ ret = fattr->show(policy, buf);
up_read(&policy->rwsem);
return ret;
@@ -964,7 +966,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
{
struct cpufreq_policy *policy = to_policy(kobj);
struct freq_attr *fattr = to_attr(attr);
- ssize_t ret = -EINVAL;
+ ssize_t ret = -EBUSY;
if (!fattr->store)
return -EIO;
@@ -978,7 +980,8 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
if (cpu_online(policy->cpu)) {
down_write(&policy->rwsem);
- ret = fattr->store(policy, buf, count);
+ if (likely(!policy_is_inactive(policy)))
+ ret = fattr->store(policy, buf, count);
up_write(&policy->rwsem);
}
@@ -1019,11 +1022,12 @@ static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu,
dev_err(dev, "cpufreq symlink creation failed\n");
}
-static void remove_cpu_dev_symlink(struct cpufreq_policy *policy,
+static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu,
struct device *dev)
{
dev_dbg(dev, "%s: Removing symlink\n", __func__);
sysfs_remove_link(&dev->kobj, "cpufreq");
+ cpumask_clear_cpu(cpu, policy->real_cpus);
}
static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
@@ -1337,12 +1341,12 @@ static int cpufreq_online(unsigned int cpu)
down_write(&policy->rwsem);
policy->cpu = cpu;
policy->governor = NULL;
- up_write(&policy->rwsem);
} else {
new_policy = true;
policy = cpufreq_policy_alloc(cpu);
if (!policy)
return -ENOMEM;
+ down_write(&policy->rwsem);
}
if (!new_policy && cpufreq_driver->online) {
@@ -1382,7 +1386,6 @@ static int cpufreq_online(unsigned int cpu)
cpumask_copy(policy->related_cpus, policy->cpus);
}
- down_write(&policy->rwsem);
/*
* affected cpus must always be the one, which are online. We aren't
* managing offline cpus here.
@@ -1531,9 +1534,9 @@ static int cpufreq_online(unsigned int cpu)
out_destroy_policy:
for_each_cpu(j, policy->real_cpus)
- remove_cpu_dev_symlink(policy, get_cpu_device(j));
+ remove_cpu_dev_symlink(policy, j, get_cpu_device(j));
- up_write(&policy->rwsem);
+ cpumask_clear(policy->cpus);
out_offline_policy:
if (cpufreq_driver->offline)
@@ -1544,6 +1547,8 @@ out_exit_policy:
cpufreq_driver->exit(policy);
out_free_policy:
+ up_write(&policy->rwsem);
+
cpufreq_policy_free(policy);
return ret;
}
@@ -1575,47 +1580,36 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
return 0;
}
-static int cpufreq_offline(unsigned int cpu)
+static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy)
{
- struct cpufreq_policy *policy;
int ret;
- pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
-
- policy = cpufreq_cpu_get_raw(cpu);
- if (!policy) {
- pr_debug("%s: No cpu_data found\n", __func__);
- return 0;
- }
-
- down_write(&policy->rwsem);
if (has_target())
cpufreq_stop_governor(policy);
cpumask_clear_cpu(cpu, policy->cpus);
- if (policy_is_inactive(policy)) {
- if (has_target())
- strncpy(policy->last_governor, policy->governor->name,
- CPUFREQ_NAME_LEN);
- else
- policy->last_policy = policy->policy;
- } else if (cpu == policy->cpu) {
- /* Nominate new CPU */
- policy->cpu = cpumask_any(policy->cpus);
- }
-
- /* Start governor again for active policy */
if (!policy_is_inactive(policy)) {
+ /* Nominate a new CPU if necessary. */
+ if (cpu == policy->cpu)
+ policy->cpu = cpumask_any(policy->cpus);
+
+ /* Start the governor again for the active policy. */
if (has_target()) {
ret = cpufreq_start_governor(policy);
if (ret)
pr_err("%s: Failed to start governor\n", __func__);
}
- goto unlock;
+ return;
}
+ if (has_target())
+ strncpy(policy->last_governor, policy->governor->name,
+ CPUFREQ_NAME_LEN);
+ else
+ policy->last_policy = policy->policy;
+
if (cpufreq_thermal_control_enabled(cpufreq_driver)) {
cpufreq_cooling_unregister(policy->cdev);
policy->cdev = NULL;
@@ -1634,8 +1628,24 @@ static int cpufreq_offline(unsigned int cpu)
cpufreq_driver->exit(policy);
policy->freq_table = NULL;
}
+}
+
+static int cpufreq_offline(unsigned int cpu)
+{
+ struct cpufreq_policy *policy;
+
+ pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
+
+ policy = cpufreq_cpu_get_raw(cpu);
+ if (!policy) {
+ pr_debug("%s: No cpu_data found\n", __func__);
+ return 0;
+ }
+
+ down_write(&policy->rwsem);
+
+ __cpufreq_offline(cpu, policy);
-unlock:
up_write(&policy->rwsem);
return 0;
}
@@ -1653,19 +1663,25 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
if (!policy)
return;
- if (cpu_online(cpu))
- cpufreq_offline(cpu);
+ down_write(&policy->rwsem);
- cpumask_clear_cpu(cpu, policy->real_cpus);
- remove_cpu_dev_symlink(policy, dev);
+ if (cpu_online(cpu))
+ __cpufreq_offline(cpu, policy);
- if (cpumask_empty(policy->real_cpus)) {
- /* We did light-weight exit earlier, do full tear down now */
- if (cpufreq_driver->offline)
- cpufreq_driver->exit(policy);
+ remove_cpu_dev_symlink(policy, cpu, dev);
- cpufreq_policy_free(policy);
+ if (!cpumask_empty(policy->real_cpus)) {
+ up_write(&policy->rwsem);
+ return;
}
+
+ /* We did light-weight exit earlier, do full tear down now */
+ if (cpufreq_driver->offline)
+ cpufreq_driver->exit(policy);
+
+ up_write(&policy->rwsem);
+
+ cpufreq_policy_free(policy);
}
/**
@@ -1707,6 +1723,16 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b
return new_freq;
if (policy->cur != new_freq) {
+ /*
+ * For some platforms, the frequency returned by hardware may be
+ * slightly different from what is provided in the frequency
+ * table, for example hardware may return 499 MHz instead of 500
+ * MHz. In such cases it is better to avoid getting into
+ * unnecessary frequency updates.
+ */
+ if (abs(policy->cur - new_freq) < HZ_PER_MHZ)
+ return policy->cur;
+
cpufreq_out_of_sync(policy, new_freq);
if (update)
schedule_work(&policy->update);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 0d42cf8b88d8..85da677c43d6 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -388,6 +388,15 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs,
gov->free(policy_dbs);
}
+static void cpufreq_dbs_data_release(struct kobject *kobj)
+{
+ struct dbs_data *dbs_data = to_dbs_data(to_gov_attr_set(kobj));
+ struct dbs_governor *gov = dbs_data->gov;
+
+ gov->exit(dbs_data);
+ kfree(dbs_data);
+}
+
int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
{
struct dbs_governor *gov = dbs_governor_of(policy);
@@ -425,6 +434,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
goto free_policy_dbs_info;
}
+ dbs_data->gov = gov;
gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list);
ret = gov->init(dbs_data);
@@ -447,6 +457,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
policy->governor_data = policy_dbs;
gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
+ gov->kobj_type.release = cpufreq_dbs_data_release;
ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type,
get_governor_parent_kobj(policy),
"%s", gov->gov.name);
@@ -488,13 +499,8 @@ void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy)
policy->governor_data = NULL;
- if (!count) {
- if (!have_governor_per_policy())
- gov->gdbs_data = NULL;
-
- gov->exit(dbs_data);
- kfree(dbs_data);
- }
+ if (!count && !have_governor_per_policy())
+ gov->gdbs_data = NULL;
free_policy_dbs_info(policy_dbs, gov);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index a5a0bc3cc23e..168c23fd7fca 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -37,6 +37,7 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
/* Governor demand based switching data (per-policy or global). */
struct dbs_data {
struct gov_attr_set attr_set;
+ struct dbs_governor *gov;
void *tuners;
unsigned int ignore_nice_load;
unsigned int sampling_rate;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 846bb3a78788..57cdb3679885 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1322,6 +1322,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
mutex_unlock(&intel_pstate_limits_lock);
intel_pstate_update_policies();
+ arch_set_max_freq_ratio(global.no_turbo);
mutex_unlock(&intel_pstate_driver_lock);
@@ -2424,6 +2425,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
X86_MATCH(BROADWELL_X, core_funcs),
X86_MATCH(SKYLAKE_X, core_funcs),
X86_MATCH(ICELAKE_X, core_funcs),
+ X86_MATCH(SAPPHIRERAPIDS_X, core_funcs),
{}
};
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index 0a94c56ddad2..813cccbfe934 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -51,8 +51,8 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = {
};
static int __maybe_unused
-mtk_cpufreq_get_cpu_power(unsigned long *mW,
- unsigned long *KHz, struct device *cpu_dev)
+mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW,
+ unsigned long *KHz)
{
struct mtk_cpufreq_data *data;
struct cpufreq_policy *policy;
diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
index 815645170c4d..039a66bbe1be 100644
--- a/drivers/cpufreq/pasemi-cpufreq.c
+++ b/drivers/cpufreq/pasemi-cpufreq.c
@@ -18,7 +18,6 @@
#include <asm/hw_irq.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/time.h>
#include <asm/smp.h>
diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
index 4f20c6a9108d..20f64a8b0a35 100644
--- a/drivers/cpufreq/pmac32-cpufreq.c
+++ b/drivers/cpufreq/pmac32-cpufreq.c
@@ -24,7 +24,7 @@
#include <linux/device.h>
#include <linux/hardirq.h>
#include <linux/of_device.h>
-#include <asm/prom.h>
+
#include <asm/machdep.h>
#include <asm/irq.h>
#include <asm/pmac_feature.h>
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index d7542a106e6b..ba9c31d98bd6 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -22,7 +22,7 @@
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/of_device.h>
-#include <asm/prom.h>
+
#include <asm/machdep.h>
#include <asm/irq.h>
#include <asm/sections.h>
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c
index c58abb4cca3a..e3313ce63b38 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.c
@@ -12,7 +12,6 @@
#include <linux/of_platform.h>
#include <asm/machdep.h>
-#include <asm/prom.h>
#include <asm/cell-regs.h>
#include "ppc_cbe_cpufreq.h"
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index 037fe23bc6ed..4fba3637b115 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -13,9 +13,9 @@
#include <linux/init.h>
#include <linux/of_platform.h>
#include <linux/pm_qos.h>
+#include <linux/slab.h>
#include <asm/processor.h>
-#include <asm/prom.h>
#include <asm/pmi.h>
#include <asm/cell-regs.h>
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 919fa6e3f462..6d2a4cf46db7 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -96,8 +96,8 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
}
static int __maybe_unused
-scmi_get_cpu_power(unsigned long *power, unsigned long *KHz,
- struct device *cpu_dev)
+scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
+ unsigned long *KHz)
{
unsigned long Hz;
int ret, domain;
diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index 755bbdfc5b82..3db4fca1172b 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -52,7 +52,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
struct generic_pm_domain *pd;
struct psci_pd_provider *pd_provider;
struct dev_power_governor *pd_gov;
- int ret = -ENOMEM, state_count = 0;
+ int ret = -ENOMEM;
pd = dt_idle_pd_alloc(np, psci_dt_parse_state_node);
if (!pd)
@@ -71,7 +71,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
pd->flags |= GENPD_FLAG_ALWAYS_ON;
/* Use governor for CPU PM domains if it has some states to manage. */
- pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
+ pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
ret = pm_genpd_init(pd, pd_gov, false);
if (ret)
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index b51b5df08450..540105ca0781 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -23,6 +23,7 @@
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/syscore_ops.h>
#include <asm/cpuidle.h>
@@ -131,6 +132,49 @@ static int psci_idle_cpuhp_down(unsigned int cpu)
return 0;
}
+static void psci_idle_syscore_switch(bool suspend)
+{
+ bool cleared = false;
+ struct device *dev;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ dev = per_cpu_ptr(&psci_cpuidle_data, cpu)->dev;
+
+ if (dev && suspend) {
+ dev_pm_genpd_suspend(dev);
+ } else if (dev) {
+ dev_pm_genpd_resume(dev);
+
+ /* Account for userspace having offlined a CPU. */
+ if (pm_runtime_status_suspended(dev))
+ pm_runtime_set_active(dev);
+
+ /* Clear domain state to re-start fresh. */
+ if (!cleared) {
+ psci_set_domain_state(0);
+ cleared = true;
+ }
+ }
+ }
+}
+
+static int psci_idle_syscore_suspend(void)
+{
+ psci_idle_syscore_switch(true);
+ return 0;
+}
+
+static void psci_idle_syscore_resume(void)
+{
+ psci_idle_syscore_switch(false);
+}
+
+static struct syscore_ops psci_idle_syscore_ops = {
+ .suspend = psci_idle_syscore_suspend,
+ .resume = psci_idle_syscore_resume,
+};
+
static void psci_idle_init_cpuhp(void)
{
int err;
@@ -138,6 +182,8 @@ static void psci_idle_init_cpuhp(void)
if (!psci_cpuidle_use_cpuhp)
return;
+ register_syscore_ops(&psci_idle_syscore_ops);
+
err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING,
"cpuidle/psci:online",
psci_idle_cpuhp_up,
diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c
index 5c852e671992..1151e5e2ba82 100644
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -414,7 +414,7 @@ static int sbi_pd_init(struct device_node *np)
struct generic_pm_domain *pd;
struct sbi_pd_provider *pd_provider;
struct dev_power_governor *pd_gov;
- int ret = -ENOMEM, state_count = 0;
+ int ret = -ENOMEM;
pd = dt_idle_pd_alloc(np, sbi_dt_parse_state_node);
if (!pd)
@@ -433,7 +433,7 @@ static int sbi_pd_init(struct device_node *np)
pd->flags |= GENPD_FLAG_ALWAYS_ON;
/* Use governor for CPU PM domains if it has some states to manage. */
- pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
+ pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
ret = pm_genpd_init(pd, pd_gov, false);
if (ret)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index a525a609dfc6..01474daf4548 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -112,16 +112,16 @@ static unsigned long find_available_max_freq(struct devfreq *devfreq)
}
/**
- * get_freq_range() - Get the current freq range
+ * devfreq_get_freq_range() - Get the current freq range
* @devfreq: the devfreq instance
* @min_freq: the min frequency
* @max_freq: the max frequency
*
* This takes into consideration all constraints.
*/
-static void get_freq_range(struct devfreq *devfreq,
- unsigned long *min_freq,
- unsigned long *max_freq)
+void devfreq_get_freq_range(struct devfreq *devfreq,
+ unsigned long *min_freq,
+ unsigned long *max_freq)
{
unsigned long *freq_table = devfreq->profile->freq_table;
s32 qos_min_freq, qos_max_freq;
@@ -158,6 +158,7 @@ static void get_freq_range(struct devfreq *devfreq,
if (*min_freq > *max_freq)
*min_freq = *max_freq;
}
+EXPORT_SYMBOL(devfreq_get_freq_range);
/**
* devfreq_get_freq_level() - Lookup freq_table for the frequency
@@ -418,7 +419,7 @@ int devfreq_update_target(struct devfreq *devfreq, unsigned long freq)
err = devfreq->governor->get_target_freq(devfreq, &freq);
if (err)
return err;
- get_freq_range(devfreq, &min_freq, &max_freq);
+ devfreq_get_freq_range(devfreq, &min_freq, &max_freq);
if (freq < min_freq) {
freq = min_freq;
@@ -785,6 +786,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
{
struct devfreq *devfreq;
struct devfreq_governor *governor;
+ unsigned long min_freq, max_freq;
int err = 0;
if (!dev || !profile || !governor_name) {
@@ -849,6 +851,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
goto err_dev;
}
+ devfreq_get_freq_range(devfreq, &min_freq, &max_freq);
+
devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
devfreq->opp_table = dev_pm_opp_get_opp_table(dev);
if (IS_ERR(devfreq->opp_table))
@@ -1587,7 +1591,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
unsigned long min_freq, max_freq;
mutex_lock(&df->lock);
- get_freq_range(df, &min_freq, &max_freq);
+ devfreq_get_freq_range(df, &min_freq, &max_freq);
mutex_unlock(&df->lock);
return sprintf(buf, "%lu\n", min_freq);
@@ -1641,7 +1645,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
unsigned long min_freq, max_freq;
mutex_lock(&df->lock);
- get_freq_range(df, &min_freq, &max_freq);
+ devfreq_get_freq_range(df, &min_freq, &max_freq);
mutex_unlock(&df->lock);
return sprintf(buf, "%lu\n", max_freq);
@@ -1955,7 +1959,7 @@ static int devfreq_summary_show(struct seq_file *s, void *data)
mutex_lock(&devfreq->lock);
cur_freq = devfreq->previous_freq;
- get_freq_range(devfreq, &min_freq, &max_freq);
+ devfreq_get_freq_range(devfreq, &min_freq, &max_freq);
timer = devfreq->profile->timer;
if (IS_SUPPORTED_ATTR(devfreq->governor->attrs, POLLING_INTERVAL))
diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index 002a7d67e39d..0adfebc0467a 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -48,6 +48,31 @@
#define DEVFREQ_GOV_ATTR_TIMER BIT(1)
/**
+ * struct devfreq_cpu_data - Hold the per-cpu data
+ * @node: list node
+ * @dev: reference to cpu device.
+ * @first_cpu: the cpumask of the first cpu of a policy.
+ * @opp_table: reference to cpu opp table.
+ * @cur_freq: the current frequency of the cpu.
+ * @min_freq: the min frequency of the cpu.
+ * @max_freq: the max frequency of the cpu.
+ *
+ * This structure stores the required cpu_data of a cpu.
+ * This is auto-populated by the governor.
+ */
+struct devfreq_cpu_data {
+ struct list_head node;
+
+ struct device *dev;
+ unsigned int first_cpu;
+
+ struct opp_table *opp_table;
+ unsigned int cur_freq;
+ unsigned int min_freq;
+ unsigned int max_freq;
+};
+
+/**
* struct devfreq_governor - Devfreq policy governor
* @node: list node - contains registered devfreq governors
* @name: Governor's name
@@ -89,6 +114,8 @@ int devm_devfreq_add_governor(struct device *dev,
int devfreq_update_status(struct devfreq *devfreq, unsigned long freq);
int devfreq_update_target(struct devfreq *devfreq, unsigned long freq);
+void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq,
+ unsigned long *max_freq);
static inline int devfreq_update_stats(struct devfreq *df)
{
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index fc09324a03e0..72c67979ebe1 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only
+ // SPDX-License-Identifier: GPL-2.0-only
/*
* linux/drivers/devfreq/governor_passive.c
*
@@ -8,76 +8,129 @@
*/
#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
#include <linux/device.h>
#include <linux/devfreq.h>
#include "governor.h"
-static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
- unsigned long *freq)
+#define HZ_PER_KHZ 1000
+
+static struct devfreq_cpu_data *
+get_parent_cpu_data(struct devfreq_passive_data *p_data,
+ struct cpufreq_policy *policy)
{
- struct devfreq_passive_data *p_data
- = (struct devfreq_passive_data *)devfreq->data;
- struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent;
- unsigned long child_freq = ULONG_MAX;
- struct dev_pm_opp *opp, *p_opp;
- int i, count;
+ struct devfreq_cpu_data *parent_cpu_data;
- /*
- * If the devfreq device with passive governor has the specific method
- * to determine the next frequency, should use the get_target_freq()
- * of struct devfreq_passive_data.
- */
- if (p_data->get_target_freq)
- return p_data->get_target_freq(devfreq, freq);
+ if (!p_data || !policy)
+ return NULL;
- /*
- * If the parent and passive devfreq device uses the OPP table,
- * get the next frequency by using the OPP table.
- */
+ list_for_each_entry(parent_cpu_data, &p_data->cpu_data_list, node)
+ if (parent_cpu_data->first_cpu == cpumask_first(policy->related_cpus))
+ return parent_cpu_data;
- /*
- * - parent devfreq device uses the governors except for passive.
- * - passive devfreq device uses the passive governor.
- *
- * Each devfreq has the OPP table. After deciding the new frequency
- * from the governor of parent devfreq device, the passive governor
- * need to get the index of new frequency on OPP table of parent
- * device. And then the index is used for getting the suitable
- * new frequency for passive devfreq device.
- */
- if (!devfreq->profile || !devfreq->profile->freq_table
- || devfreq->profile->max_state <= 0)
- return -EINVAL;
+ return NULL;
+}
- /*
- * The passive governor have to get the correct frequency from OPP
- * list of parent device. Because in this case, *freq is temporary
- * value which is decided by ondemand governor.
- */
- if (devfreq->opp_table && parent_devfreq->opp_table) {
- p_opp = devfreq_recommended_opp(parent_devfreq->dev.parent,
- freq, 0);
- if (IS_ERR(p_opp))
- return PTR_ERR(p_opp);
+static unsigned long get_target_freq_by_required_opp(struct device *p_dev,
+ struct opp_table *p_opp_table,
+ struct opp_table *opp_table,
+ unsigned long *freq)
+{
+ struct dev_pm_opp *opp = NULL, *p_opp = NULL;
+ unsigned long target_freq;
- opp = dev_pm_opp_xlate_required_opp(parent_devfreq->opp_table,
- devfreq->opp_table, p_opp);
- dev_pm_opp_put(p_opp);
+ if (!p_dev || !p_opp_table || !opp_table || !freq)
+ return 0;
- if (IS_ERR(opp))
- goto no_required_opp;
+ p_opp = devfreq_recommended_opp(p_dev, freq, 0);
+ if (IS_ERR(p_opp))
+ return 0;
- *freq = dev_pm_opp_get_freq(opp);
- dev_pm_opp_put(opp);
+ opp = dev_pm_opp_xlate_required_opp(p_opp_table, opp_table, p_opp);
+ dev_pm_opp_put(p_opp);
+ if (IS_ERR(opp))
return 0;
+
+ target_freq = dev_pm_opp_get_freq(opp);
+ dev_pm_opp_put(opp);
+
+ return target_freq;
+}
+
+static int get_target_freq_with_cpufreq(struct devfreq *devfreq,
+ unsigned long *target_freq)
+{
+ struct devfreq_passive_data *p_data =
+ (struct devfreq_passive_data *)devfreq->data;
+ struct devfreq_cpu_data *parent_cpu_data;
+ struct cpufreq_policy *policy;
+ unsigned long cpu, cpu_cur, cpu_min, cpu_max, cpu_percent;
+ unsigned long dev_min, dev_max;
+ unsigned long freq = 0;
+ int ret = 0;
+
+ for_each_online_cpu(cpu) {
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy) {
+ ret = -EINVAL;
+ continue;
+ }
+
+ parent_cpu_data = get_parent_cpu_data(p_data, policy);
+ if (!parent_cpu_data) {
+ cpufreq_cpu_put(policy);
+ continue;
+ }
+
+ /* Get target freq via required opps */
+ cpu_cur = parent_cpu_data->cur_freq * HZ_PER_KHZ;
+ freq = get_target_freq_by_required_opp(parent_cpu_data->dev,
+ parent_cpu_data->opp_table,
+ devfreq->opp_table, &cpu_cur);
+ if (freq) {
+ *target_freq = max(freq, *target_freq);
+ cpufreq_cpu_put(policy);
+ continue;
+ }
+
+ /* Use interpolation if required opps is not available */
+ devfreq_get_freq_range(devfreq, &dev_min, &dev_max);
+
+ cpu_min = parent_cpu_data->min_freq;
+ cpu_max = parent_cpu_data->max_freq;
+ cpu_cur = parent_cpu_data->cur_freq;
+
+ cpu_percent = ((cpu_cur - cpu_min) * 100) / (cpu_max - cpu_min);
+ freq = dev_min + mult_frac(dev_max - dev_min, cpu_percent, 100);
+
+ *target_freq = max(freq, *target_freq);
+ cpufreq_cpu_put(policy);
}
-no_required_opp:
- /*
- * Get the OPP table's index of decided frequency by governor
- * of parent device.
- */
+ return ret;
+}
+
+static int get_target_freq_with_devfreq(struct devfreq *devfreq,
+ unsigned long *freq)
+{
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent;
+ unsigned long child_freq = ULONG_MAX;
+ int i, count;
+
+ /* Get target freq via required opps */
+ child_freq = get_target_freq_by_required_opp(parent_devfreq->dev.parent,
+ parent_devfreq->opp_table,
+ devfreq->opp_table, freq);
+ if (child_freq)
+ goto out;
+
+ /* Use interpolation if required opps is not available */
for (i = 0; i < parent_devfreq->profile->max_state; i++)
if (parent_devfreq->profile->freq_table[i] == *freq)
break;
@@ -85,7 +138,6 @@ no_required_opp:
if (i == parent_devfreq->profile->max_state)
return -EINVAL;
- /* Get the suitable frequency by using index of parent device. */
if (i < devfreq->profile->max_state) {
child_freq = devfreq->profile->freq_table[i];
} else {
@@ -93,12 +145,202 @@ no_required_opp:
child_freq = devfreq->profile->freq_table[count - 1];
}
- /* Return the suitable frequency for passive device. */
+out:
*freq = child_freq;
return 0;
}
+static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
+ unsigned long *freq)
+{
+ struct devfreq_passive_data *p_data =
+ (struct devfreq_passive_data *)devfreq->data;
+ int ret;
+
+ if (!p_data)
+ return -EINVAL;
+
+ /*
+ * If the devfreq device with passive governor has the specific method
+ * to determine the next frequency, should use the get_target_freq()
+ * of struct devfreq_passive_data.
+ */
+ if (p_data->get_target_freq)
+ return p_data->get_target_freq(devfreq, freq);
+
+ switch (p_data->parent_type) {
+ case DEVFREQ_PARENT_DEV:
+ ret = get_target_freq_with_devfreq(devfreq, freq);
+ break;
+ case CPUFREQ_PARENT_DEV:
+ ret = get_target_freq_with_cpufreq(devfreq, freq);
+ break;
+ default:
+ ret = -EINVAL;
+ dev_err(&devfreq->dev, "Invalid parent type\n");
+ break;
+ }
+
+ return ret;
+}
+
+static int cpufreq_passive_notifier_call(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct devfreq_passive_data *p_data =
+ container_of(nb, struct devfreq_passive_data, nb);
+ struct devfreq *devfreq = (struct devfreq *)p_data->this;
+ struct devfreq_cpu_data *parent_cpu_data;
+ struct cpufreq_freqs *freqs = ptr;
+ unsigned int cur_freq;
+ int ret;
+
+ if (event != CPUFREQ_POSTCHANGE || !freqs)
+ return 0;
+
+ parent_cpu_data = get_parent_cpu_data(p_data, freqs->policy);
+ if (!parent_cpu_data || parent_cpu_data->cur_freq == freqs->new)
+ return 0;
+
+ cur_freq = parent_cpu_data->cur_freq;
+ parent_cpu_data->cur_freq = freqs->new;
+
+ mutex_lock(&devfreq->lock);
+ ret = devfreq_update_target(devfreq, freqs->new);
+ mutex_unlock(&devfreq->lock);
+ if (ret) {
+ parent_cpu_data->cur_freq = cur_freq;
+ dev_err(&devfreq->dev, "failed to update the frequency.\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq)
+{
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ struct devfreq_cpu_data *parent_cpu_data;
+ int cpu, ret = 0;
+
+ if (p_data->nb.notifier_call) {
+ ret = cpufreq_unregister_notifier(&p_data->nb,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ if (ret < 0)
+ return ret;
+ }
+
+ for_each_possible_cpu(cpu) {
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ if (!policy) {
+ ret = -EINVAL;
+ continue;
+ }
+
+ parent_cpu_data = get_parent_cpu_data(p_data, policy);
+ if (!parent_cpu_data) {
+ cpufreq_cpu_put(policy);
+ continue;
+ }
+
+ list_del(&parent_cpu_data->node);
+ if (parent_cpu_data->opp_table)
+ dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
+ kfree(parent_cpu_data);
+ cpufreq_cpu_put(policy);
+ }
+
+ return ret;
+}
+
+static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
+{
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ struct device *dev = devfreq->dev.parent;
+ struct opp_table *opp_table = NULL;
+ struct devfreq_cpu_data *parent_cpu_data;
+ struct cpufreq_policy *policy;
+ struct device *cpu_dev;
+ unsigned int cpu;
+ int ret;
+
+ p_data->cpu_data_list
+ = (struct list_head)LIST_HEAD_INIT(p_data->cpu_data_list);
+
+ p_data->nb.notifier_call = cpufreq_passive_notifier_call;
+ ret = cpufreq_register_notifier(&p_data->nb, CPUFREQ_TRANSITION_NOTIFIER);
+ if (ret) {
+ dev_err(dev, "failed to register cpufreq notifier\n");
+ p_data->nb.notifier_call = NULL;
+ goto err;
+ }
+
+ for_each_possible_cpu(cpu) {
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy) {
+ ret = -EPROBE_DEFER;
+ goto err;
+ }
+
+ parent_cpu_data = get_parent_cpu_data(p_data, policy);
+ if (parent_cpu_data) {
+ cpufreq_cpu_put(policy);
+ continue;
+ }
+
+ parent_cpu_data = kzalloc(sizeof(*parent_cpu_data),
+ GFP_KERNEL);
+ if (!parent_cpu_data) {
+ ret = -ENOMEM;
+ goto err_put_policy;
+ }
+
+ cpu_dev = get_cpu_device(cpu);
+ if (!cpu_dev) {
+ dev_err(dev, "failed to get cpu device\n");
+ ret = -ENODEV;
+ goto err_free_cpu_data;
+ }
+
+ opp_table = dev_pm_opp_get_opp_table(cpu_dev);
+ if (IS_ERR(opp_table)) {
+ dev_err(dev, "failed to get opp_table of cpu%d\n", cpu);
+ ret = PTR_ERR(opp_table);
+ goto err_free_cpu_data;
+ }
+
+ parent_cpu_data->dev = cpu_dev;
+ parent_cpu_data->opp_table = opp_table;
+ parent_cpu_data->first_cpu = cpumask_first(policy->related_cpus);
+ parent_cpu_data->cur_freq = policy->cur;
+ parent_cpu_data->min_freq = policy->cpuinfo.min_freq;
+ parent_cpu_data->max_freq = policy->cpuinfo.max_freq;
+
+ list_add_tail(&parent_cpu_data->node, &p_data->cpu_data_list);
+ cpufreq_cpu_put(policy);
+ }
+
+ mutex_lock(&devfreq->lock);
+ ret = devfreq_update_target(devfreq, 0L);
+ mutex_unlock(&devfreq->lock);
+ if (ret)
+ dev_err(dev, "failed to update the frequency\n");
+
+ return ret;
+
+err_free_cpu_data:
+ kfree(parent_cpu_data);
+err_put_policy:
+ cpufreq_cpu_put(policy);
+err:
+ WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
+
+ return ret;
+}
+
static int devfreq_passive_notifier_call(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -131,30 +373,55 @@ static int devfreq_passive_notifier_call(struct notifier_block *nb,
return NOTIFY_DONE;
}
-static int devfreq_passive_event_handler(struct devfreq *devfreq,
- unsigned int event, void *data)
+static int devfreq_passive_unregister_notifier(struct devfreq *devfreq)
+{
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ struct devfreq *parent = (struct devfreq *)p_data->parent;
+ struct notifier_block *nb = &p_data->nb;
+
+ return devfreq_unregister_notifier(parent, nb, DEVFREQ_TRANSITION_NOTIFIER);
+}
+
+static int devfreq_passive_register_notifier(struct devfreq *devfreq)
{
struct devfreq_passive_data *p_data
= (struct devfreq_passive_data *)devfreq->data;
struct devfreq *parent = (struct devfreq *)p_data->parent;
struct notifier_block *nb = &p_data->nb;
- int ret = 0;
if (!parent)
return -EPROBE_DEFER;
+ nb->notifier_call = devfreq_passive_notifier_call;
+ return devfreq_register_notifier(parent, nb, DEVFREQ_TRANSITION_NOTIFIER);
+}
+
+static int devfreq_passive_event_handler(struct devfreq *devfreq,
+ unsigned int event, void *data)
+{
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ int ret = 0;
+
+ if (!p_data)
+ return -EINVAL;
+
+ if (!p_data->this)
+ p_data->this = devfreq;
+
switch (event) {
case DEVFREQ_GOV_START:
- if (!p_data->this)
- p_data->this = devfreq;
-
- nb->notifier_call = devfreq_passive_notifier_call;
- ret = devfreq_register_notifier(parent, nb,
- DEVFREQ_TRANSITION_NOTIFIER);
+ if (p_data->parent_type == DEVFREQ_PARENT_DEV)
+ ret = devfreq_passive_register_notifier(devfreq);
+ else if (p_data->parent_type == CPUFREQ_PARENT_DEV)
+ ret = cpufreq_passive_register_notifier(devfreq);
break;
case DEVFREQ_GOV_STOP:
- WARN_ON(devfreq_unregister_notifier(parent, nb,
- DEVFREQ_TRANSITION_NOTIFIER));
+ if (p_data->parent_type == DEVFREQ_PARENT_DEV)
+ WARN_ON(devfreq_passive_unregister_notifier(devfreq));
+ else if (p_data->parent_type == CPUFREQ_PARENT_DEV)
+ WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
break;
default:
break;
diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c
index 293857ebfd75..daff40702615 100644
--- a/drivers/devfreq/rk3399_dmc.c
+++ b/drivers/devfreq/rk3399_dmc.c
@@ -5,6 +5,7 @@
*/
#include <linux/arm-smccc.h>
+#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/devfreq.h>
@@ -20,55 +21,49 @@
#include <linux/rwsem.h>
#include <linux/suspend.h>
+#include <soc/rockchip/pm_domains.h>
#include <soc/rockchip/rk3399_grf.h>
#include <soc/rockchip/rockchip_sip.h>
-struct dram_timing {
- unsigned int ddr3_speed_bin;
- unsigned int pd_idle;
- unsigned int sr_idle;
- unsigned int sr_mc_gate_idle;
- unsigned int srpd_lite_idle;
- unsigned int standby_idle;
- unsigned int auto_pd_dis_freq;
- unsigned int dram_dll_dis_freq;
- unsigned int phy_dll_dis_freq;
- unsigned int ddr3_odt_dis_freq;
- unsigned int ddr3_drv;
- unsigned int ddr3_odt;
- unsigned int phy_ddr3_ca_drv;
- unsigned int phy_ddr3_dq_drv;
- unsigned int phy_ddr3_odt;
- unsigned int lpddr3_odt_dis_freq;
- unsigned int lpddr3_drv;
- unsigned int lpddr3_odt;
- unsigned int phy_lpddr3_ca_drv;
- unsigned int phy_lpddr3_dq_drv;
- unsigned int phy_lpddr3_odt;
- unsigned int lpddr4_odt_dis_freq;
- unsigned int lpddr4_drv;
- unsigned int lpddr4_dq_odt;
- unsigned int lpddr4_ca_odt;
- unsigned int phy_lpddr4_ca_drv;
- unsigned int phy_lpddr4_ck_cs_drv;
- unsigned int phy_lpddr4_dq_drv;
- unsigned int phy_lpddr4_odt;
-};
+#define NS_TO_CYCLE(NS, MHz) (((NS) * (MHz)) / NSEC_PER_USEC)
+
+#define RK3399_SET_ODT_PD_0_SR_IDLE GENMASK(7, 0)
+#define RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE GENMASK(15, 8)
+#define RK3399_SET_ODT_PD_0_STANDBY_IDLE GENMASK(31, 16)
+
+#define RK3399_SET_ODT_PD_1_PD_IDLE GENMASK(11, 0)
+#define RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE GENMASK(27, 16)
+
+#define RK3399_SET_ODT_PD_2_ODT_ENABLE BIT(0)
struct rk3399_dmcfreq {
struct device *dev;
struct devfreq *devfreq;
+ struct devfreq_dev_profile profile;
struct devfreq_simple_ondemand_data ondemand_data;
struct clk *dmc_clk;
struct devfreq_event_dev *edev;
struct mutex lock;
- struct dram_timing timing;
struct regulator *vdd_center;
struct regmap *regmap_pmu;
unsigned long rate, target_rate;
unsigned long volt, target_volt;
unsigned int odt_dis_freq;
- int odt_pd_arg0, odt_pd_arg1;
+
+ unsigned int pd_idle_ns;
+ unsigned int sr_idle_ns;
+ unsigned int sr_mc_gate_idle_ns;
+ unsigned int srpd_lite_idle_ns;
+ unsigned int standby_idle_ns;
+ unsigned int ddr3_odt_dis_freq;
+ unsigned int lpddr3_odt_dis_freq;
+ unsigned int lpddr4_odt_dis_freq;
+
+ unsigned int pd_idle_dis_freq;
+ unsigned int sr_idle_dis_freq;
+ unsigned int sr_mc_gate_idle_dis_freq;
+ unsigned int srpd_lite_idle_dis_freq;
+ unsigned int standby_idle_dis_freq;
};
static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
@@ -78,10 +73,14 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
struct dev_pm_opp *opp;
unsigned long old_clk_rate = dmcfreq->rate;
unsigned long target_volt, target_rate;
+ unsigned int ddrcon_mhz;
struct arm_smccc_res res;
- bool odt_enable = false;
int err;
+ u32 odt_pd_arg0 = 0;
+ u32 odt_pd_arg1 = 0;
+ u32 odt_pd_arg2 = 0;
+
opp = devfreq_recommended_opp(dev, freq, flags);
if (IS_ERR(opp))
return PTR_ERR(opp);
@@ -95,19 +94,71 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
mutex_lock(&dmcfreq->lock);
+ /*
+ * Ensure power-domain transitions don't interfere with ARM Trusted
+ * Firmware power-domain idling.
+ */
+ err = rockchip_pmu_block();
+ if (err) {
+ dev_err(dev, "Failed to block PMU: %d\n", err);
+ goto out_unlock;
+ }
+
+ /*
+ * Some idle parameters may be based on the DDR controller clock, which
+ * is half of the DDR frequency.
+ * pd_idle and standby_idle are based on the controller clock cycle.
+ * sr_idle_cycle, sr_mc_gate_idle_cycle, and srpd_lite_idle_cycle
+ * are based on the 1024 controller clock cycle
+ */
+ ddrcon_mhz = target_rate / USEC_PER_SEC / 2;
+
+ u32p_replace_bits(&odt_pd_arg1,
+ NS_TO_CYCLE(dmcfreq->pd_idle_ns, ddrcon_mhz),
+ RK3399_SET_ODT_PD_1_PD_IDLE);
+ u32p_replace_bits(&odt_pd_arg0,
+ NS_TO_CYCLE(dmcfreq->standby_idle_ns, ddrcon_mhz),
+ RK3399_SET_ODT_PD_0_STANDBY_IDLE);
+ u32p_replace_bits(&odt_pd_arg0,
+ DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->sr_idle_ns,
+ ddrcon_mhz), 1024),
+ RK3399_SET_ODT_PD_0_SR_IDLE);
+ u32p_replace_bits(&odt_pd_arg0,
+ DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->sr_mc_gate_idle_ns,
+ ddrcon_mhz), 1024),
+ RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE);
+ u32p_replace_bits(&odt_pd_arg1,
+ DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->srpd_lite_idle_ns,
+ ddrcon_mhz), 1024),
+ RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE);
+
if (dmcfreq->regmap_pmu) {
+ if (target_rate >= dmcfreq->sr_idle_dis_freq)
+ odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_SR_IDLE;
+
+ if (target_rate >= dmcfreq->sr_mc_gate_idle_dis_freq)
+ odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE;
+
+ if (target_rate >= dmcfreq->standby_idle_dis_freq)
+ odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_STANDBY_IDLE;
+
+ if (target_rate >= dmcfreq->pd_idle_dis_freq)
+ odt_pd_arg1 &= ~RK3399_SET_ODT_PD_1_PD_IDLE;
+
+ if (target_rate >= dmcfreq->srpd_lite_idle_dis_freq)
+ odt_pd_arg1 &= ~RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE;
+
if (target_rate >= dmcfreq->odt_dis_freq)
- odt_enable = true;
+ odt_pd_arg2 |= RK3399_SET_ODT_PD_2_ODT_ENABLE;
/*
* This makes a SMC call to the TF-A to set the DDR PD
* (power-down) timings and to enable or disable the
* ODT (on-die termination) resistors.
*/
- arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0,
- dmcfreq->odt_pd_arg1,
- ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD,
- odt_enable, 0, 0, 0, &res);
+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, odt_pd_arg0, odt_pd_arg1,
+ ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD, odt_pd_arg2,
+ 0, 0, 0, &res);
}
/*
@@ -158,6 +209,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
dmcfreq->volt = target_volt;
out:
+ rockchip_pmu_unblock();
+out_unlock:
mutex_unlock(&dmcfreq->lock);
return err;
}
@@ -189,13 +242,6 @@ static int rk3399_dmcfreq_get_cur_freq(struct device *dev, unsigned long *freq)
return 0;
}
-static struct devfreq_dev_profile rk3399_devfreq_dmc_profile = {
- .polling_ms = 200,
- .target = rk3399_dmcfreq_target,
- .get_dev_status = rk3399_dmcfreq_get_dev_status,
- .get_cur_freq = rk3399_dmcfreq_get_cur_freq,
-};
-
static __maybe_unused int rk3399_dmcfreq_suspend(struct device *dev)
{
struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
@@ -238,69 +284,48 @@ static __maybe_unused int rk3399_dmcfreq_resume(struct device *dev)
static SIMPLE_DEV_PM_OPS(rk3399_dmcfreq_pm, rk3399_dmcfreq_suspend,
rk3399_dmcfreq_resume);
-static int of_get_ddr_timings(struct dram_timing *timing,
- struct device_node *np)
+static int rk3399_dmcfreq_of_props(struct rk3399_dmcfreq *data,
+ struct device_node *np)
{
int ret = 0;
- ret = of_property_read_u32(np, "rockchip,ddr3_speed_bin",
- &timing->ddr3_speed_bin);
- ret |= of_property_read_u32(np, "rockchip,pd_idle",
- &timing->pd_idle);
- ret |= of_property_read_u32(np, "rockchip,sr_idle",
- &timing->sr_idle);
- ret |= of_property_read_u32(np, "rockchip,sr_mc_gate_idle",
- &timing->sr_mc_gate_idle);
- ret |= of_property_read_u32(np, "rockchip,srpd_lite_idle",
- &timing->srpd_lite_idle);
- ret |= of_property_read_u32(np, "rockchip,standby_idle",
- &timing->standby_idle);
- ret |= of_property_read_u32(np, "rockchip,auto_pd_dis_freq",
- &timing->auto_pd_dis_freq);
- ret |= of_property_read_u32(np, "rockchip,dram_dll_dis_freq",
- &timing->dram_dll_dis_freq);
- ret |= of_property_read_u32(np, "rockchip,phy_dll_dis_freq",
- &timing->phy_dll_dis_freq);
+ /*
+ * These are all optional, and serve as minimum bounds. Give them large
+ * (i.e., never "disabled") values if the DT doesn't specify one.
+ */
+ data->pd_idle_dis_freq =
+ data->sr_idle_dis_freq =
+ data->sr_mc_gate_idle_dis_freq =
+ data->srpd_lite_idle_dis_freq =
+ data->standby_idle_dis_freq = UINT_MAX;
+
+ ret |= of_property_read_u32(np, "rockchip,pd-idle-ns",
+ &data->pd_idle_ns);
+ ret |= of_property_read_u32(np, "rockchip,sr-idle-ns",
+ &data->sr_idle_ns);
+ ret |= of_property_read_u32(np, "rockchip,sr-mc-gate-idle-ns",
+ &data->sr_mc_gate_idle_ns);
+ ret |= of_property_read_u32(np, "rockchip,srpd-lite-idle-ns",
+ &data->srpd_lite_idle_ns);
+ ret |= of_property_read_u32(np, "rockchip,standby-idle-ns",
+ &data->standby_idle_ns);
ret |= of_property_read_u32(np, "rockchip,ddr3_odt_dis_freq",
- &timing->ddr3_odt_dis_freq);
- ret |= of_property_read_u32(np, "rockchip,ddr3_drv",
- &timing->ddr3_drv);
- ret |= of_property_read_u32(np, "rockchip,ddr3_odt",
- &timing->ddr3_odt);
- ret |= of_property_read_u32(np, "rockchip,phy_ddr3_ca_drv",
- &timing->phy_ddr3_ca_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_ddr3_dq_drv",
- &timing->phy_ddr3_dq_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_ddr3_odt",
- &timing->phy_ddr3_odt);
+ &data->ddr3_odt_dis_freq);
ret |= of_property_read_u32(np, "rockchip,lpddr3_odt_dis_freq",
- &timing->lpddr3_odt_dis_freq);
- ret |= of_property_read_u32(np, "rockchip,lpddr3_drv",
- &timing->lpddr3_drv);
- ret |= of_property_read_u32(np, "rockchip,lpddr3_odt",
- &timing->lpddr3_odt);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_ca_drv",
- &timing->phy_lpddr3_ca_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_dq_drv",
- &timing->phy_lpddr3_dq_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_odt",
- &timing->phy_lpddr3_odt);
+ &data->lpddr3_odt_dis_freq);
ret |= of_property_read_u32(np, "rockchip,lpddr4_odt_dis_freq",
- &timing->lpddr4_odt_dis_freq);
- ret |= of_property_read_u32(np, "rockchip,lpddr4_drv",
- &timing->lpddr4_drv);
- ret |= of_property_read_u32(np, "rockchip,lpddr4_dq_odt",
- &timing->lpddr4_dq_odt);
- ret |= of_property_read_u32(np, "rockchip,lpddr4_ca_odt",
- &timing->lpddr4_ca_odt);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ca_drv",
- &timing->phy_lpddr4_ca_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ck_cs_drv",
- &timing->phy_lpddr4_ck_cs_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_dq_drv",
- &timing->phy_lpddr4_dq_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_odt",
- &timing->phy_lpddr4_odt);
+ &data->lpddr4_odt_dis_freq);
+
+ ret |= of_property_read_u32(np, "rockchip,pd-idle-dis-freq-hz",
+ &data->pd_idle_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,sr-idle-dis-freq-hz",
+ &data->sr_idle_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,sr-mc-gate-idle-dis-freq-hz",
+ &data->sr_mc_gate_idle_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,srpd-lite-idle-dis-freq-hz",
+ &data->srpd_lite_idle_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,standby-idle-dis-freq-hz",
+ &data->standby_idle_dis_freq);
return ret;
}
@@ -311,8 +336,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct device_node *np = pdev->dev.of_node, *node;
struct rk3399_dmcfreq *data;
- int ret, index, size;
- uint32_t *timing;
+ int ret;
struct dev_pm_opp *opp;
u32 ddr_type;
u32 val;
@@ -343,26 +367,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
return ret;
}
- /*
- * Get dram timing and pass it to arm trust firmware,
- * the dram driver in arm trust firmware will get these
- * timing and to do dram initial.
- */
- if (!of_get_ddr_timings(&data->timing, np)) {
- timing = &data->timing.ddr3_speed_bin;
- size = sizeof(struct dram_timing) / 4;
- for (index = 0; index < size; index++) {
- arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, *timing++, index,
- ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM,
- 0, 0, 0, 0, &res);
- if (res.a0) {
- dev_err(dev, "Failed to set dram param: %ld\n",
- res.a0);
- ret = -EINVAL;
- goto err_edev;
- }
- }
- }
+ rk3399_dmcfreq_of_props(data, np);
node = of_parse_phandle(np, "rockchip,pmu", 0);
if (!node)
@@ -381,13 +386,13 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
switch (ddr_type) {
case RK3399_PMUGRF_DDRTYPE_DDR3:
- data->odt_dis_freq = data->timing.ddr3_odt_dis_freq;
+ data->odt_dis_freq = data->ddr3_odt_dis_freq;
break;
case RK3399_PMUGRF_DDRTYPE_LPDDR3:
- data->odt_dis_freq = data->timing.lpddr3_odt_dis_freq;
+ data->odt_dis_freq = data->lpddr3_odt_dis_freq;
break;
case RK3399_PMUGRF_DDRTYPE_LPDDR4:
- data->odt_dis_freq = data->timing.lpddr4_odt_dis_freq;
+ data->odt_dis_freq = data->lpddr4_odt_dis_freq;
break;
default:
ret = -EINVAL;
@@ -400,62 +405,45 @@ no_pmu:
0, 0, 0, 0, &res);
/*
- * In TF-A there is a platform SIP call to set the PD (power-down)
- * timings and to enable or disable the ODT (on-die termination).
- * This call needs three arguments as follows:
- *
- * arg0:
- * bit[0-7] : sr_idle
- * bit[8-15] : sr_mc_gate_idle
- * bit[16-31] : standby idle
- * arg1:
- * bit[0-11] : pd_idle
- * bit[16-27] : srpd_lite_idle
- * arg2:
- * bit[0] : odt enable
- */
- data->odt_pd_arg0 = (data->timing.sr_idle & 0xff) |
- ((data->timing.sr_mc_gate_idle & 0xff) << 8) |
- ((data->timing.standby_idle & 0xffff) << 16);
- data->odt_pd_arg1 = (data->timing.pd_idle & 0xfff) |
- ((data->timing.srpd_lite_idle & 0xfff) << 16);
-
- /*
* We add a devfreq driver to our parent since it has a device tree node
* with operating points.
*/
- if (dev_pm_opp_of_add_table(dev)) {
+ if (devm_pm_opp_of_add_table(dev)) {
dev_err(dev, "Invalid operating-points in device tree.\n");
ret = -EINVAL;
goto err_edev;
}
- of_property_read_u32(np, "upthreshold",
- &data->ondemand_data.upthreshold);
- of_property_read_u32(np, "downdifferential",
- &data->ondemand_data.downdifferential);
+ data->ondemand_data.upthreshold = 25;
+ data->ondemand_data.downdifferential = 15;
data->rate = clk_get_rate(data->dmc_clk);
opp = devfreq_recommended_opp(dev, &data->rate, 0);
if (IS_ERR(opp)) {
ret = PTR_ERR(opp);
- goto err_free_opp;
+ goto err_edev;
}
data->rate = dev_pm_opp_get_freq(opp);
data->volt = dev_pm_opp_get_voltage(opp);
dev_pm_opp_put(opp);
- rk3399_devfreq_dmc_profile.initial_freq = data->rate;
+ data->profile = (struct devfreq_dev_profile) {
+ .polling_ms = 200,
+ .target = rk3399_dmcfreq_target,
+ .get_dev_status = rk3399_dmcfreq_get_dev_status,
+ .get_cur_freq = rk3399_dmcfreq_get_cur_freq,
+ .initial_freq = data->rate,
+ };
data->devfreq = devm_devfreq_add_device(dev,
- &rk3399_devfreq_dmc_profile,
+ &data->profile,
DEVFREQ_GOV_SIMPLE_ONDEMAND,
&data->ondemand_data);
if (IS_ERR(data->devfreq)) {
ret = PTR_ERR(data->devfreq);
- goto err_free_opp;
+ goto err_edev;
}
devm_devfreq_register_opp_notifier(dev, data->devfreq);
@@ -465,8 +453,6 @@ no_pmu:
return 0;
-err_free_opp:
- dev_pm_opp_of_remove_table(&pdev->dev);
err_edev:
devfreq_event_disable_edev(data->edev);
@@ -477,11 +463,7 @@ static int rk3399_dmcfreq_remove(struct platform_device *pdev)
{
struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev);
- /*
- * Before remove the opp table we need to unregister the opp notifier.
- */
- devm_devfreq_unregister_opp_notifier(dmcfreq->dev, dmcfreq->devfreq);
- dev_pm_opp_of_remove_table(dmcfreq->dev);
+ devfreq_event_disable_edev(dmcfreq->edev);
return 0;
}
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 47551ab73ca8..b9bb94bd0f67 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -765,6 +765,106 @@ static struct cpuidle_state icx_cstates[] __initdata = {
};
/*
+ * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
+ * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
+ * But in this case there is effectively no C1, because C1 requests are
+ * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
+ * and C1E requests end up with C1, so there is effectively no C1E.
+ *
+ * By default we enable C1E and disable C1 by marking it with
+ * 'CPUIDLE_FLAG_UNUSABLE'.
+ */
+static struct cpuidle_state adl_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 220,
+ .target_residency = 600,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 280,
+ .target_residency = 800,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 680,
+ .target_residency = 2000,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state adl_l_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 170,
+ .target_residency = 500,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 200,
+ .target_residency = 600,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 230,
+ .target_residency = 700,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+/*
* On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
* versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
* MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
@@ -1147,6 +1247,14 @@ static const struct idle_cpu idle_cpu_icx __initconst = {
.use_acpi = true,
};
+static const struct idle_cpu idle_cpu_adl __initconst = {
+ .state_table = adl_cstates,
+};
+
+static const struct idle_cpu idle_cpu_adl_l __initconst = {
+ .state_table = adl_l_cstates,
+};
+
static const struct idle_cpu idle_cpu_spr __initconst = {
.state_table = spr_cstates,
.disable_promotion_to_c1e = true,
@@ -1215,6 +1323,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
@@ -1574,6 +1684,25 @@ static void __init skx_idle_state_table_update(void)
}
/**
+ * adl_idle_state_table_update - Adjust AlderLake idle states table.
+ */
+static void __init adl_idle_state_table_update(void)
+{
+ /* Check if user prefers C1 over C1E. */
+ if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
+ cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+ cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
+
+ /* Disable C1E by clearing the "C1E promotion" bit. */
+ c1e_promotion = C1E_PROMOTION_DISABLE;
+ return;
+ }
+
+ /* Make sure C1E is enabled by default */
+ c1e_promotion = C1E_PROMOTION_ENABLE;
+}
+
+/**
* spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
*/
static void __init spr_idle_state_table_update(void)
@@ -1642,6 +1771,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
case INTEL_FAM6_SAPPHIRERAPIDS_X:
spr_idle_state_table_update();
break;
+ case INTEL_FAM6_ALDERLAKE:
+ case INTEL_FAM6_ALDERLAKE_L:
+ adl_idle_state_table_update();
+ break;
}
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
diff --git a/drivers/iio/chemical/scd30.h b/drivers/iio/chemical/scd30.h
index f60127bfe0f4..1ac9f3f79271 100644
--- a/drivers/iio/chemical/scd30.h
+++ b/drivers/iio/chemical/scd30.h
@@ -68,10 +68,7 @@ struct scd30_state {
scd30_command_t command;
};
-int scd30_suspend(struct device *dev);
-int scd30_resume(struct device *dev);
-
-static __maybe_unused SIMPLE_DEV_PM_OPS(scd30_pm_ops, scd30_suspend, scd30_resume);
+extern const struct dev_pm_ops scd30_pm_ops;
int scd30_probe(struct device *dev, int irq, const char *name, void *priv, scd30_command_t command);
diff --git a/drivers/iio/chemical/scd30_core.c b/drivers/iio/chemical/scd30_core.c
index 9fe6bbe9ee04..682fca39d14d 100644
--- a/drivers/iio/chemical/scd30_core.c
+++ b/drivers/iio/chemical/scd30_core.c
@@ -517,7 +517,7 @@ static const struct iio_chan_spec scd30_channels[] = {
IIO_CHAN_SOFT_TIMESTAMP(3),
};
-int __maybe_unused scd30_suspend(struct device *dev)
+static int scd30_suspend(struct device *dev)
{
struct iio_dev *indio_dev = dev_get_drvdata(dev);
struct scd30_state *state = iio_priv(indio_dev);
@@ -529,9 +529,8 @@ int __maybe_unused scd30_suspend(struct device *dev)
return regulator_disable(state->vdd);
}
-EXPORT_SYMBOL(scd30_suspend);
-int __maybe_unused scd30_resume(struct device *dev)
+static int scd30_resume(struct device *dev)
{
struct iio_dev *indio_dev = dev_get_drvdata(dev);
struct scd30_state *state = iio_priv(indio_dev);
@@ -543,7 +542,8 @@ int __maybe_unused scd30_resume(struct device *dev)
return scd30_command_write(state, CMD_START_MEAS, state->pressure_comp);
}
-EXPORT_SYMBOL(scd30_resume);
+
+EXPORT_NS_SIMPLE_DEV_PM_OPS(scd30_pm_ops, scd30_suspend, scd30_resume, IIO_SCD30);
static void scd30_stop_meas(void *data)
{
@@ -759,7 +759,7 @@ int scd30_probe(struct device *dev, int irq, const char *name, void *priv,
return devm_iio_device_register(dev, indio_dev);
}
-EXPORT_SYMBOL(scd30_probe);
+EXPORT_SYMBOL_NS(scd30_probe, IIO_SCD30);
MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>");
MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor core driver");
diff --git a/drivers/iio/chemical/scd30_i2c.c b/drivers/iio/chemical/scd30_i2c.c
index 875892a070ee..bae479a4721f 100644
--- a/drivers/iio/chemical/scd30_i2c.c
+++ b/drivers/iio/chemical/scd30_i2c.c
@@ -128,7 +128,7 @@ static struct i2c_driver scd30_i2c_driver = {
.driver = {
.name = KBUILD_MODNAME,
.of_match_table = scd30_i2c_of_match,
- .pm = &scd30_pm_ops,
+ .pm = pm_sleep_ptr(&scd30_pm_ops),
},
.probe_new = scd30_i2c_probe,
};
@@ -137,3 +137,4 @@ module_i2c_driver(scd30_i2c_driver);
MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>");
MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor i2c driver");
MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(IIO_SCD30);
diff --git a/drivers/iio/chemical/scd30_serial.c b/drivers/iio/chemical/scd30_serial.c
index 568b34486c44..3c519103d30b 100644
--- a/drivers/iio/chemical/scd30_serial.c
+++ b/drivers/iio/chemical/scd30_serial.c
@@ -252,7 +252,7 @@ static struct serdev_device_driver scd30_serdev_driver = {
.driver = {
.name = KBUILD_MODNAME,
.of_match_table = scd30_serdev_of_match,
- .pm = &scd30_pm_ops,
+ .pm = pm_sleep_ptr(&scd30_pm_ops),
},
.probe = scd30_serdev_probe,
};
@@ -261,3 +261,4 @@ module_serdev_device_driver(scd30_serdev_driver);
MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>");
MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor serial driver");
MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(IIO_SCD30);
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 440ab5a03df9..485ea980bde7 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -1448,7 +1448,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
* Returns 0 on success or a proper -EINVAL value in case of error.
*/
static int __maybe_unused
-_get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
+_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz)
{
struct dev_pm_opp *opp;
unsigned long opp_freq, opp_power;
@@ -1482,8 +1482,8 @@ _get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
* Returns -EINVAL if the power calculation failed because of missing
* parameters, 0 otherwise.
*/
-static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz,
- struct device *dev)
+static int __maybe_unused _get_power(struct device *dev, unsigned long *mW,
+ unsigned long *kHz)
{
struct dev_pm_opp *opp;
struct device_node *np;
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index bca2f912d349..f5eced0842b3 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -211,7 +211,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
return 0;
pd = em_cpu_get(cpu);
- if (!pd)
+ if (!pd || em_is_artificial(pd))
return -EINVAL;
dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 07611a00b78f..a9c99d9e8b42 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -1010,7 +1010,7 @@ static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
* where time_unit is default to 1 sec. Never 0.
*/
if (!to_raw)
- return (value) ? value *= rp->time_unit : rp->time_unit;
+ return (value) ? value * rp->time_unit : rp->time_unit;
value = div64_u64(value, rp->time_unit);
@@ -1107,6 +1107,8 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core),
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 1be45f36ab6c..9d23984d8931 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -140,6 +140,7 @@ static const struct x86_cpu_id pl4_support_ids[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE, X86_FEATURE_ANY },
{}
};
diff --git a/drivers/soc/rockchip/pm_domains.c b/drivers/soc/rockchip/pm_domains.c
index 0868b7d406fb..b1cf7d29dafd 100644
--- a/drivers/soc/rockchip/pm_domains.c
+++ b/drivers/soc/rockchip/pm_domains.c
@@ -8,6 +8,7 @@
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/err.h>
+#include <linux/mutex.h>
#include <linux/pm_clock.h>
#include <linux/pm_domain.h>
#include <linux/of_address.h>
@@ -16,6 +17,7 @@
#include <linux/clk.h>
#include <linux/regmap.h>
#include <linux/mfd/syscon.h>
+#include <soc/rockchip/pm_domains.h>
#include <dt-bindings/power/px30-power.h>
#include <dt-bindings/power/rk3036-power.h>
#include <dt-bindings/power/rk3066-power.h>
@@ -139,6 +141,109 @@ struct rockchip_pmu {
#define DOMAIN_RK3568(name, pwr, req, wakeup) \
DOMAIN_M(name, pwr, pwr, req, req, req, wakeup)
+/*
+ * Dynamic Memory Controller may need to coordinate with us -- see
+ * rockchip_pmu_block().
+ *
+ * dmc_pmu_mutex protects registration-time races, so DMC driver doesn't try to
+ * block() while we're initializing the PMU.
+ */
+static DEFINE_MUTEX(dmc_pmu_mutex);
+static struct rockchip_pmu *dmc_pmu;
+
+/*
+ * Block PMU transitions and make sure they don't interfere with ARM Trusted
+ * Firmware operations. There are two conflicts, noted in the comments below.
+ *
+ * Caller must unblock PMU transitions via rockchip_pmu_unblock().
+ */
+int rockchip_pmu_block(void)
+{
+ struct rockchip_pmu *pmu;
+ struct generic_pm_domain *genpd;
+ struct rockchip_pm_domain *pd;
+ int i, ret;
+
+ mutex_lock(&dmc_pmu_mutex);
+
+ /* No PMU (yet)? Then we just block rockchip_pmu_probe(). */
+ if (!dmc_pmu)
+ return 0;
+ pmu = dmc_pmu;
+
+ /*
+ * mutex blocks all idle transitions: we can't touch the
+ * PMU_BUS_IDLE_REQ (our ".idle_offset") register while ARM Trusted
+ * Firmware might be using it.
+ */
+ mutex_lock(&pmu->mutex);
+
+ /*
+ * Power domain clocks: Per Rockchip, we *must* keep certain clocks
+ * enabled for the duration of power-domain transitions. Most
+ * transitions are handled by this driver, but some cases (in
+ * particular, DRAM DVFS / memory-controller idle) must be handled by
+ * firmware. Firmware can handle most clock management via a special
+ * "ungate" register (PMU_CRU_GATEDIS_CON0), but unfortunately, this
+ * doesn't handle PLLs. We can assist this transition by doing the
+ * clock management on behalf of firmware.
+ */
+ for (i = 0; i < pmu->genpd_data.num_domains; i++) {
+ genpd = pmu->genpd_data.domains[i];
+ if (genpd) {
+ pd = to_rockchip_pd(genpd);
+ ret = clk_bulk_enable(pd->num_clks, pd->clks);
+ if (ret < 0) {
+ dev_err(pmu->dev,
+ "failed to enable clks for domain '%s': %d\n",
+ genpd->name, ret);
+ goto err;
+ }
+ }
+ }
+
+ return 0;
+
+err:
+ for (i = i - 1; i >= 0; i--) {
+ genpd = pmu->genpd_data.domains[i];
+ if (genpd) {
+ pd = to_rockchip_pd(genpd);
+ clk_bulk_disable(pd->num_clks, pd->clks);
+ }
+ }
+ mutex_unlock(&pmu->mutex);
+ mutex_unlock(&dmc_pmu_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rockchip_pmu_block);
+
+/* Unblock PMU transitions. */
+void rockchip_pmu_unblock(void)
+{
+ struct rockchip_pmu *pmu;
+ struct generic_pm_domain *genpd;
+ struct rockchip_pm_domain *pd;
+ int i;
+
+ if (dmc_pmu) {
+ pmu = dmc_pmu;
+ for (i = 0; i < pmu->genpd_data.num_domains; i++) {
+ genpd = pmu->genpd_data.domains[i];
+ if (genpd) {
+ pd = to_rockchip_pd(genpd);
+ clk_bulk_disable(pd->num_clks, pd->clks);
+ }
+ }
+
+ mutex_unlock(&pmu->mutex);
+ }
+
+ mutex_unlock(&dmc_pmu_mutex);
+}
+EXPORT_SYMBOL_GPL(rockchip_pmu_unblock);
+
static bool rockchip_pmu_domain_is_idle(struct rockchip_pm_domain *pd)
{
struct rockchip_pmu *pmu = pd->pmu;
@@ -690,6 +795,12 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev)
error = -ENODEV;
+ /*
+ * Prevent any rockchip_pmu_block() from racing with the remainder of
+ * setup (clocks, register initialization).
+ */
+ mutex_lock(&dmc_pmu_mutex);
+
for_each_available_child_of_node(np, node) {
error = rockchip_pm_add_one_domain(pmu, node);
if (error) {
@@ -719,10 +830,17 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev)
goto err_out;
}
+ /* We only expect one PMU. */
+ if (!WARN_ON_ONCE(dmc_pmu))
+ dmc_pmu = pmu;
+
+ mutex_unlock(&dmc_pmu_mutex);
+
return 0;
err_out:
rockchip_pm_domain_cleanup(pmu);
+ mutex_unlock(&dmc_pmu_mutex);
return error;
}
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index 0bfb8eebd126..b8151d95a806 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -328,7 +328,7 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
struct cpufreq_policy *policy;
unsigned int nr_levels;
- if (!em)
+ if (!em || em_is_artificial(em))
return false;
policy = cpufreq_cdev->policy;
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 4310cb342a9f..b04dcbbf721a 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -358,6 +358,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
struct thermal_cooling_device *cdev;
struct device *dev = df->dev.parent;
struct devfreq_cooling_device *dfc;
+ struct em_perf_domain *em;
char *name;
int err, num_opps;
@@ -367,8 +368,9 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
dfc->devfreq = df;
- dfc->em_pd = em_pd_get(dev);
- if (dfc->em_pd) {
+ em = em_pd_get(dev);
+ if (em && !em_is_artificial(em)) {
+ dfc->em_pd = em;
devfreq_cooling_ops.get_requested_power =
devfreq_cooling_get_requested_power;
devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
@@ -379,7 +381,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
num_opps = em_pd_nr_perf_states(dfc->em_pd);
} else {
/* Backward compatibility for drivers which do not use IPA */
- dev_dbg(dev, "missing EM for cooling device\n");
+ dev_dbg(dev, "missing proper EM for cooling device\n");
num_opps = dev_pm_opp_get_opp_count(dev);
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 92b7ea8d8f5e..c6108581d97d 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -141,6 +141,7 @@ extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
extern int cppc_set_enable(int cpu, bool enable);
extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
extern bool acpi_cpc_valid(void);
+extern bool cppc_allow_fast_switch(void);
extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data);
extern unsigned int cppc_get_transition_latency(int cpu);
extern bool cpc_ffh_supported(void);
@@ -175,6 +176,10 @@ static inline bool acpi_cpc_valid(void)
{
return false;
}
+static inline bool cppc_allow_fast_switch(void)
+{
+ return false;
+}
static inline unsigned int cppc_get_transition_latency(int cpu)
{
return CPUFREQ_ETERNAL;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d7136d13aa44..03465db16b68 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -574,6 +574,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
#define OSC_SB_OSLPI_SUPPORT 0x00000100
#define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000
#define OSC_SB_GENERIC_INITIATOR_SUPPORT 0x00002000
+#define OSC_SB_CPC_FLEXIBLE_ADR_SPACE 0x00004000
#define OSC_SB_NATIVE_USB4_SUPPORT 0x00040000
#define OSC_SB_PRM_SUPPORT 0x00200000
@@ -581,6 +582,7 @@ extern bool osc_sb_apei_support_acked;
extern bool osc_pc_lpi_support_confirmed;
extern bool osc_sb_native_usb4_support_confirmed;
extern bool osc_sb_cppc_not_supported;
+extern bool osc_cpc_flexible_adr_space_confirmed;
/* USB4 Capabilities */
#define OSC_USB_USB3_TUNNELING 0x00000001
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 142474b4af96..dc10bee75a72 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -38,6 +38,7 @@ enum devfreq_timer {
struct devfreq;
struct devfreq_governor;
+struct devfreq_cpu_data;
struct thermal_cooling_device;
/**
@@ -288,6 +289,11 @@ struct devfreq_simple_ondemand_data {
#endif
#if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE)
+enum devfreq_parent_dev_type {
+ DEVFREQ_PARENT_DEV,
+ CPUFREQ_PARENT_DEV,
+};
+
/**
* struct devfreq_passive_data - ``void *data`` fed to struct devfreq
* and devfreq_add_device
@@ -299,8 +305,11 @@ struct devfreq_simple_ondemand_data {
* using governors except for passive governor.
* If the devfreq device has the specific method to decide
* the next frequency, should use this callback.
- * @this: the devfreq instance of own device.
- * @nb: the notifier block for DEVFREQ_TRANSITION_NOTIFIER list
+ * @parent_type: the parent type of the device.
+ * @this: the devfreq instance of own device.
+ * @nb: the notifier block for DEVFREQ_TRANSITION_NOTIFIER or
+ * CPUFREQ_TRANSITION_NOTIFIER list.
+ * @cpu_data_list: the list of cpu frequency data for all cpufreq_policy.
*
* The devfreq_passive_data have to set the devfreq instance of parent
* device with governors except for the passive governor. But, don't need to
@@ -314,9 +323,13 @@ struct devfreq_passive_data {
/* Optional callback to decide the next frequency of passvice device */
int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
+ /* Should set the type of parent device */
+ enum devfreq_parent_dev_type parent_type;
+
/* For passive governor's internal use. Don't need to set them */
struct devfreq *this;
struct notifier_block nb;
+ struct list_head cpu_data_list;
};
#endif
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 9f3c400bc52d..8419bffb4398 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -67,11 +67,16 @@ struct em_perf_domain {
*
* EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating
* energy consumption.
+ *
+ * EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be
+ * created by platform missing real power information
*/
#define EM_PERF_DOMAIN_MILLIWATTS BIT(0)
#define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1)
+#define EM_PERF_DOMAIN_ARTIFICIAL BIT(2)
#define em_span_cpus(em) (to_cpumask((em)->cpus))
+#define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL)
#ifdef CONFIG_ENERGY_MODEL
#define EM_MAX_POWER 0xFFFF
@@ -96,11 +101,11 @@ struct em_data_callback {
/**
* active_power() - Provide power at the next performance state of
* a device
+ * @dev : Device for which we do this operation (can be a CPU)
* @power : Active power at the performance state
* (modified)
* @freq : Frequency at the performance state in kHz
* (modified)
- * @dev : Device for which we do this operation (can be a CPU)
*
* active_power() must find the lowest performance state of 'dev' above
* 'freq' and update 'power' and 'freq' to the matching active power
@@ -112,11 +117,32 @@ struct em_data_callback {
*
* Return 0 on success.
*/
- int (*active_power)(unsigned long *power, unsigned long *freq,
- struct device *dev);
+ int (*active_power)(struct device *dev, unsigned long *power,
+ unsigned long *freq);
+
+ /**
+ * get_cost() - Provide the cost at the given performance state of
+ * a device
+ * @dev : Device for which we do this operation (can be a CPU)
+ * @freq : Frequency at the performance state in kHz
+ * @cost : The cost value for the performance state
+ * (modified)
+ *
+ * In case of CPUs, the cost is the one of a single CPU in the domain.
+ * It is expected to fit in the [0, EM_MAX_POWER] range due to internal
+ * usage in EAS calculation.
+ *
+ * Return 0 on success, or appropriate error value in case of failure.
+ */
+ int (*get_cost)(struct device *dev, unsigned long freq,
+ unsigned long *cost);
};
-#define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb }
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) ((em_cb).active_power = cb)
+#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) \
+ { .active_power = _active_power_cb, \
+ .get_cost = _cost_cb }
+#define EM_DATA_CB(_active_power_cb) \
+ EM_ADV_DATA_CB(_active_power_cb, NULL)
struct em_perf_domain *em_cpu_get(int cpu);
struct em_perf_domain *em_pd_get(struct device *dev);
@@ -264,6 +290,7 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
#else
struct em_data_callback {};
+#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { }
#define EM_DATA_CB(_active_power_cb) { }
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) do { } while (0)
diff --git a/include/linux/pm.h b/include/linux/pm.h
index e65b3ab28377..ffe941958501 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -368,13 +368,13 @@ const struct dev_pm_ops name = { \
#ifdef CONFIG_PM
#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
- runtime_resume_fn, idle_fn, sec) \
+ runtime_resume_fn, idle_fn, sec, ns) \
_DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
runtime_resume_fn, idle_fn); \
- _EXPORT_SYMBOL(name, sec)
+ __EXPORT_SYMBOL(name, sec, ns)
#else
#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \
- runtime_resume_fn, idle_fn, sec) \
+ runtime_resume_fn, idle_fn, sec, ns) \
static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \
resume_fn, runtime_suspend_fn, \
runtime_resume_fn, idle_fn)
@@ -391,9 +391,13 @@ static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \
_DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL)
#define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
- _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "")
+ _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", "")
#define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
- _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl")
+ _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", "")
+#define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \
+ _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", #ns)
+#define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \
+ _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", #ns)
/* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */
#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 67017c9390c8..ebc351698090 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -91,6 +91,14 @@ struct gpd_dev_ops {
int (*stop)(struct device *dev);
};
+struct genpd_governor_data {
+ s64 max_off_time_ns;
+ bool max_off_time_changed;
+ ktime_t next_wakeup;
+ bool cached_power_down_ok;
+ bool cached_power_down_state_idx;
+};
+
struct genpd_power_state {
s64 power_off_latency_ns;
s64 power_on_latency_ns;
@@ -98,7 +106,7 @@ struct genpd_power_state {
u64 usage;
u64 rejected;
struct fwnode_handle *fwnode;
- ktime_t idle_time;
+ u64 idle_time;
void *data;
};
@@ -114,6 +122,7 @@ struct generic_pm_domain {
struct list_head child_links; /* Links with PM domain as a child */
struct list_head dev_list; /* List of devices */
struct dev_power_governor *gov;
+ struct genpd_governor_data *gd; /* Data used by a genpd governor. */
struct work_struct power_off_work;
struct fwnode_handle *provider; /* Identity of the domain provider */
bool has_provider;
@@ -134,11 +143,6 @@ struct generic_pm_domain {
int (*set_performance_state)(struct generic_pm_domain *genpd,
unsigned int state);
struct gpd_dev_ops dev_ops;
- s64 max_off_time_ns; /* Maximum allowed "suspended" time. */
- ktime_t next_wakeup; /* Maintained by the domain governor */
- bool max_off_time_changed;
- bool cached_power_down_ok;
- bool cached_power_down_state_idx;
int (*attach_dev)(struct generic_pm_domain *domain,
struct device *dev);
void (*detach_dev)(struct generic_pm_domain *domain,
@@ -149,8 +153,8 @@ struct generic_pm_domain {
unsigned int state_count);
unsigned int state_count; /* number of states */
unsigned int state_idx; /* state that genpd will go to when off */
- ktime_t on_time;
- ktime_t accounting_time;
+ u64 on_time;
+ u64 accounting_time;
const struct genpd_lock_ops *lock_ops;
union {
struct mutex mlock;
@@ -182,6 +186,7 @@ struct gpd_timing_data {
s64 suspend_latency_ns;
s64 resume_latency_ns;
s64 effective_constraint_ns;
+ ktime_t next_wakeup;
bool constraint_changed;
bool cached_suspend_ok;
};
@@ -193,14 +198,13 @@ struct pm_domain_data {
struct generic_pm_domain_data {
struct pm_domain_data base;
- struct gpd_timing_data td;
+ struct gpd_timing_data *td;
struct notifier_block nb;
struct notifier_block *power_nb;
int cpu;
unsigned int performance_state;
unsigned int default_pstate;
unsigned int rpm_pstate;
- ktime_t next_wakeup;
void *data;
};
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 2bff6a10095d..9e4d056967c6 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -41,10 +41,16 @@
#define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
_EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
- suspend_fn, resume_fn, idle_fn, "")
+ suspend_fn, resume_fn, idle_fn, "", "")
#define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
_EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
- suspend_fn, resume_fn, idle_fn, "_gpl")
+ suspend_fn, resume_fn, idle_fn, "_gpl", "")
+#define EXPORT_NS_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \
+ _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
+ suspend_fn, resume_fn, idle_fn, "", #ns)
+#define EXPORT_NS_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \
+ _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \
+ suspend_fn, resume_fn, idle_fn, "_gpl", #ns)
#ifdef CONFIG_PM
extern struct workqueue_struct *pm_wq;
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 300273ff40cc..70f2921e2e70 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -542,22 +542,56 @@ static inline void unlock_system_sleep(void) {}
#ifdef CONFIG_PM_SLEEP_DEBUG
extern bool pm_print_times_enabled;
extern bool pm_debug_messages_on;
-extern __printf(2, 3) void __pm_pr_dbg(bool defer, const char *fmt, ...);
+static inline int pm_dyn_debug_messages_on(void)
+{
+#ifdef CONFIG_DYNAMIC_DEBUG
+ return 1;
+#else
+ return 0;
+#endif
+}
+#ifndef pr_fmt
+#define pr_fmt(fmt) "PM: " fmt
+#endif
+#define __pm_pr_dbg(fmt, ...) \
+ do { \
+ if (pm_debug_messages_on) \
+ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
+ else if (pm_dyn_debug_messages_on()) \
+ pr_debug(fmt, ##__VA_ARGS__); \
+ } while (0)
+#define __pm_deferred_pr_dbg(fmt, ...) \
+ do { \
+ if (pm_debug_messages_on) \
+ printk_deferred(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
+ } while (0)
#else
#define pm_print_times_enabled (false)
#define pm_debug_messages_on (false)
#include <linux/printk.h>
-#define __pm_pr_dbg(defer, fmt, ...) \
- no_printk(KERN_DEBUG fmt, ##__VA_ARGS__)
+#define __pm_pr_dbg(fmt, ...) \
+ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#define __pm_deferred_pr_dbg(fmt, ...) \
+ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif
+/**
+ * pm_pr_dbg - print pm sleep debug messages
+ *
+ * If pm_debug_messages_on is enabled, print message.
+ * If pm_debug_messages_on is disabled and CONFIG_DYNAMIC_DEBUG is enabled,
+ * print message only from instances explicitly enabled on dynamic debug's
+ * control.
+ * If pm_debug_messages_on is disabled and CONFIG_DYNAMIC_DEBUG is disabled,
+ * don't print message.
+ */
#define pm_pr_dbg(fmt, ...) \
- __pm_pr_dbg(false, fmt, ##__VA_ARGS__)
+ __pm_pr_dbg(fmt, ##__VA_ARGS__)
#define pm_deferred_pr_dbg(fmt, ...) \
- __pm_pr_dbg(true, fmt, ##__VA_ARGS__)
+ __pm_deferred_pr_dbg(fmt, ##__VA_ARGS__)
#ifdef CONFIG_PM_AUTOSLEEP
diff --git a/include/soc/rockchip/pm_domains.h b/include/soc/rockchip/pm_domains.h
new file mode 100644
index 000000000000..7dbd941fc937
--- /dev/null
+++ b/include/soc/rockchip/pm_domains.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2022, The Chromium OS Authors. All rights reserved.
+ */
+
+#ifndef __SOC_ROCKCHIP_PM_DOMAINS_H__
+#define __SOC_ROCKCHIP_PM_DOMAINS_H__
+
+#ifdef CONFIG_ROCKCHIP_PM_DOMAINS
+
+int rockchip_pmu_block(void);
+void rockchip_pmu_unblock(void);
+
+#else /* CONFIG_ROCKCHIP_PM_DOMAINS */
+
+static inline int rockchip_pmu_block(void)
+{
+ return 0;
+}
+
+static inline void rockchip_pmu_unblock(void) { }
+
+#endif /* CONFIG_ROCKCHIP_PM_DOMAINS */
+
+#endif /* __SOC_ROCKCHIP_PM_DOMAINS_H__ */
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 5899260a8bef..874ad834dc8d 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG
+ifeq ($(CONFIG_DYNAMIC_DEBUG), y)
+CFLAGS_swap.o := -DDEBUG
+CFLAGS_snapshot.o := -DDEBUG
+CFLAGS_energy_model.o := -DDEBUG
+endif
KASAN_SANITIZE_snapshot.o := n
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 0153b0ca7b23..6c373f2960e7 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -54,28 +54,15 @@ static int em_debug_cpus_show(struct seq_file *s, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
-static int em_debug_units_show(struct seq_file *s, void *unused)
+static int em_debug_flags_show(struct seq_file *s, void *unused)
{
struct em_perf_domain *pd = s->private;
- char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ?
- "milliWatts" : "bogoWatts";
- seq_printf(s, "%s\n", units);
+ seq_printf(s, "%#lx\n", pd->flags);
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(em_debug_units);
-
-static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused)
-{
- struct em_perf_domain *pd = s->private;
- int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0;
-
- seq_printf(s, "%d\n", enabled);
-
- return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies);
+DEFINE_SHOW_ATTRIBUTE(em_debug_flags);
static void em_debug_create_pd(struct device *dev)
{
@@ -89,9 +76,8 @@ static void em_debug_create_pd(struct device *dev)
debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
&em_debug_cpus_fops);
- debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
- debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd,
- &em_debug_skip_inefficiencies_fops);
+ debugfs_create_file("flags", 0444, d, dev->em_pd,
+ &em_debug_flags_fops);
/* Create a sub-directory for each performance state */
for (i = 0; i < dev->em_pd->nr_perf_states; i++)
@@ -121,7 +107,8 @@ static void em_debug_remove_pd(struct device *dev) {}
#endif
static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
- int nr_states, struct em_data_callback *cb)
+ int nr_states, struct em_data_callback *cb,
+ unsigned long flags)
{
unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
struct em_perf_state *table;
@@ -139,7 +126,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
* lowest performance state of 'dev' above 'freq' and updates
* 'power' and 'freq' accordingly.
*/
- ret = cb->active_power(&power, &freq, dev);
+ ret = cb->active_power(dev, &power, &freq);
if (ret) {
dev_err(dev, "EM: invalid perf. state: %d\n",
ret);
@@ -173,10 +160,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
/* Compute the cost of each performance state. */
fmax = (u64) table[nr_states - 1].frequency;
for (i = nr_states - 1; i >= 0; i--) {
- unsigned long power_res = em_scale_power(table[i].power);
+ unsigned long power_res, cost;
+
+ if (flags & EM_PERF_DOMAIN_ARTIFICIAL) {
+ ret = cb->get_cost(dev, table[i].frequency, &cost);
+ if (ret || !cost || cost > EM_MAX_POWER) {
+ dev_err(dev, "EM: invalid cost %lu %d\n",
+ cost, ret);
+ goto free_ps_table;
+ }
+ } else {
+ power_res = em_scale_power(table[i].power);
+ cost = div64_u64(fmax * power_res, table[i].frequency);
+ }
+
+ table[i].cost = cost;
- table[i].cost = div64_u64(fmax * power_res,
- table[i].frequency);
if (table[i].cost >= prev_cost) {
table[i].flags = EM_PERF_STATE_INEFFICIENT;
dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
@@ -197,7 +196,8 @@ free_ps_table:
}
static int em_create_pd(struct device *dev, int nr_states,
- struct em_data_callback *cb, cpumask_t *cpus)
+ struct em_data_callback *cb, cpumask_t *cpus,
+ unsigned long flags)
{
struct em_perf_domain *pd;
struct device *cpu_dev;
@@ -215,7 +215,7 @@ static int em_create_pd(struct device *dev, int nr_states,
return -ENOMEM;
}
- ret = em_create_perf_table(dev, pd, nr_states, cb);
+ ret = em_create_perf_table(dev, pd, nr_states, cb, flags);
if (ret) {
kfree(pd);
return ret;
@@ -259,6 +259,8 @@ static void em_cpufreq_update_efficiencies(struct device *dev)
found++;
}
+ cpufreq_cpu_put(policy);
+
if (!found)
return;
@@ -332,6 +334,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
bool milliwatts)
{
unsigned long cap, prev_cap = 0;
+ unsigned long flags = 0;
int cpu, ret;
if (!dev || !nr_states || !cb)
@@ -378,12 +381,16 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
}
}
- ret = em_create_pd(dev, nr_states, cb, cpus);
+ if (milliwatts)
+ flags |= EM_PERF_DOMAIN_MILLIWATTS;
+ else if (cb->get_cost)
+ flags |= EM_PERF_DOMAIN_ARTIFICIAL;
+
+ ret = em_create_pd(dev, nr_states, cb, cpus, flags);
if (ret)
goto unlock;
- if (milliwatts)
- dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
+ dev->em_pd->flags |= flags;
em_cpufreq_update_efficiencies(dev);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 7e646079fbeb..5242bf2ee469 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -545,35 +545,6 @@ static int __init pm_debug_messages_setup(char *str)
}
__setup("pm_debug_messages", pm_debug_messages_setup);
-/**
- * __pm_pr_dbg - Print a suspend debug message to the kernel log.
- * @defer: Whether or not to use printk_deferred() to print the message.
- * @fmt: Message format.
- *
- * The message will be emitted if enabled through the pm_debug_messages
- * sysfs attribute.
- */
-void __pm_pr_dbg(bool defer, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- if (!pm_debug_messages_on)
- return;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- if (defer)
- printk_deferred(KERN_DEBUG "PM: %pV", &vaf);
- else
- printk(KERN_DEBUG "PM: %pV", &vaf);
-
- va_end(args);
-}
-
#else /* !CONFIG_PM_SLEEP_DEBUG */
static inline void pm_print_times_init(void) {}
#endif /* CONFIG_PM_SLEEP_DEBUG */
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 11b570fcf049..3068601e585a 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -6,9 +6,6 @@
* Originally from swsusp.
*/
-
-#undef DEBUG
-
#include <linux/interrupt.h>
#include <linux/oom.h>
#include <linux/suspend.h>
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 330d49937692..2a406753af90 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -326,7 +326,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
return ret;
}
-/**
+/*
* Data types related to memory bitmaps.
*
* Memory bitmap is a structure consisting of many linked lists of
@@ -427,6 +427,10 @@ struct memory_bitmap {
/**
* alloc_rtree_node - Allocate a new node and add it to the radix tree.
+ * @gfp_mask: GFP mask for the allocation.
+ * @safe_needed: Get pages not used before hibernation (restore only)
+ * @ca: Pointer to a linked list of pages ("a chain") to allocate from
+ * @list: Radix Tree node to add.
*
* This function is used to allocate inner nodes as well as the
* leave nodes of the radix tree. It also adds the node to the
@@ -902,7 +906,7 @@ static bool rtree_next_node(struct memory_bitmap *bm)
}
/**
- * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap.
+ * memory_bm_next_pfn - Find the next set bit in a memory bitmap.
* @bm: Memory bitmap.
*
* Starting from the last returned position this function searches for the next
@@ -1937,7 +1941,7 @@ static inline int get_highmem_buffer(int safe_needed)
}
/**
- * alloc_highmem_image_pages - Allocate some highmem pages for the image.
+ * alloc_highmem_pages - Allocate some highmem pages for the image.
*
* Try to allocate as many pages as needed, but if the number of free highmem
* pages is less than that, allocate them all.
@@ -2224,7 +2228,7 @@ static int check_header(struct swsusp_info *info)
}
/**
- * load header - Check the image header and copy the data from it.
+ * load_header - Check the image header and copy the data from it.
*/
static int load_header(struct swsusp_info *info)
{
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index f3e3c94ab9bd..92e139b9c792 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line")
endif
turbostat : turbostat.c
-override CFLAGS += -O2 -Wall -I../../../include
+override CFLAGS += -O2 -Wall -Wextra -I../../../include
override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
override CFLAGS += -D_FILE_OFFSET_BITS=64
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 9b17097bc3d7..1e7d3de55a94 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -292,7 +292,7 @@ starts a new interval.
must be run as root.
Alternatively, non-root users can be enabled to run turbostat this way:
-# setcap cap_sys_rawio=ep ./turbostat
+# setcap cap_sys_admin,cap_sys_rawio,cap_sys_nice=+ep ./turbostat
# chmod +r /dev/cpu/*/msr
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index bc5ae0872fed..ede31a4287a0 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3,7 +3,7 @@
* turbostat -- show CPU frequency and C-state residency
* on modern Intel and AMD processors.
*
- * Copyright (c) 2021 Intel Corporation.
+ * Copyright (c) 2022 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
@@ -37,6 +37,171 @@
#include <asm/unistd.h>
#include <stdbool.h>
+#define UNUSED(x) (void)(x)
+
+/*
+ * This list matches the column headers, except
+ * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
+ * 2. Core and CPU are moved to the end, we can't have strings that contain them
+ * matching on them for --show and --hide.
+ */
+
+/*
+ * buffer size used by sscanf() for added column names
+ * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
+ */
+#define NAME_BYTES 20
+#define PATH_BYTES 128
+
+enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
+enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
+enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
+
+struct msr_counter {
+ unsigned int msr_num;
+ char name[NAME_BYTES];
+ char path[PATH_BYTES];
+ unsigned int width;
+ enum counter_type type;
+ enum counter_format format;
+ struct msr_counter *next;
+ unsigned int flags;
+#define FLAGS_HIDE (1 << 0)
+#define FLAGS_SHOW (1 << 1)
+#define SYSFS_PERCPU (1 << 1)
+};
+
+struct msr_counter bic[] = {
+ { 0x0, "usec", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Package", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Node", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Busy%", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "IRQ", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 },
+ { 0x0, "sysfs", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc3", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Core", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CPU", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "APIC", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "Die", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
+};
+
+#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
+#define BIC_USEC (1ULL << 0)
+#define BIC_TOD (1ULL << 1)
+#define BIC_Package (1ULL << 2)
+#define BIC_Node (1ULL << 3)
+#define BIC_Avg_MHz (1ULL << 4)
+#define BIC_Busy (1ULL << 5)
+#define BIC_Bzy_MHz (1ULL << 6)
+#define BIC_TSC_MHz (1ULL << 7)
+#define BIC_IRQ (1ULL << 8)
+#define BIC_SMI (1ULL << 9)
+#define BIC_sysfs (1ULL << 10)
+#define BIC_CPU_c1 (1ULL << 11)
+#define BIC_CPU_c3 (1ULL << 12)
+#define BIC_CPU_c6 (1ULL << 13)
+#define BIC_CPU_c7 (1ULL << 14)
+#define BIC_ThreadC (1ULL << 15)
+#define BIC_CoreTmp (1ULL << 16)
+#define BIC_CoreCnt (1ULL << 17)
+#define BIC_PkgTmp (1ULL << 18)
+#define BIC_GFX_rc6 (1ULL << 19)
+#define BIC_GFXMHz (1ULL << 20)
+#define BIC_Pkgpc2 (1ULL << 21)
+#define BIC_Pkgpc3 (1ULL << 22)
+#define BIC_Pkgpc6 (1ULL << 23)
+#define BIC_Pkgpc7 (1ULL << 24)
+#define BIC_Pkgpc8 (1ULL << 25)
+#define BIC_Pkgpc9 (1ULL << 26)
+#define BIC_Pkgpc10 (1ULL << 27)
+#define BIC_CPU_LPI (1ULL << 28)
+#define BIC_SYS_LPI (1ULL << 29)
+#define BIC_PkgWatt (1ULL << 30)
+#define BIC_CorWatt (1ULL << 31)
+#define BIC_GFXWatt (1ULL << 32)
+#define BIC_PkgCnt (1ULL << 33)
+#define BIC_RAMWatt (1ULL << 34)
+#define BIC_PKG__ (1ULL << 35)
+#define BIC_RAM__ (1ULL << 36)
+#define BIC_Pkg_J (1ULL << 37)
+#define BIC_Cor_J (1ULL << 38)
+#define BIC_GFX_J (1ULL << 39)
+#define BIC_RAM_J (1ULL << 40)
+#define BIC_Mod_c6 (1ULL << 41)
+#define BIC_Totl_c0 (1ULL << 42)
+#define BIC_Any_c0 (1ULL << 43)
+#define BIC_GFX_c0 (1ULL << 44)
+#define BIC_CPUGFX (1ULL << 45)
+#define BIC_Core (1ULL << 46)
+#define BIC_CPU (1ULL << 47)
+#define BIC_APIC (1ULL << 48)
+#define BIC_X2APIC (1ULL << 49)
+#define BIC_Die (1ULL << 50)
+#define BIC_GFXACTMHz (1ULL << 51)
+#define BIC_IPC (1ULL << 52)
+#define BIC_CORE_THROT_CNT (1ULL << 53)
+
+#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
+#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
+#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz )
+#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
+#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
+
+#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
+
+unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
+
+#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
+#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
+#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
+#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
+#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
+#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
+
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
@@ -48,6 +213,7 @@ struct timespec interval_ts = { 5, 0 };
unsigned int model_orig;
unsigned int num_iterations;
+unsigned int header_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
@@ -159,13 +325,6 @@ int ignore_stdin;
#define MAX(a, b) ((a) > (b) ? (a) : (b))
-/*
- * buffer size used by sscanf() for added column names
- * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
- */
-#define NAME_BYTES 20
-#define PATH_BYTES 128
-
int backwards_count;
char *progname;
@@ -205,6 +364,7 @@ struct core_data {
unsigned int core_temp_c;
unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */
unsigned int core_id;
+ unsigned long long core_throt_cnt;
unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;
@@ -255,24 +415,6 @@ struct pkg_data {
#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
-enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
-enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
-enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
-
-struct msr_counter {
- unsigned int msr_num;
- char name[NAME_BYTES];
- char path[PATH_BYTES];
- unsigned int width;
- enum counter_type type;
- enum counter_format format;
- struct msr_counter *next;
- unsigned int flags;
-#define FLAGS_HIDE (1 << 0)
-#define FLAGS_SHOW (1 << 1)
-#define SYSFS_PERCPU (1 << 1)
-};
-
/*
* The accumulated sum of MSR is defined as a monotonic
* increasing MSR, it will be accumulated periodically,
@@ -522,8 +664,10 @@ static int perf_instr_count_open(int cpu_num)
/* counter for cpu_num, including user + kernel and all processes */
fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
- if (fd == -1)
- err(-1, "cpu%d: perf instruction counter\n", cpu_num);
+ if (fd == -1) {
+ warn("cpu%d: perf instruction counter", cpu_num);
+ BIC_NOT_PRESENT(BIC_IPC);
+ }
return fd;
}
@@ -550,143 +694,10 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
return 0;
}
-/*
- * This list matches the column headers, except
- * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
- * 2. Core and CPU are moved to the end, we can't have strings that contain them
- * matching on them for --show and --hide.
- */
-struct msr_counter bic[] = {
- { 0x0, "usec" },
- { 0x0, "Time_Of_Day_Seconds" },
- { 0x0, "Package" },
- { 0x0, "Node" },
- { 0x0, "Avg_MHz" },
- { 0x0, "Busy%" },
- { 0x0, "Bzy_MHz" },
- { 0x0, "TSC_MHz" },
- { 0x0, "IRQ" },
- { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL },
- { 0x0, "sysfs" },
- { 0x0, "CPU%c1" },
- { 0x0, "CPU%c3" },
- { 0x0, "CPU%c6" },
- { 0x0, "CPU%c7" },
- { 0x0, "ThreadC" },
- { 0x0, "CoreTmp" },
- { 0x0, "CoreCnt" },
- { 0x0, "PkgTmp" },
- { 0x0, "GFX%rc6" },
- { 0x0, "GFXMHz" },
- { 0x0, "Pkg%pc2" },
- { 0x0, "Pkg%pc3" },
- { 0x0, "Pkg%pc6" },
- { 0x0, "Pkg%pc7" },
- { 0x0, "Pkg%pc8" },
- { 0x0, "Pkg%pc9" },
- { 0x0, "Pk%pc10" },
- { 0x0, "CPU%LPI" },
- { 0x0, "SYS%LPI" },
- { 0x0, "PkgWatt" },
- { 0x0, "CorWatt" },
- { 0x0, "GFXWatt" },
- { 0x0, "PkgCnt" },
- { 0x0, "RAMWatt" },
- { 0x0, "PKG_%" },
- { 0x0, "RAM_%" },
- { 0x0, "Pkg_J" },
- { 0x0, "Cor_J" },
- { 0x0, "GFX_J" },
- { 0x0, "RAM_J" },
- { 0x0, "Mod%c6" },
- { 0x0, "Totl%C0" },
- { 0x0, "Any%C0" },
- { 0x0, "GFX%C0" },
- { 0x0, "CPUGFX%" },
- { 0x0, "Core" },
- { 0x0, "CPU" },
- { 0x0, "APIC" },
- { 0x0, "X2APIC" },
- { 0x0, "Die" },
- { 0x0, "GFXAMHz" },
- { 0x0, "IPC" },
-};
-
-#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
-#define BIC_USEC (1ULL << 0)
-#define BIC_TOD (1ULL << 1)
-#define BIC_Package (1ULL << 2)
-#define BIC_Node (1ULL << 3)
-#define BIC_Avg_MHz (1ULL << 4)
-#define BIC_Busy (1ULL << 5)
-#define BIC_Bzy_MHz (1ULL << 6)
-#define BIC_TSC_MHz (1ULL << 7)
-#define BIC_IRQ (1ULL << 8)
-#define BIC_SMI (1ULL << 9)
-#define BIC_sysfs (1ULL << 10)
-#define BIC_CPU_c1 (1ULL << 11)
-#define BIC_CPU_c3 (1ULL << 12)
-#define BIC_CPU_c6 (1ULL << 13)
-#define BIC_CPU_c7 (1ULL << 14)
-#define BIC_ThreadC (1ULL << 15)
-#define BIC_CoreTmp (1ULL << 16)
-#define BIC_CoreCnt (1ULL << 17)
-#define BIC_PkgTmp (1ULL << 18)
-#define BIC_GFX_rc6 (1ULL << 19)
-#define BIC_GFXMHz (1ULL << 20)
-#define BIC_Pkgpc2 (1ULL << 21)
-#define BIC_Pkgpc3 (1ULL << 22)
-#define BIC_Pkgpc6 (1ULL << 23)
-#define BIC_Pkgpc7 (1ULL << 24)
-#define BIC_Pkgpc8 (1ULL << 25)
-#define BIC_Pkgpc9 (1ULL << 26)
-#define BIC_Pkgpc10 (1ULL << 27)
-#define BIC_CPU_LPI (1ULL << 28)
-#define BIC_SYS_LPI (1ULL << 29)
-#define BIC_PkgWatt (1ULL << 30)
-#define BIC_CorWatt (1ULL << 31)
-#define BIC_GFXWatt (1ULL << 32)
-#define BIC_PkgCnt (1ULL << 33)
-#define BIC_RAMWatt (1ULL << 34)
-#define BIC_PKG__ (1ULL << 35)
-#define BIC_RAM__ (1ULL << 36)
-#define BIC_Pkg_J (1ULL << 37)
-#define BIC_Cor_J (1ULL << 38)
-#define BIC_GFX_J (1ULL << 39)
-#define BIC_RAM_J (1ULL << 40)
-#define BIC_Mod_c6 (1ULL << 41)
-#define BIC_Totl_c0 (1ULL << 42)
-#define BIC_Any_c0 (1ULL << 43)
-#define BIC_GFX_c0 (1ULL << 44)
-#define BIC_CPUGFX (1ULL << 45)
-#define BIC_Core (1ULL << 46)
-#define BIC_CPU (1ULL << 47)
-#define BIC_APIC (1ULL << 48)
-#define BIC_X2APIC (1ULL << 49)
-#define BIC_Die (1ULL << 50)
-#define BIC_GFXACTMHz (1ULL << 51)
-#define BIC_IPC (1ULL << 52)
-
-#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
-#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz )
-#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
-#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
-
-#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
-
-unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
-unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
-
-#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
-#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
-#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
-#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
-#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
-#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
-
#define MAX_DEFERRED 16
+char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
+int deferred_add_index;
int deferred_skip_index;
/*
@@ -720,6 +731,8 @@ void help(void)
" -l, --list list column headers only\n"
" -n, --num_iterations num\n"
" number of the measurement iterations\n"
+ " -N, --header_iterations num\n"
+ " print header every num iterations\n"
" -o, --out file\n"
" create or truncate \"file\" for all output\n"
" -q, --quiet skip decoding system configuration header\n"
@@ -741,7 +754,7 @@ void help(void)
*/
unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
{
- int i;
+ unsigned int i;
unsigned long long retval = 0;
while (name_list) {
@@ -752,40 +765,51 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
if (comma)
*comma = '\0';
- if (!strcmp(name_list, "all"))
- return ~0;
- if (!strcmp(name_list, "topology"))
- return BIC_TOPOLOGY;
- if (!strcmp(name_list, "power"))
- return BIC_THERMAL_PWR;
- if (!strcmp(name_list, "idle"))
- return BIC_IDLE;
- if (!strcmp(name_list, "frequency"))
- return BIC_FREQUENCY;
- if (!strcmp(name_list, "other"))
- return BIC_OTHER;
- if (!strcmp(name_list, "all"))
- return 0;
-
for (i = 0; i < MAX_BIC; ++i) {
if (!strcmp(name_list, bic[i].name)) {
retval |= (1ULL << i);
break;
}
+ if (!strcmp(name_list, "all")) {
+ retval |= ~0;
+ break;
+ } else if (!strcmp(name_list, "topology")) {
+ retval |= BIC_TOPOLOGY;
+ break;
+ } else if (!strcmp(name_list, "power")) {
+ retval |= BIC_THERMAL_PWR;
+ break;
+ } else if (!strcmp(name_list, "idle")) {
+ retval |= BIC_IDLE;
+ break;
+ } else if (!strcmp(name_list, "frequency")) {
+ retval |= BIC_FREQUENCY;
+ break;
+ } else if (!strcmp(name_list, "other")) {
+ retval |= BIC_OTHER;
+ break;
+ }
+
}
if (i == MAX_BIC) {
if (mode == SHOW_LIST) {
- fprintf(stderr, "Invalid counter name: %s\n", name_list);
- exit(-1);
- }
- deferred_skip_names[deferred_skip_index++] = name_list;
- if (debug)
- fprintf(stderr, "deferred \"%s\"\n", name_list);
- if (deferred_skip_index >= MAX_DEFERRED) {
- fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
- MAX_DEFERRED, name_list);
- help();
- exit(1);
+ deferred_add_names[deferred_add_index++] = name_list;
+ if (deferred_add_index >= MAX_DEFERRED) {
+ fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
+ MAX_DEFERRED, name_list);
+ help();
+ exit(1);
+ }
+ } else {
+ deferred_skip_names[deferred_skip_index++] = name_list;
+ if (debug)
+ fprintf(stderr, "deferred \"%s\"\n", name_list);
+ if (deferred_skip_index >= MAX_DEFERRED) {
+ fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
+ MAX_DEFERRED, name_list);
+ help();
+ exit(1);
+ }
}
}
@@ -872,6 +896,9 @@ void print_header(char *delim)
if (DO_BIC(BIC_CoreTmp))
outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CORE_THROT_CNT))
+ outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
+
if (do_rapl && !rapl_joules) {
if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
@@ -1011,6 +1038,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
outp += sprintf(outp, "c6: %016llX\n", c->c6);
outp += sprintf(outp, "c7: %016llX\n", c->c7);
outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
+ outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
@@ -1225,6 +1253,10 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_CoreTmp))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
+ /* Core throttle count */
+ if (DO_BIC(BIC_CORE_THROT_CNT))
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);
+
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 32)
@@ -1311,6 +1343,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_PkgWatt))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
+
if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
@@ -1386,14 +1419,14 @@ void flush_output_stderr(void)
void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
- static int printed;
+ static int count;
- if (!printed || !summary_only)
+ if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only)
print_header("\t");
format_counters(&average.threads, &average.cores, &average.packages);
- printed = 1;
+ count++;
if (summary_only)
return;
@@ -1467,6 +1500,7 @@ void delta_core(struct core_data *new, struct core_data *old)
old->c6 = new->c6 - old->c6;
old->c7 = new->c7 - old->c7;
old->core_temp_c = new->core_temp_c;
+ old->core_throt_cnt = new->core_throt_cnt;
old->mc6_us = new->mc6_us - old->mc6_us;
DELTA_WRAP32(new->core_energy, old->core_energy);
@@ -1626,6 +1660,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
c->mc6_us = 0;
c->core_temp_c = 0;
c->core_energy = 0;
+ c->core_throt_cnt = 0;
p->pkg_wtd_core_c0 = 0;
p->pkg_any_core_c0 = 0;
@@ -1710,6 +1745,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.cores.mc6_us += c->mc6_us;
average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
+ average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
average.cores.core_energy += c->core_energy;
@@ -1987,6 +2023,26 @@ void get_apic_id(struct thread_data *t)
fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
}
+int get_core_throt_cnt(int cpu, unsigned long long *cnt)
+{
+ char path[128 + PATH_BYTES];
+ unsigned long long tmp;
+ FILE *fp;
+ int ret;
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
+ fp = fopen(path, "r");
+ if (!fp)
+ return -1;
+ ret = fscanf(fp, "%lld", &tmp);
+ if (ret != 1)
+ return -1;
+ fclose(fp);
+ *cnt = tmp;
+
+ return 0;
+}
+
/*
* get_counters(...)
* migrate to cpu
@@ -2129,6 +2185,9 @@ retry:
c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
}
+ if (DO_BIC(BIC_CORE_THROT_CNT))
+ get_core_throt_cnt(cpu, &c->core_throt_cnt);
+
if (do_rapl & RAPL_AMD_F17H) {
if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
return -14;
@@ -2428,6 +2487,9 @@ int has_turbo_ratio_group_limits(int family, int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_SKYLAKE_X:
@@ -2435,8 +2497,9 @@ int has_turbo_ratio_group_limits(int family, int model)
case INTEL_FAM6_ATOM_GOLDMONT_D:
case INTEL_FAM6_ATOM_TREMONT_D:
return 1;
+ default:
+ return 0;
}
- return 0;
}
static void dump_turbo_ratio_limits(int family, int model)
@@ -3027,6 +3090,8 @@ void set_max_cpu_num(void)
*/
int count_cpus(int cpu)
{
+ UNUSED(cpu);
+
topo.num_cpus++;
return 0;
}
@@ -3361,6 +3426,9 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
int i, ret;
int cpu = t->cpu_id;
+ UNUSED(c);
+ UNUSED(p);
+
for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
unsigned long long msr_cur, msr_last;
off_t offset;
@@ -3387,6 +3455,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
static void msr_record_handler(union sigval v)
{
+ UNUSED(v);
+
for_all_cpus(update_msr_sum, EVEN_COUNTERS);
}
@@ -3439,6 +3509,9 @@ release_msr:
/*
* set_my_sched_priority(pri)
* return previous
+ *
+ * if non-root, do this:
+ * # /sbin/setcap cap_sys_rawio,cap_sys_nice=+ep /usr/bin/turbostat
*/
int set_my_sched_priority(int priority)
{
@@ -3457,7 +3530,7 @@ int set_my_sched_priority(int priority)
errno = 0;
retval = getpriority(PRIO_PROCESS, 0);
if (retval != priority)
- err(-1, "getpriority(%d) != setpriority(%d)", retval, priority);
+ err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
return original_priority;
}
@@ -3466,7 +3539,7 @@ void turbostat_loop()
{
int retval;
int restarted = 0;
- int done_iters = 0;
+ unsigned int done_iters = 0;
setup_signal_handler();
@@ -3678,6 +3751,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
break;
case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
no_MSR_MISC_PWR_MGMT = 1;
+ /* FALLTHRU */
case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */
pkg_cstate_limits = slv_pkg_cstate_limits;
break;
@@ -3721,6 +3795,9 @@ int has_slv_msrs(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_SILVERMONT:
case INTEL_FAM6_ATOM_SILVERMONT_MID:
@@ -3736,6 +3813,9 @@ int is_dnv(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_GOLDMONT_D:
return 1;
@@ -3749,6 +3829,9 @@ int is_bdx(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_BROADWELL_X:
return 1;
@@ -3762,6 +3845,9 @@ int is_skx(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_SKYLAKE_X:
return 1;
@@ -3775,6 +3861,9 @@ int is_icx(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ICELAKE_X:
return 1;
@@ -3787,6 +3876,9 @@ int is_ehl(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_TREMONT:
return 1;
@@ -3799,6 +3891,9 @@ int is_jvl(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_TREMONT_D:
return 1;
@@ -3811,6 +3906,9 @@ int has_turbo_ratio_limit(unsigned int family, unsigned int model)
if (has_slv_msrs(family, model))
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
/* Nehalem compatible, but do not include turbo-ratio limit support */
case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
@@ -4125,6 +4223,9 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
char *epb_string;
int cpu, epb;
+ UNUSED(c);
+ UNUSED(p);
+
if (!has_epb)
return 0;
@@ -4171,6 +4272,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
unsigned long long msr;
int cpu;
+ UNUSED(c);
+ UNUSED(p);
+
if (!has_hwp)
return 0;
@@ -4254,6 +4358,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
unsigned long long msr;
int cpu;
+ UNUSED(c);
+ UNUSED(p);
+
cpu = t->cpu_id;
/* per-package */
@@ -4359,6 +4466,8 @@ double get_tdp_intel(unsigned int model)
double get_tdp_amd(unsigned int family)
{
+ UNUSED(family);
+
/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
return 280.0;
}
@@ -4376,6 +4485,7 @@ static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
case INTEL_FAM6_BROADWELL_X: /* BDX */
case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
+ case INTEL_FAM6_ICELAKE_X: /* ICX */
return (rapl_dram_energy_units = 15.3 / 1000000);
default:
return (rapl_energy_units);
@@ -4559,6 +4669,8 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
unsigned int has_rapl = 0;
double tdp;
+ UNUSED(model);
+
if (max_extended_level >= 0x80000007) {
__cpuid(0x80000007, eax, ebx, ecx, edx);
/* RAPL (Fam 17h+) */
@@ -4617,6 +4729,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
case INTEL_FAM6_HASWELL_L: /* HSW */
case INTEL_FAM6_HASWELL_G: /* HSW */
do_gfx_perf_limit_reasons = 1;
+ /* FALLTHRU */
case INTEL_FAM6_HASWELL_X: /* HSX */
do_core_perf_limit_reasons = 1;
do_ring_perf_limit_reasons = 1;
@@ -4643,6 +4756,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
unsigned int dts, dts2;
int cpu;
+ UNUSED(c);
+ UNUSED(p);
+
if (!(do_dts || do_ptm))
return 0;
@@ -4698,7 +4814,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
{
- fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
+ fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
cpu, label,
((msr >> 15) & 1) ? "EN" : "DIS",
((msr >> 0) & 0x7FFF) * rapl_power_units,
@@ -4714,6 +4830,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
const char *msr_name;
int cpu;
+ UNUSED(c);
+ UNUSED(p);
+
if (!do_rapl)
return 0;
@@ -4762,12 +4881,19 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
cpu, msr, (msr >> 63) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "PKG Limit #1");
- fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
+ fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
cpu,
((msr >> 47) & 1) ? "EN" : "DIS",
((msr >> 32) & 0x7FFF) * rapl_power_units,
(1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
((msr >> 48) & 1) ? "EN" : "DIS");
+
+ if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
+ return -9;
+
+ fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
+ fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
+ cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
}
if (do_rapl & RAPL_DRAM_POWER_INFO) {
@@ -4830,6 +4956,9 @@ int has_snb_msrs(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_SANDYBRIDGE:
case INTEL_FAM6_SANDYBRIDGE_X:
@@ -4873,6 +5002,9 @@ int has_c8910_msrs(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_HASWELL_L: /* HSW */
case INTEL_FAM6_BROADWELL: /* BDW */
@@ -4899,6 +5031,9 @@ int has_skl_msrs(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_SKYLAKE_L: /* SKL */
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
@@ -4911,6 +5046,10 @@ int is_slm(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
+
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */
@@ -4923,6 +5062,10 @@ int is_knl(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
+
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
return 1;
@@ -4935,6 +5078,9 @@ int is_cnl(unsigned int family, unsigned int model)
if (!genuine_intel)
return 0;
+ if (family != 6)
+ return 0;
+
switch (model) {
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
return 1;
@@ -4989,6 +5135,9 @@ int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned int eax, ebx, ecx, edx;
+ UNUSED(c);
+ UNUSED(p);
+
if (!genuine_intel)
return 0;
@@ -5025,6 +5174,9 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
unsigned int tcc_default, tcc_offset;
int cpu;
+ UNUSED(c);
+ UNUSED(p);
+
/* tj_max is used only for dts or ptm */
if (!(do_dts || do_ptm))
return 0;
@@ -5572,6 +5724,11 @@ void process_cpuid()
else
BIC_NOT_PRESENT(BIC_CPU_LPI);
+ if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
+ BIC_PRESENT(BIC_CORE_THROT_CNT);
+ else
+ BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+
if (!access(sys_lpi_file_sysfs, R_OK)) {
sys_lpi_file = sys_lpi_file_sysfs;
BIC_PRESENT(BIC_SYS_LPI);
@@ -5601,11 +5758,6 @@ int dir_filter(const struct dirent *dirp)
return 0;
}
-int open_dev_cpu_msr(int dummy1)
-{
- return 0;
-}
-
void topology_probe()
{
int i;
@@ -5896,6 +6048,9 @@ void turbostat_init()
if (!quiet && do_irtl_snb)
print_irtl();
+
+ if (DO_BIC(BIC_IPC))
+ (void)get_instr_count_fd(base_cpu);
}
int fork_it(char **argv)
@@ -5973,7 +6128,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 21.05.04" " - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2022.04.16 - Len Brown <lenb@kernel.org>\n");
}
int add_counter(unsigned int msr_num, char *path, char *name,
@@ -6138,6 +6293,16 @@ next:
}
}
+int is_deferred_add(char *name)
+{
+ int i;
+
+ for (i = 0; i < deferred_add_index; ++i)
+ if (!strcmp(name, deferred_add_names[i]))
+ return 1;
+ return 0;
+}
+
int is_deferred_skip(char *name)
{
int i;
@@ -6156,9 +6321,6 @@ void probe_sysfs(void)
int state;
char *sp;
- if (!DO_BIC(BIC_sysfs))
- return;
-
for (state = 10; state >= 0; --state) {
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
@@ -6181,6 +6343,9 @@ void probe_sysfs(void)
sprintf(path, "cpuidle/state%d/time", state);
+ if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ continue;
+
if (is_deferred_skip(name_buf))
continue;
@@ -6206,6 +6371,9 @@ void probe_sysfs(void)
sprintf(path, "cpuidle/state%d/usage", state);
+ if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ continue;
+
if (is_deferred_skip(name_buf))
continue;
@@ -6313,6 +6481,7 @@ void cmdline(int argc, char **argv)
{ "interval", required_argument, 0, 'i' },
{ "IPC", no_argument, 0, 'I' },
{ "num_iterations", required_argument, 0, 'n' },
+ { "header_iterations", required_argument, 0, 'N' },
{ "help", no_argument, 0, 'h' },
{ "hide", required_argument, 0, 'H' }, // meh, -h taken by --help
{ "Joules", no_argument, 0, 'J' },
@@ -6394,6 +6563,14 @@ void cmdline(int argc, char **argv)
exit(2);
}
break;
+ case 'N':
+ header_iterations = strtod(optarg, NULL);
+
+ if (header_iterations <= 0) {
+ fprintf(outf, "iterations %d should be positive number\n", header_iterations);
+ exit(2);
+ }
+ break;
case 's':
/*
* --show: show only those specified
@@ -6432,6 +6609,8 @@ int main(int argc, char **argv)
turbostat_init();
+ msr_sum_record();
+
/* dump counters and exit */
if (dump_only)
return get_and_dump_counters();
@@ -6443,7 +6622,6 @@ int main(int argc, char **argv)
return 0;
}
- msr_sum_record();
/*
* if any params left, it must be a command to fork
*/