aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/base
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-10 20:34:00 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-10 20:34:00 -0800
commitb35b6d4d71365fbfb6f2cc8edc331b3882ca817e (patch)
tree1b99ef00d7ad53f77002460e77c6048ae2c0c211 /drivers/base
parentMerge tag 'acpi-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm (diff)
parentMerge branches 'pm-opp', 'pm-devfreq' and 'powercap' (diff)
downloadlinux-dev-b35b6d4d71365fbfb6f2cc8edc331b3882ca817e.tar.xz
linux-dev-b35b6d4d71365fbfb6f2cc8edc331b3882ca817e.zip
Merge tag 'pm-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki: "The most signigicant change here is the addition of a new cpufreq 'P-state' driver for AMD processors as a better replacement for the venerable acpi-cpufreq driver. There are also other cpufreq updates (in the core, intel_pstate, ARM drivers), PM core updates (mostly related to adding new macros for declaring PM operations which should make the lives of driver developers somewhat easier), and a bunch of assorted fixes and cleanups. Summary: - Add new P-state driver for AMD processors (Huang Rui). - Fix initialization of min and max frequency QoS requests in the cpufreq core (Rafael Wysocki). - Fix EPP handling on Alder Lake in intel_pstate (Srinivas Pandruvada). - Make intel_pstate update cpuinfo.max_freq when notified of HWP capabilities changes and drop a redundant function call from that driver (Rafael Wysocki). - Improve IRQ support in the Qcom cpufreq driver (Ard Biesheuvel, Stephen Boyd, Vladimir Zapolskiy). - Fix double devm_remap() in the Mediatek cpufreq driver (Hector Yuan). - Introduce thermal pressure helpers for cpufreq CPU cooling (Lukasz Luba). - Make cpufreq use default_groups in kobj_type (Greg Kroah-Hartman). - Make cpuidle use default_groups in kobj_type (Greg Kroah-Hartman). - Fix two comments in cpuidle code (Jason Wang, Yang Li). - Allow model-specific normal EPB value to be used in the intel_epb sysfs attribute handling code (Srinivas Pandruvada). - Simplify locking in pm_runtime_put_suppliers() (Rafael Wysocki). - Add safety net to supplier device release in the runtime PM core code (Rafael Wysocki). - Capture device status before disabling runtime PM for it (Rafael Wysocki). - Add new macros for declaring PM operations to allow drivers to avoid guarding them with CONFIG_PM #ifdefs or __maybe_unused and update some drivers to use these macros (Paul Cercueil). - Allow ACPI hardware signature to be honoured during restore from hibernation (David Woodhouse). - Update outdated operating performance points (OPP) documentation (Tang Yizhou). - Reduce log severity for informative message regarding frequency transition failures in devfreq (Tzung-Bi Shih). - Add DRAM frequency controller devfreq driver for Allwinner sunXi SoCs (Samuel Holland). - Add missing COMMON_CLK dependency to sun8i devfreq driver (Arnd Bergmann). - Add support for new layout of Psys PowerLimit Register on SPR to the Intel RAPL power capping driver (Zhang Rui). - Fix typo in a comment in idle_inject.c (Jason Wang). - Remove unused function definition from the DTPM (Dynamit Thermal Power Management) power capping framework (Daniel Lezcano). - Reduce DTPM trace verbosity (Daniel Lezcano)" * tag 'pm-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (53 commits) x86, sched: Fix undefined reference to init_freq_invariance_cppc() build error cpufreq: amd-pstate: Fix Kconfig dependencies for AMD P-State cpufreq: amd-pstate: Fix struct amd_cpudata kernel-doc comment cpuidle: use default_groups in kobj_type x86: intel_epb: Allow model specific normal EPB value MAINTAINERS: Add AMD P-State driver maintainer entry Documentation: amd-pstate: Add AMD P-State driver introduction cpufreq: amd-pstate: Add AMD P-State performance attributes cpufreq: amd-pstate: Add AMD P-State frequencies attributes cpufreq: amd-pstate: Add boost mode support for AMD P-State cpufreq: amd-pstate: Add trace for AMD P-State module cpufreq: amd-pstate: Introduce the support for the processors with shared memory solution cpufreq: amd-pstate: Add fast switch function for AMD P-State cpufreq: amd-pstate: Introduce a new AMD P-State driver to support future processors ACPI: CPPC: Add CPPC enable register function ACPI: CPPC: Check present CPUs for determining _CPC is valid ACPI: CPPC: Implement support for SystemIO registers x86/msr: Add AMD CPPC MSR definitions x86/cpufeatures: Add AMD Collaborative Processor Performance Control feature flag cpufreq: use default_groups in kobj_type ...
Diffstat (limited to 'drivers/base')
-rw-r--r--drivers/base/arch_topology.c42
-rw-r--r--drivers/base/core.c3
-rw-r--r--drivers/base/power/runtime.c98
3 files changed, 102 insertions, 41 deletions
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index ff16a36a908b..976154140f0b 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -22,6 +22,7 @@
static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
static struct cpumask scale_freq_counters_mask;
static bool scale_freq_invariant;
+static DEFINE_PER_CPU(u32, freq_factor) = 1;
static bool supports_scale_freq_counters(const struct cpumask *cpus)
{
@@ -155,15 +156,49 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
DEFINE_PER_CPU(unsigned long, thermal_pressure);
-void topology_set_thermal_pressure(const struct cpumask *cpus,
- unsigned long th_pressure)
+/**
+ * topology_update_thermal_pressure() - Update thermal pressure for CPUs
+ * @cpus : The related CPUs for which capacity has been reduced
+ * @capped_freq : The maximum allowed frequency that CPUs can run at
+ *
+ * Update the value of thermal pressure for all @cpus in the mask. The
+ * cpumask should include all (online+offline) affected CPUs, to avoid
+ * operating on stale data when hot-plug is used for some CPUs. The
+ * @capped_freq reflects the currently allowed max CPUs frequency due to
+ * thermal capping. It might be also a boost frequency value, which is bigger
+ * than the internal 'freq_factor' max frequency. In such case the pressure
+ * value should simply be removed, since this is an indication that there is
+ * no thermal throttling. The @capped_freq must be provided in kHz.
+ */
+void topology_update_thermal_pressure(const struct cpumask *cpus,
+ unsigned long capped_freq)
{
+ unsigned long max_capacity, capacity, th_pressure;
+ u32 max_freq;
int cpu;
+ cpu = cpumask_first(cpus);
+ max_capacity = arch_scale_cpu_capacity(cpu);
+ max_freq = per_cpu(freq_factor, cpu);
+
+ /* Convert to MHz scale which is used in 'freq_factor' */
+ capped_freq /= 1000;
+
+ /*
+ * Handle properly the boost frequencies, which should simply clean
+ * the thermal pressure value.
+ */
+ if (max_freq <= capped_freq)
+ capacity = max_capacity;
+ else
+ capacity = mult_frac(max_capacity, capped_freq, max_freq);
+
+ th_pressure = max_capacity - capacity;
+
for_each_cpu(cpu, cpus)
WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
}
-EXPORT_SYMBOL_GPL(topology_set_thermal_pressure);
+EXPORT_SYMBOL_GPL(topology_update_thermal_pressure);
static ssize_t cpu_capacity_show(struct device *dev,
struct device_attribute *attr,
@@ -217,7 +252,6 @@ static void update_topology_flags_workfn(struct work_struct *work)
update_topology = 0;
}
-static DEFINE_PER_CPU(u32, freq_factor) = 1;
static u32 *raw_capacity;
static int free_raw_capacity(void)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index fd034d742447..b191bd17de89 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -485,8 +485,7 @@ static void device_link_release_fn(struct work_struct *work)
/* Ensure that all references to the link object have been dropped. */
device_link_synchronize_removal();
- while (refcount_dec_not_one(&link->rpm_active))
- pm_runtime_put(link->supplier);
+ pm_runtime_release_supplier(link, true);
put_device(link->consumer);
put_device(link->supplier);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index d504cd4ab3cb..2f3cce17219b 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -305,19 +305,40 @@ static int rpm_get_suppliers(struct device *dev)
return 0;
}
+/**
+ * pm_runtime_release_supplier - Drop references to device link's supplier.
+ * @link: Target device link.
+ * @check_idle: Whether or not to check if the supplier device is idle.
+ *
+ * Drop all runtime PM references associated with @link to its supplier device
+ * and if @check_idle is set, check if that device is idle (and so it can be
+ * suspended).
+ */
+void pm_runtime_release_supplier(struct device_link *link, bool check_idle)
+{
+ struct device *supplier = link->supplier;
+
+ /*
+ * The additional power.usage_count check is a safety net in case
+ * the rpm_active refcount becomes saturated, in which case
+ * refcount_dec_not_one() would return true forever, but it is not
+ * strictly necessary.
+ */
+ while (refcount_dec_not_one(&link->rpm_active) &&
+ atomic_read(&supplier->power.usage_count) > 0)
+ pm_runtime_put_noidle(supplier);
+
+ if (check_idle)
+ pm_request_idle(supplier);
+}
+
static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend)
{
struct device_link *link;
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
- device_links_read_lock_held()) {
-
- while (refcount_dec_not_one(&link->rpm_active))
- pm_runtime_put_noidle(link->supplier);
-
- if (try_to_suspend)
- pm_request_idle(link->supplier);
- }
+ device_links_read_lock_held())
+ pm_runtime_release_supplier(link, try_to_suspend);
}
static void rpm_put_suppliers(struct device *dev)
@@ -742,13 +763,15 @@ static int rpm_resume(struct device *dev, int rpmflags)
trace_rpm_resume_rcuidle(dev, rpmflags);
repeat:
- if (dev->power.runtime_error)
+ if (dev->power.runtime_error) {
retval = -EINVAL;
- else if (dev->power.disable_depth == 1 && dev->power.is_suspended
- && dev->power.runtime_status == RPM_ACTIVE)
- retval = 1;
- else if (dev->power.disable_depth > 0)
- retval = -EACCES;
+ } else if (dev->power.disable_depth > 0) {
+ if (dev->power.runtime_status == RPM_ACTIVE &&
+ dev->power.last_status == RPM_ACTIVE)
+ retval = 1;
+ else
+ retval = -EACCES;
+ }
if (retval)
goto out;
@@ -1410,8 +1433,10 @@ void __pm_runtime_disable(struct device *dev, bool check_resume)
/* Update time accounting before disabling PM-runtime. */
update_pm_runtime_accounting(dev);
- if (!dev->power.disable_depth++)
+ if (!dev->power.disable_depth++) {
__pm_runtime_barrier(dev);
+ dev->power.last_status = dev->power.runtime_status;
+ }
out:
spin_unlock_irq(&dev->power.lock);
@@ -1428,23 +1453,23 @@ void pm_runtime_enable(struct device *dev)
spin_lock_irqsave(&dev->power.lock, flags);
- if (dev->power.disable_depth > 0) {
- dev->power.disable_depth--;
-
- /* About to enable runtime pm, set accounting_timestamp to now */
- if (!dev->power.disable_depth)
- dev->power.accounting_timestamp = ktime_get_mono_fast_ns();
- } else {
+ if (!dev->power.disable_depth) {
dev_warn(dev, "Unbalanced %s!\n", __func__);
+ goto out;
}
- WARN(!dev->power.disable_depth &&
- dev->power.runtime_status == RPM_SUSPENDED &&
- !dev->power.ignore_children &&
- atomic_read(&dev->power.child_count) > 0,
- "Enabling runtime PM for inactive device (%s) with active children\n",
- dev_name(dev));
+ if (--dev->power.disable_depth > 0)
+ goto out;
+
+ dev->power.last_status = RPM_INVALID;
+ dev->power.accounting_timestamp = ktime_get_mono_fast_ns();
+
+ if (dev->power.runtime_status == RPM_SUSPENDED &&
+ !dev->power.ignore_children &&
+ atomic_read(&dev->power.child_count) > 0)
+ dev_warn(dev, "Enabling runtime PM for inactive device with active children\n");
+out:
spin_unlock_irqrestore(&dev->power.lock, flags);
}
EXPORT_SYMBOL_GPL(pm_runtime_enable);
@@ -1640,6 +1665,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend);
void pm_runtime_init(struct device *dev)
{
dev->power.runtime_status = RPM_SUSPENDED;
+ dev->power.last_status = RPM_INVALID;
dev->power.idle_notification = false;
dev->power.disable_depth = 1;
@@ -1722,8 +1748,6 @@ void pm_runtime_get_suppliers(struct device *dev)
void pm_runtime_put_suppliers(struct device *dev)
{
struct device_link *link;
- unsigned long flags;
- bool put;
int idx;
idx = device_links_read_lock();
@@ -1731,11 +1755,17 @@ void pm_runtime_put_suppliers(struct device *dev)
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
device_links_read_lock_held())
if (link->supplier_preactivated) {
+ bool put;
+
link->supplier_preactivated = false;
- spin_lock_irqsave(&dev->power.lock, flags);
+
+ spin_lock_irq(&dev->power.lock);
+
put = pm_runtime_status_suspended(dev) &&
refcount_dec_not_one(&link->rpm_active);
- spin_unlock_irqrestore(&dev->power.lock, flags);
+
+ spin_unlock_irq(&dev->power.lock);
+
if (put)
pm_runtime_put(link->supplier);
}
@@ -1772,9 +1802,7 @@ void pm_runtime_drop_link(struct device_link *link)
return;
pm_runtime_drop_link_count(link->consumer);
-
- while (refcount_dec_not_one(&link->rpm_active))
- pm_runtime_put(link->supplier);
+ pm_runtime_release_supplier(link, true);
}
static bool pm_runtime_need_not_resume(struct device *dev)