Diffstat (limited to 'kernel/power')
-rw-r--r--   kernel/power/Kconfig        |  81
-rw-r--r--   kernel/power/Makefile       |   9
-rw-r--r--   kernel/power/autosleep.c    |   2
-rw-r--r--   kernel/power/energy_model.c | 413
-rw-r--r--   kernel/power/hibernate.c    | 237
-rw-r--r--   kernel/power/main.c         |  70
-rw-r--r--   kernel/power/power.h        |  26
-rw-r--r--   kernel/power/poweroff.c     |   2
-rw-r--r--   kernel/power/process.c      |  23
-rw-r--r--   kernel/power/qos.c          | 567
-rw-r--r--   kernel/power/snapshot.c     | 124
-rw-r--r--   kernel/power/suspend.c      |  54
-rw-r--r--   kernel/power/suspend_test.c |  10
-rw-r--r--   kernel/power/swap.c         | 120
-rw-r--r--   kernel/power/user.c         | 188
-rw-r--r--   kernel/power/wakelock.c     |  11
16 files changed, 1030 insertions, 907 deletions
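A note on one of the larger additions: the hibernate.c changes below export a new helper, hibernate_quiet_exec(), which runs a callback with userspace frozen and devices quiesced. A minimal caller sketch, assuming only the signature visible in this diff; the callback name and payload are invented for illustration:

#include <linux/suspend.h>

/* Hypothetical callback: runs while userspace is frozen and devices
 * are suspended, so it must not block on frozen tasks. */
static int example_quiet_cb(void *data)
{
	int *value = data;

	*value = 42;
	return 0;
}

static int example_run_quiet(void)
{
	int value = 0;

	/* Returns the callback's result, or an error from the freeze path. */
	return hibernate_quiet_exec(example_quiet_cb, &value);
}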
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 7cbfbeacd68a..60a1d3051cc7 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -3,7 +3,7 @@ config SUSPEND bool "Suspend to RAM and standby" depends on ARCH_SUSPEND_POSSIBLE default y - ---help--- + help Allow the system to enter sleep states in which main memory is powered and thus its contents are preserved, such as the suspend-to-RAM state (e.g. the ACPI S3 state). @@ -42,7 +42,7 @@ config HIBERNATION select LZO_COMPRESS select LZO_DECOMPRESS select CRC32 - ---help--- + help Enable the suspend to disk (STD) functionality, which is usually called "hibernation" in user interfaces. STD checkpoints the system and powers it off; and restores that checkpoint on reboot. @@ -80,29 +80,38 @@ config HIBERNATION For more information take a look at <file:Documentation/power/swsusp.rst>. -config ARCH_SAVE_PAGE_KEYS - bool +config HIBERNATION_SNAPSHOT_DEV + bool "Userspace snapshot device" + depends on HIBERNATION + default y + help + Device used by the uswsusp tools. + + Say N if no snapshotting from userspace is needed, this also + reduces the attack surface of the kernel. + + If in doubt, say Y. config PM_STD_PARTITION string "Default resume partition" depends on HIBERNATION default "" - ---help--- + help The default resume partition is the partition that the suspend- - to-disk implementation will look for a suspended disk image. + to-disk implementation will look for a suspended disk image. - The partition specified here will be different for almost every user. + The partition specified here will be different for almost every user. It should be a valid swap partition (at least for now) that is turned - on before suspending. + on before suspending. The partition specified can be overridden by specifying: - resume=/dev/<other device> + resume=/dev/<other device> - which will set the resume partition to the device specified. + which will set the resume partition to the device specified. Note there is currently not a way to specify which device to save the - suspended image to. It will simply pick the first available swap + suspended image to. It will simply pick the first available swap device. config PM_SLEEP @@ -122,7 +131,7 @@ config PM_SLEEP_SMP_NONZERO_CPU def_bool y depends on PM_SLEEP_SMP depends on ARCH_SUSPEND_NONZERO_CPU - ---help--- + help If an arch can suspend (for suspend, hibernate, kexec, etc) on a non-zero numbered CPU, it may define ARCH_SUSPEND_NONZERO_CPU. This will allow nohz_full mask to include CPU0. @@ -130,16 +139,34 @@ config PM_SLEEP_SMP_NONZERO_CPU config PM_AUTOSLEEP bool "Opportunistic sleep" depends on PM_SLEEP - default n - ---help--- + help Allow the kernel to trigger a system transition into a global sleep state automatically whenever there are no active wakeup sources. +config PM_USERSPACE_AUTOSLEEP + bool "Userspace opportunistic sleep" + depends on PM_SLEEP + help + Notify kernel of aggressive userspace autosleep power management policy. + + This option changes the behavior of various sleep-sensitive code to deal + with frequent userspace-initiated transitions into a global sleep state. + + Saying Y here, disables code paths that most users really should keep + enabled. In particular, only enable this if it is very common to be + asleep/awake for very short periods of time (<= 2 seconds). + + Only platforms, such as Android, that implement opportunistic sleep from + a userspace power manager service should enable this option; and not + other machines. 
Therefore, say N here, unless you are + absolutely certain that this is what you want. + + config PM_WAKELOCKS bool "User space wakeup sources interface" depends on PM_SLEEP - default n - ---help--- + help Allow user space to create, activate and deactivate wakeup source objects with the help of a sysfs-based interface. @@ -156,7 +183,7 @@ config PM_WAKELOCKS_GC config PM bool "Device power management core functionality" - ---help--- + help Enable functionality allowing I/O devices to be put into energy-saving (low power) states, for example after a specified period of inactivity (autosuspended), and woken up in response to a hardware-generated @@ -170,7 +197,7 @@ config PM config PM_DEBUG bool "Power Management Debug Support" depends on PM - ---help--- + help This option enables various debugging support in the Power Management code. This is helpful when debugging and reporting PM bugs, like suspend support. @@ -178,7 +205,7 @@ config PM_DEBUG config PM_ADVANCED_DEBUG bool "Extra PM attributes in sysfs for low-level debugging/testing" depends on PM_DEBUG - ---help--- + help Add extra sysfs attributes allowing one to access some Power Management fields of device objects from user space. If you are not a kernel developer interested in debugging/testing Power Management, say "no". @@ -186,7 +213,7 @@ config PM_ADVANCED_DEBUG config PM_TEST_SUSPEND bool "Test suspend/resume and wakealarm during bootup" depends on SUSPEND && PM_DEBUG && RTC_CLASS=y - ---help--- + help This option will let you suspend your machine during bootup, and make it wake up a few seconds later using an RTC wakeup alarm. Enable this with a kernel parameter like "test_suspend=mem". @@ -201,7 +228,7 @@ config PM_SLEEP_DEBUG config DPM_WATCHDOG bool "Device suspend/resume watchdog" depends on PM_DEBUG && PSTORE && EXPERT - ---help--- + help Sets up a watchdog timer to capture drivers that are locked up attempting to suspend/resume a device. A detected lockup causes system panic with message @@ -234,7 +261,7 @@ config PM_TRACE_RTC depends on PM_SLEEP_DEBUG depends on X86 select PM_TRACE - ---help--- + help This enables some cheesy code to save the last PM event point in the RTC across reboots, so that you can debug a machine that just hangs during suspend (or more commonly, during resume). @@ -284,7 +311,6 @@ config PM_GENERIC_DOMAINS config WQ_POWER_EFFICIENT_DEFAULT bool "Enable workqueue power-efficient mode by default" depends on PM - default n help Per-cpu workqueues are generally preferred because they show better performance thanks to cache locality; unfortunately, @@ -313,15 +339,14 @@ config CPU_PM bool config ENERGY_MODEL - bool "Energy Model for CPUs" + bool "Energy Model for devices with DVFS (CPUs, GPUs, etc)" depends on SMP depends on CPU_FREQ - default n help Several subsystems (thermal and/or the task scheduler for example) - can leverage information about the energy consumed by CPUs to make - smarter decisions. This config option enables the framework from - which subsystems can access the energy models. + can leverage information about the energy consumed by devices to + make smarter decisions. This config option enables the framework + from which subsystems can access the energy models. The exact usage of the energy model is subsystem-dependent. 
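Before the energy_model.c hunks below: the rework replaces the CPU-only em_register_perf_domain() with a per-device em_dev_register_perf_domain(). As a hedged sketch (not part of the patch), this is how a cpufreq driver might use the new API; the OPP numbers and all example_* names are invented, and the callback shape follows the cb->active_power(dev, &power, &freq) call sites in the diff:

#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/energy_model.h>
#include <linux/kernel.h>

static int example_active_power(struct device *dev, unsigned long *power,
				unsigned long *freq)
{
	/* Made-up OPP table: ceil *freq to a supported frequency and
	 * report that state's power draw (micro-watts here). */
	static const unsigned long khz[] = { 500000, 1000000, 1500000 };
	static const unsigned long uw[] = { 120000, 380000, 900000 };
	int i;

	for (i = 0; i < ARRAY_SIZE(khz); i++) {
		if (khz[i] >= *freq) {
			*freq = khz[i];
			*power = uw[i];
			return 0;
		}
	}
	return -EINVAL;
}

static int example_register_em(struct cpufreq_policy *policy)
{
	struct em_data_callback em_cb = { .active_power = example_active_power };
	struct device *cpu_dev = get_cpu_device(cpumask_first(policy->cpus));

	/* Three perf states, CPU mask taken from the policy, and
	 * microwatts = true to match the power scale used above. */
	return em_dev_register_perf_domain(cpu_dev, 3, &em_cb,
					   policy->cpus, true);
}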
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index e7e47d9be1e5..874ad834dc8d 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG +ifeq ($(CONFIG_DYNAMIC_DEBUG), y) +CFLAGS_swap.o := -DDEBUG +CFLAGS_snapshot.o := -DDEBUG +CFLAGS_energy_model.o := -DDEBUG +endif KASAN_SANITIZE_snapshot.o := n @@ -10,7 +14,8 @@ obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o -obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o +obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o +obj-$(CONFIG_HIBERNATION_SNAPSHOT_DEV) += user.o obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index 9af5a50d3489..b29c8aca7486 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -54,7 +54,7 @@ static void try_to_suspend(struct work_struct *work) goto out; /* - * If the wakeup occured for an unknown reason, wait to prevent the + * If the wakeup occurred for an unknown reason, wait to prevent the * system from trying to suspend and waking up in a tight loop. */ if (final_count == initial_count) diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 0a9326f5f421..f82111837b8d 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -1,44 +1,49 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Energy Model of CPUs + * Energy Model of devices * - * Copyright (c) 2018, Arm ltd. + * Copyright (c) 2018-2021, Arm ltd. * Written by: Quentin Perret, Arm ltd. + * Improvements provided by: Lukasz Luba, Arm ltd. */ #define pr_fmt(fmt) "energy_model: " fmt #include <linux/cpu.h> +#include <linux/cpufreq.h> #include <linux/cpumask.h> #include <linux/debugfs.h> #include <linux/energy_model.h> #include <linux/sched/topology.h> #include <linux/slab.h> -/* Mapping of each CPU to the performance domain to which it belongs. */ -static DEFINE_PER_CPU(struct em_perf_domain *, em_data); - /* * Mutex serializing the registrations of performance domains and letting * callbacks defined by drivers sleep. 
*/ static DEFINE_MUTEX(em_pd_mutex); +static bool _is_cpu_device(struct device *dev) +{ + return (dev->bus == &cpu_subsys); +} + #ifdef CONFIG_DEBUG_FS static struct dentry *rootdir; -static void em_debug_create_cs(struct em_cap_state *cs, struct dentry *pd) +static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd) { struct dentry *d; char name[24]; - snprintf(name, sizeof(name), "cs:%lu", cs->frequency); + snprintf(name, sizeof(name), "ps:%lu", ps->frequency); - /* Create per-cs directory */ + /* Create per-ps directory */ d = debugfs_create_dir(name, pd); - debugfs_create_ulong("frequency", 0444, d, &cs->frequency); - debugfs_create_ulong("power", 0444, d, &cs->power); - debugfs_create_ulong("cost", 0444, d, &cs->cost); + debugfs_create_ulong("frequency", 0444, d, &ps->frequency); + debugfs_create_ulong("power", 0444, d, &ps->power); + debugfs_create_ulong("cost", 0444, d, &ps->cost); + debugfs_create_ulong("inefficient", 0444, d, &ps->flags); } static int em_debug_cpus_show(struct seq_file *s, void *unused) @@ -49,22 +54,43 @@ static int em_debug_cpus_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(em_debug_cpus); -static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) +static int em_debug_flags_show(struct seq_file *s, void *unused) +{ + struct em_perf_domain *pd = s->private; + + seq_printf(s, "%#lx\n", pd->flags); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(em_debug_flags); + +static void em_debug_create_pd(struct device *dev) { struct dentry *d; - char name[8]; int i; - snprintf(name, sizeof(name), "pd%d", cpu); - /* Create the directory of the performance domain */ - d = debugfs_create_dir(name, rootdir); + d = debugfs_create_dir(dev_name(dev), rootdir); - debugfs_create_file("cpus", 0444, d, pd->cpus, &em_debug_cpus_fops); + if (_is_cpu_device(dev)) + debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus, + &em_debug_cpus_fops); + + debugfs_create_file("flags", 0444, d, dev->em_pd, + &em_debug_flags_fops); + + /* Create a sub-directory for each performance state */ + for (i = 0; i < dev->em_pd->nr_perf_states; i++) + em_debug_create_ps(&dev->em_pd->table[i], d); - /* Create a sub-directory for each capacity state */ - for (i = 0; i < pd->nr_cap_states; i++) - em_debug_create_cs(&pd->table[i], d); +} + +static void em_debug_remove_pd(struct device *dev) +{ + struct dentry *debug_dir; + + debug_dir = debugfs_lookup(dev_name(dev), rootdir); + debugfs_remove_recursive(debug_dir); } static int __init em_debug_init(void) @@ -74,136 +100,252 @@ static int __init em_debug_init(void) return 0; } -core_initcall(em_debug_init); +fs_initcall(em_debug_init); #else /* CONFIG_DEBUG_FS */ -static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) {} +static void em_debug_create_pd(struct device *dev) {} +static void em_debug_remove_pd(struct device *dev) {} #endif -static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states, - struct em_data_callback *cb) + +static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, + int nr_states, struct em_data_callback *cb, + unsigned long flags) { - unsigned long opp_eff, prev_opp_eff = ULONG_MAX; - unsigned long power, freq, prev_freq = 0; - int i, ret, cpu = cpumask_first(span); - struct em_cap_state *table; - struct em_perf_domain *pd; + unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX; + struct em_perf_state *table; + int i, ret; u64 fmax; - if (!cb->active_power) - return NULL; - - pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL); - if (!pd) - 
return NULL; - table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL); if (!table) - goto free_pd; + return -ENOMEM; - /* Build the list of capacity states for this performance domain */ + /* Build the list of performance states for this performance domain */ for (i = 0, freq = 0; i < nr_states; i++, freq++) { /* * active_power() is a driver callback which ceils 'freq' to - * lowest capacity state of 'cpu' above 'freq' and updates + * lowest performance state of 'dev' above 'freq' and updates * 'power' and 'freq' accordingly. */ - ret = cb->active_power(&power, &freq, cpu); + ret = cb->active_power(dev, &power, &freq); if (ret) { - pr_err("pd%d: invalid cap. state: %d\n", cpu, ret); - goto free_cs_table; + dev_err(dev, "EM: invalid perf. state: %d\n", + ret); + goto free_ps_table; } /* * We expect the driver callback to increase the frequency for - * higher capacity states. + * higher performance states. */ if (freq <= prev_freq) { - pr_err("pd%d: non-increasing freq: %lu\n", cpu, freq); - goto free_cs_table; + dev_err(dev, "EM: non-increasing freq: %lu\n", + freq); + goto free_ps_table; } /* * The power returned by active_state() is expected to be - * positive, in milli-watts and to fit into 16 bits. + * positive and be in range. */ - if (!power || power > EM_CPU_MAX_POWER) { - pr_err("pd%d: invalid power: %lu\n", cpu, power); - goto free_cs_table; + if (!power || power > EM_MAX_POWER) { + dev_err(dev, "EM: invalid power: %lu\n", + power); + goto free_ps_table; } table[i].power = power; table[i].frequency = prev_freq = freq; - - /* - * The hertz/watts efficiency ratio should decrease as the - * frequency grows on sane platforms. But this isn't always - * true in practice so warn the user if a higher OPP is more - * power efficient than a lower one. - */ - opp_eff = freq / power; - if (opp_eff >= prev_opp_eff) - pr_warn("pd%d: hertz/watts ratio non-monotonically decreasing: em_cap_state %d >= em_cap_state%d\n", - cpu, i, i - 1); - prev_opp_eff = opp_eff; } - /* Compute the cost of each capacity_state. */ + /* Compute the cost of each performance state. 
*/ fmax = (u64) table[nr_states - 1].frequency; - for (i = 0; i < nr_states; i++) { - table[i].cost = div64_u64(fmax * table[i].power, - table[i].frequency); + for (i = nr_states - 1; i >= 0; i--) { + unsigned long power_res, cost; + + if (flags & EM_PERF_DOMAIN_ARTIFICIAL) { + ret = cb->get_cost(dev, table[i].frequency, &cost); + if (ret || !cost || cost > EM_MAX_POWER) { + dev_err(dev, "EM: invalid cost %lu %d\n", + cost, ret); + goto free_ps_table; + } + } else { + power_res = table[i].power; + cost = div64_u64(fmax * power_res, table[i].frequency); + } + + table[i].cost = cost; + + if (table[i].cost >= prev_cost) { + table[i].flags = EM_PERF_STATE_INEFFICIENT; + dev_dbg(dev, "EM: OPP:%lu is inefficient\n", + table[i].frequency); + } else { + prev_cost = table[i].cost; + } } pd->table = table; - pd->nr_cap_states = nr_states; - cpumask_copy(to_cpumask(pd->cpus), span); - - em_debug_create_pd(pd, cpu); + pd->nr_perf_states = nr_states; - return pd; + return 0; -free_cs_table: +free_ps_table: kfree(table); -free_pd: - kfree(pd); + return -EINVAL; +} + +static int em_create_pd(struct device *dev, int nr_states, + struct em_data_callback *cb, cpumask_t *cpus, + unsigned long flags) +{ + struct em_perf_domain *pd; + struct device *cpu_dev; + int cpu, ret, num_cpus; - return NULL; + if (_is_cpu_device(dev)) { + num_cpus = cpumask_weight(cpus); + + /* Prevent max possible energy calculation to not overflow */ + if (num_cpus > EM_MAX_NUM_CPUS) { + dev_err(dev, "EM: too many CPUs, overflow possible\n"); + return -EINVAL; + } + + pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + cpumask_copy(em_span_cpus(pd), cpus); + } else { + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + } + + ret = em_create_perf_table(dev, pd, nr_states, cb, flags); + if (ret) { + kfree(pd); + return ret; + } + + if (_is_cpu_device(dev)) + for_each_cpu(cpu, cpus) { + cpu_dev = get_cpu_device(cpu); + cpu_dev->em_pd = pd; + } + + dev->em_pd = pd; + + return 0; +} + +static void em_cpufreq_update_efficiencies(struct device *dev) +{ + struct em_perf_domain *pd = dev->em_pd; + struct em_perf_state *table; + struct cpufreq_policy *policy; + int found = 0; + int i; + + if (!_is_cpu_device(dev) || !pd) + return; + + policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd))); + if (!policy) { + dev_warn(dev, "EM: Access to CPUFreq policy failed"); + return; + } + + table = pd->table; + + for (i = 0; i < pd->nr_perf_states; i++) { + if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT)) + continue; + + if (!cpufreq_table_set_inefficient(policy, table[i].frequency)) + found++; + } + + cpufreq_cpu_put(policy); + + if (!found) + return; + + /* + * Efficiencies have been installed in CPUFreq, inefficient frequencies + * will be skipped. The EM can do the same. + */ + pd->flags |= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES; } /** + * em_pd_get() - Return the performance domain for a device + * @dev : Device to find the performance domain for + * + * Returns the performance domain to which @dev belongs, or NULL if it doesn't + * exist. + */ +struct em_perf_domain *em_pd_get(struct device *dev) +{ + if (IS_ERR_OR_NULL(dev)) + return NULL; + + return dev->em_pd; +} +EXPORT_SYMBOL_GPL(em_pd_get); + +/** * em_cpu_get() - Return the performance domain for a CPU * @cpu : CPU to find the performance domain for * - * Return: the performance domain to which 'cpu' belongs, or NULL if it doesn't + * Returns the performance domain to which @cpu belongs, or NULL if it doesn't * exist. 
*/ struct em_perf_domain *em_cpu_get(int cpu) { - return READ_ONCE(per_cpu(em_data, cpu)); + struct device *cpu_dev; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return NULL; + + return em_pd_get(cpu_dev); } EXPORT_SYMBOL_GPL(em_cpu_get); /** - * em_register_perf_domain() - Register the Energy Model of a performance domain - * @span : Mask of CPUs in the performance domain - * @nr_states : Number of capacity states to register + * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device + * @dev : Device for which the EM is to register + * @nr_states : Number of performance states to register * @cb : Callback functions providing the data of the Energy Model + * @cpus : Pointer to cpumask_t, which in case of a CPU device is + * obligatory. It can be taken from i.e. 'policy->cpus'. For other + * type of devices this should be set to NULL. + * @microwatts : Flag indicating that the power values are in micro-Watts or + * in some other scale. It must be set properly. * * Create Energy Model tables for a performance domain using the callbacks * defined in cb. * + * The @microwatts is important to set with correct value. Some kernel + * sub-systems might rely on this flag and check if all devices in the EM are + * using the same scale. + * * If multiple clients register the same performance domain, all but the first * registration will be ignored. * * Return 0 on success */ -int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, - struct em_data_callback *cb) +int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, + struct em_data_callback *cb, cpumask_t *cpus, + bool microwatts) { unsigned long cap, prev_cap = 0; - struct em_perf_domain *pd; - int cpu, ret = 0; + unsigned long flags = 0; + int cpu, ret; - if (!span || !nr_states || !cb) + if (!dev || !nr_states || !cb) return -EINVAL; /* @@ -212,47 +354,88 @@ int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, */ mutex_lock(&em_pd_mutex); - for_each_cpu(cpu, span) { - /* Make sure we don't register again an existing domain. */ - if (READ_ONCE(per_cpu(em_data, cpu))) { - ret = -EEXIST; - goto unlock; - } + if (dev->em_pd) { + ret = -EEXIST; + goto unlock; + } - /* - * All CPUs of a domain must have the same micro-architecture - * since they all share the same table. - */ - cap = arch_scale_cpu_capacity(cpu); - if (prev_cap && prev_cap != cap) { - pr_err("CPUs of %*pbl must have the same capacity\n", - cpumask_pr_args(span)); + if (_is_cpu_device(dev)) { + if (!cpus) { + dev_err(dev, "EM: invalid CPU mask\n"); ret = -EINVAL; goto unlock; } - prev_cap = cap; + + for_each_cpu(cpu, cpus) { + if (em_cpu_get(cpu)) { + dev_err(dev, "EM: exists for CPU%d\n", cpu); + ret = -EEXIST; + goto unlock; + } + /* + * All CPUs of a domain must have the same + * micro-architecture since they all share the same + * table. + */ + cap = arch_scale_cpu_capacity(cpu); + if (prev_cap && prev_cap != cap) { + dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n", + cpumask_pr_args(cpus)); + + ret = -EINVAL; + goto unlock; + } + prev_cap = cap; + } } - /* Create the performance domain and add it to the Energy Model. */ - pd = em_create_pd(span, nr_states, cb); - if (!pd) { - ret = -EINVAL; + if (microwatts) + flags |= EM_PERF_DOMAIN_MICROWATTS; + else if (cb->get_cost) + flags |= EM_PERF_DOMAIN_ARTIFICIAL; + + ret = em_create_pd(dev, nr_states, cb, cpus, flags); + if (ret) goto unlock; - } - for_each_cpu(cpu, span) { - /* - * The per-cpu array can be read concurrently from em_cpu_get(). 
- * The barrier enforces the ordering needed to make sure readers - * can only access well formed em_perf_domain structs. - */ - smp_store_release(per_cpu_ptr(&em_data, cpu), pd); - } + dev->em_pd->flags |= flags; + + em_cpufreq_update_efficiencies(dev); + + em_debug_create_pd(dev); + dev_info(dev, "EM: created perf domain\n"); - pr_debug("Created perf domain %*pbl\n", cpumask_pr_args(span)); unlock: mutex_unlock(&em_pd_mutex); - return ret; } -EXPORT_SYMBOL_GPL(em_register_perf_domain); +EXPORT_SYMBOL_GPL(em_dev_register_perf_domain); + +/** + * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device + * @dev : Device for which the EM is registered + * + * Unregister the EM for the specified @dev (but not a CPU device). + */ +void em_dev_unregister_perf_domain(struct device *dev) +{ + if (IS_ERR_OR_NULL(dev) || !dev->em_pd) + return; + + if (_is_cpu_device(dev)) + return; + + /* + * The mutex separates all register/unregister requests and protects + * from potential clean-up/setup issues in the debugfs directories. + * The debugfs directory name is the same as device's name. + */ + mutex_lock(&em_pd_mutex); + em_debug_remove_pd(dev); + + kfree(dev->em_pd->table); + kfree(dev->em_pd); + dev->em_pd = NULL; + mutex_unlock(&em_pd_mutex); +} +EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 6dbeedb7354c..793c55a2becb 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -28,9 +28,9 @@ #include <linux/gfp.h> #include <linux/syscore_ops.h> #include <linux/ctype.h> -#include <linux/genhd.h> #include <linux/ktime.h> #include <linux/security.h> +#include <linux/secretmem.h> #include <trace/events/power.h> #include "power.h" @@ -67,9 +67,23 @@ bool freezer_test_done; static const struct platform_hibernation_ops *hibernation_ops; +static atomic_t hibernate_atomic = ATOMIC_INIT(1); + +bool hibernate_acquire(void) +{ + return atomic_add_unless(&hibernate_atomic, -1, 0); +} + +void hibernate_release(void) +{ + atomic_inc(&hibernate_atomic); +} + bool hibernation_available(void) { - return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION); + return nohibernate == 0 && + !security_locked_down(LOCKDOWN_HIBERNATION) && + !secretmem_active() && !cxl_mem_active(); } /** @@ -78,20 +92,24 @@ bool hibernation_available(void) */ void hibernation_set_ops(const struct platform_hibernation_ops *ops) { + unsigned int sleep_flags; + if (ops && !(ops->begin && ops->end && ops->pre_snapshot && ops->prepare && ops->finish && ops->enter && ops->pre_restore && ops->restore_cleanup && ops->leave)) { WARN_ON(1); return; } - lock_system_sleep(); + + sleep_flags = lock_system_sleep(); + hibernation_ops = ops; if (ops) hibernation_mode = HIBERNATION_PLATFORM; else if (hibernation_mode == HIBERNATION_PLATFORM) hibernation_mode = HIBERNATION_SHUTDOWN; - unlock_system_sleep(); + unlock_system_sleep(sleep_flags); } EXPORT_SYMBOL_GPL(hibernation_set_ops); @@ -285,7 +303,7 @@ static int create_image(int platform_mode) if (error || hibernation_test(TEST_PLATFORM)) goto Platform_finish; - error = suspend_disable_secondary_cpus(); + error = pm_sleep_disable_secondary_cpus(); if (error || hibernation_test(TEST_CPUS)) goto Enable_cpus; @@ -314,7 +332,7 @@ static int create_image(int platform_mode) if (!in_suspend) { events_check_enabled = false; - clear_free_pages(); + clear_or_poison_free_pages(); } platform_leave(platform_mode); @@ -327,7 +345,7 @@ static int create_image(int platform_mode) local_irq_enable(); 
Enable_cpus: - suspend_enable_secondary_cpus(); + pm_sleep_enable_secondary_cpus(); /* Allow architectures to do nosmt-specific post-resume dances */ if (!in_suspend) @@ -451,6 +469,8 @@ static int resume_target_kernel(bool platform_mode) if (error) goto Cleanup; + cpuidle_pause(); + error = hibernate_resume_nonboot_cpu_disable(); if (error) goto Enable_cpus; @@ -494,7 +514,7 @@ static int resume_target_kernel(bool platform_mode) local_irq_enable(); Enable_cpus: - suspend_enable_secondary_cpus(); + pm_sleep_enable_secondary_cpus(); Cleanup: platform_restore_cleanup(platform_mode); @@ -572,7 +592,7 @@ int hibernation_platform_enter(void) if (error) goto Platform_finish; - error = suspend_disable_secondary_cpus(); + error = pm_sleep_disable_secondary_cpus(); if (error) goto Enable_cpus; @@ -594,7 +614,7 @@ int hibernation_platform_enter(void) local_irq_enable(); Enable_cpus: - suspend_enable_secondary_cpus(); + pm_sleep_enable_secondary_cpus(); Platform_finish: hibernation_ops->finish(); @@ -625,7 +645,7 @@ static void power_down(void) int error; if (hibernation_mode == HIBERNATION_SUSPEND) { - error = suspend_devices_and_enter(PM_SUSPEND_MEM); + error = suspend_devices_and_enter(mem_sleep_current); if (error) { hibernation_mode = hibernation_ops ? HIBERNATION_PLATFORM : @@ -647,9 +667,9 @@ static void power_down(void) break; case HIBERNATION_PLATFORM: hibernation_platform_enter(); - /* Fall through */ + fallthrough; case HIBERNATION_SHUTDOWN: - if (pm_power_off) + if (kernel_can_power_off()) kernel_power_off(); break; } @@ -672,13 +692,15 @@ static int load_image_and_restore(void) lock_device_hotplug(); error = create_basic_memory_bitmaps(); - if (error) + if (error) { + swsusp_close(FMODE_READ | FMODE_EXCL); goto Unlock; + } error = swsusp_read(&flags); - swsusp_close(FMODE_READ); + swsusp_close(FMODE_READ | FMODE_EXCL); if (!error) - hibernation_restore(flags & SF_PLATFORM_MODE); + error = hibernation_restore(flags & SF_PLATFORM_MODE); pr_err("Failed to load image, recovering.\n"); swsusp_free(); @@ -694,28 +716,27 @@ static int load_image_and_restore(void) */ int hibernate(void) { - int error, nr_calls = 0; bool snapshot_test = false; + unsigned int sleep_flags; + int error; if (!hibernation_available()) { pm_pr_dbg("Hibernation not available.\n"); return -EPERM; } - lock_system_sleep(); + sleep_flags = lock_system_sleep(); /* The snapshot device should not be opened while we're running */ - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + if (!hibernate_acquire()) { error = -EBUSY; goto Unlock; } pr_info("hibernation entry\n"); pm_prepare_console(); - error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); - if (error) { - nr_calls--; - goto Exit; - } + error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); + if (error) + goto Restore; ksys_sync_helper(); @@ -773,16 +794,114 @@ int hibernate(void) /* Don't bother checking whether freezer_test_done is true */ freezer_test_done = false; Exit: - __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL); + pm_notifier_call_chain(PM_POST_HIBERNATION); + Restore: pm_restore_console(); - atomic_inc(&snapshot_device_available); + hibernate_release(); Unlock: - unlock_system_sleep(); + unlock_system_sleep(sleep_flags); pr_info("hibernation exit\n"); return error; } +/** + * hibernate_quiet_exec - Execute a function with all devices frozen. + * @func: Function to execute. + * @data: Data pointer to pass to @func. 
+ * + * Return the @func return value or an error code if it cannot be executed. + */ +int hibernate_quiet_exec(int (*func)(void *data), void *data) +{ + unsigned int sleep_flags; + int error; + + sleep_flags = lock_system_sleep(); + + if (!hibernate_acquire()) { + error = -EBUSY; + goto unlock; + } + + pm_prepare_console(); + + error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); + if (error) + goto restore; + + error = freeze_processes(); + if (error) + goto exit; + + lock_device_hotplug(); + + pm_suspend_clear_flags(); + + error = platform_begin(true); + if (error) + goto thaw; + + error = freeze_kernel_threads(); + if (error) + goto thaw; + + error = dpm_prepare(PMSG_FREEZE); + if (error) + goto dpm_complete; + + suspend_console(); + + error = dpm_suspend(PMSG_FREEZE); + if (error) + goto dpm_resume; + + error = dpm_suspend_end(PMSG_FREEZE); + if (error) + goto dpm_resume; + + error = platform_pre_snapshot(true); + if (error) + goto skip; + + error = func(data); + +skip: + platform_finish(true); + + dpm_resume_start(PMSG_THAW); + +dpm_resume: + dpm_resume(PMSG_THAW); + + resume_console(); + +dpm_complete: + dpm_complete(PMSG_THAW); + + thaw_kernel_threads(); + +thaw: + platform_end(true); + + unlock_device_hotplug(); + + thaw_processes(); + +exit: + pm_notifier_call_chain(PM_POST_HIBERNATION); + +restore: + pm_restore_console(); + + hibernate_release(); + +unlock: + unlock_system_sleep(sleep_flags); + + return error; +} +EXPORT_SYMBOL_GPL(hibernate_quiet_exec); /** * software_resume - Resume from a saved hibernation image. @@ -801,7 +920,7 @@ int hibernate(void) */ static int software_resume(void) { - int error, nr_calls = 0; + int error; /* * If the user said "noresume".. bail out early. @@ -839,17 +958,6 @@ static int software_resume(void) /* Check if the device is there */ swsusp_resume_device = name_to_dev_t(resume_file); - - /* - * name_to_dev_t is ineffective to verify parition if resume_file is in - * integer format. (e.g. 
major:minor) - */ - if (isdigit(resume_file[0]) && resume_wait) { - int partno; - while (!get_gendisk(swsusp_resume_device, &partno)) - msleep(10); - } - if (!swsusp_resume_device) { /* * Some device discovery might still be in progress; we need @@ -880,38 +988,44 @@ static int software_resume(void) goto Unlock; /* The snapshot device should not be opened while we're running */ - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + if (!hibernate_acquire()) { error = -EBUSY; - swsusp_close(FMODE_READ); + swsusp_close(FMODE_READ | FMODE_EXCL); goto Unlock; } pr_info("resume from hibernation\n"); pm_prepare_console(); - error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); - if (error) { - nr_calls--; - goto Close_Finish; - } + error = pm_notifier_call_chain_robust(PM_RESTORE_PREPARE, PM_POST_RESTORE); + if (error) + goto Restore; pm_pr_dbg("Preparing processes for hibernation restore.\n"); error = freeze_processes(); if (error) goto Close_Finish; + + error = freeze_kernel_threads(); + if (error) { + thaw_processes(); + goto Close_Finish; + } + error = load_image_and_restore(); thaw_processes(); Finish: - __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); + pm_notifier_call_chain(PM_POST_RESTORE); + Restore: pm_restore_console(); pr_info("resume failed (%d)\n", error); - atomic_inc(&snapshot_device_available); + hibernate_release(); /* For success case, the suspend path will release the lock */ Unlock: mutex_unlock(&system_transition_mutex); pm_pr_dbg("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: - swsusp_close(FMODE_READ); + swsusp_close(FMODE_READ | FMODE_EXCL); goto Finish; } @@ -992,11 +1106,12 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { + int mode = HIBERNATION_INVALID; + unsigned int sleep_flags; int error = 0; - int i; int len; char *p; - int mode = HIBERNATION_INVALID; + int i; if (!hibernation_available()) return -EPERM; @@ -1004,7 +1119,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, p = memchr(buf, '\n', n); len = p ? p - buf : n; - lock_system_sleep(); + sleep_flags = lock_system_sleep(); for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { if (len == strlen(hibernation_modes[i]) && !strncmp(buf, hibernation_modes[i], len)) { @@ -1034,7 +1149,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, if (!error) pm_pr_dbg("Hibernation mode set to '%s'\n", hibernation_modes[mode]); - unlock_system_sleep(); + unlock_system_sleep(sleep_flags); return error ? 
error : n; } @@ -1043,16 +1158,17 @@ power_attr(disk); static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), + return sprintf(buf, "%d:%d\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); } static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { - dev_t res; + unsigned int sleep_flags; int len = n; char *name; + dev_t res; if (len && buf[len-1] == '\n') len--; @@ -1065,9 +1181,10 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, if (!res) return -EINVAL; - lock_system_sleep(); + sleep_flags = lock_system_sleep(); swsusp_resume_device = res; - unlock_system_sleep(); + unlock_system_sleep(sleep_flags); + pm_pr_dbg("Configured hibernation resume from disk to %u\n", swsusp_resume_device); noresume = 0; @@ -1143,7 +1260,7 @@ static ssize_t reserved_size_store(struct kobject *kobj, power_attr(reserved_size); -static struct attribute * g[] = { +static struct attribute *g[] = { &disk_attr.attr, &resume_offset_attr.attr, &resume_attr.attr, @@ -1171,7 +1288,7 @@ static int __init resume_setup(char *str) if (noresume) return 1; - strncpy( resume_file, str, 255 ); + strncpy(resume_file, str, 255); return 1; } @@ -1221,7 +1338,7 @@ static int __init resumedelay_setup(char *str) int rc = kstrtouint(str, 0, &resume_delay); if (rc) - return rc; + pr_warn("resumedelay: bad option string '%s'\n", str); return 1; } diff --git a/kernel/power/main.c b/kernel/power/main.c index 69b7a8aeca3b..31ec4a9b9d70 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -21,14 +21,16 @@ #ifdef CONFIG_PM_SLEEP -void lock_system_sleep(void) +unsigned int lock_system_sleep(void) { - current->flags |= PF_FREEZER_SKIP; + unsigned int flags = current->flags; + current->flags |= PF_NOFREEZE; mutex_lock(&system_transition_mutex); + return flags; } EXPORT_SYMBOL_GPL(lock_system_sleep); -void unlock_system_sleep(void) +void unlock_system_sleep(unsigned int flags) { /* * Don't use freezer_count() because we don't want the call to @@ -46,7 +48,8 @@ void unlock_system_sleep(void) * Which means, if we use try_to_freeze() here, it would make them * enter the refrigerator, thus causing hibernation to lockup. */ - current->flags &= ~PF_FREEZER_SKIP; + if (!(flags & PF_NOFREEZE)) + current->flags &= ~PF_NOFREEZE; mutex_unlock(&system_transition_mutex); } EXPORT_SYMBOL_GPL(unlock_system_sleep); @@ -80,18 +83,18 @@ int unregister_pm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_pm_notifier); -int __pm_notifier_call_chain(unsigned long val, int nr_to_call, int *nr_calls) +int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down) { int ret; - ret = __blocking_notifier_call_chain(&pm_chain_head, val, NULL, - nr_to_call, nr_calls); + ret = blocking_notifier_call_chain_robust(&pm_chain_head, val_up, val_down, NULL); return notifier_to_errno(ret); } + int pm_notifier_call_chain(unsigned long val) { - return __pm_notifier_call_chain(val, -1, NULL); + return blocking_notifier_call_chain(&pm_chain_head, val, NULL); } /* If set, devices may be suspended and resumed asynchronously. 
*/ @@ -127,7 +130,9 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *s = buf; suspend_state_t i; - for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) + for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) { + if (i >= PM_SUSPEND_MEM && cxl_mem_active()) + continue; if (mem_sleep_states[i]) { const char *label = mem_sleep_states[i]; @@ -136,6 +141,7 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, else s += sprintf(s, "%s ", label); } + } /* Convert the last space to a newline if needed. */ if (s != buf) @@ -260,16 +266,17 @@ static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr, static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { + unsigned int sleep_flags; const char * const *s; + int error = -EINVAL; int level; char *p; int len; - int error = -EINVAL; p = memchr(buf, '\n', n); len = p ? p - buf : n; - lock_system_sleep(); + sleep_flags = lock_system_sleep(); level = TEST_FIRST; for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) @@ -279,7 +286,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, break; } - unlock_system_sleep(); + unlock_system_sleep(sleep_flags); return error ? error : n; } @@ -387,7 +394,7 @@ static struct attribute *suspend_attrs[] = { NULL, }; -static struct attribute_group suspend_attr_group = { +static const struct attribute_group suspend_attr_group = { .name = "suspend_stats", .attrs = suspend_attrs, }; @@ -504,7 +511,10 @@ static ssize_t pm_wakeup_irq_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return pm_wakeup_irq ? sprintf(buf, "%u\n", pm_wakeup_irq) : -ENODATA; + if (!pm_wakeup_irq()) + return -ENODATA; + + return sprintf(buf, "%u\n", pm_wakeup_irq()); } power_attr_ro(pm_wakeup_irq); @@ -535,34 +545,12 @@ static ssize_t pm_debug_messages_store(struct kobject *kobj, power_attr(pm_debug_messages); -/** - * __pm_pr_dbg - Print a suspend debug message to the kernel log. - * @defer: Whether or not to use printk_deferred() to print the message. - * @fmt: Message format. - * - * The message will be emitted if enabled through the pm_debug_messages - * sysfs attribute. - */ -void __pm_pr_dbg(bool defer, const char *fmt, ...) +static int __init pm_debug_messages_setup(char *str) { - struct va_format vaf; - va_list args; - - if (!pm_debug_messages_on) - return; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - if (defer) - printk_deferred(KERN_DEBUG "PM: %pV", &vaf); - else - printk(KERN_DEBUG "PM: %pV", &vaf); - - va_end(args); + pm_debug_messages_on = true; + return 1; } +__setup("pm_debug_messages", pm_debug_messages_setup); #else /* !CONFIG_PM_SLEEP_DEBUG */ static inline void pm_print_times_init(void) {} @@ -570,7 +558,7 @@ static inline void pm_print_times_init(void) {} struct kobject *power_kobj; -/** +/* * state - control system sleep states. 
* * show() returns available sleep state labels, which may be "mem", "standby", diff --git a/kernel/power/power.h b/kernel/power/power.h index 7cdc64dc2373..b4f433943209 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -4,6 +4,8 @@ #include <linux/utsname.h> #include <linux/freezer.h> #include <linux/compiler.h> +#include <linux/cpu.h> +#include <linux/cpuidle.h> struct swsusp_info { struct new_utsname uts; @@ -32,7 +34,7 @@ static inline int init_header_complete(struct swsusp_info *info) return arch_hibernation_header_save(info, MAX_ARCH_HEADER_SIZE); } -static inline char *check_image_kernel(struct swsusp_info *info) +static inline const char *check_image_kernel(struct swsusp_info *info) { return arch_hibernation_header_restore(info) ? "architecture specific data" : NULL; @@ -106,7 +108,7 @@ extern int create_basic_memory_bitmaps(void); extern void free_basic_memory_bitmaps(void); extern int hibernate_preallocate_memory(void); -extern void clear_free_pages(void); +extern void clear_or_poison_free_pages(void); /** * Auxiliary structure used for reading the snapshot image data and @@ -154,8 +156,8 @@ extern int snapshot_write_next(struct snapshot_handle *handle); extern void snapshot_write_finalize(struct snapshot_handle *handle); extern int snapshot_image_loaded(struct snapshot_handle *handle); -/* If unset, the snapshot device cannot be open. */ -extern atomic_t snapshot_device_available; +extern bool hibernate_acquire(void); +extern void hibernate_release(void); extern sector_t alloc_swapdev_block(int swap); extern void free_all_swap_pages(int swap); @@ -168,6 +170,7 @@ extern int swsusp_swap_in_use(void); #define SF_PLATFORM_MODE 1 #define SF_NOCOMPRESS_MODE 2 #define SF_CRC32_MODE 4 +#define SF_HW_SIG 8 /* kernel/power/hibernate.c */ extern int swsusp_check(void); @@ -210,8 +213,7 @@ static inline void suspend_test_finish(const char *label) {} #ifdef CONFIG_PM_SLEEP /* kernel/power/main.c */ -extern int __pm_notifier_call_chain(unsigned long val, int nr_to_call, - int *nr_calls); +extern int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down); extern int pm_notifier_call_chain(unsigned long val); #endif @@ -311,3 +313,15 @@ extern int pm_wake_lock(const char *buf); extern int pm_wake_unlock(const char *buf); #endif /* !CONFIG_PM_WAKELOCKS */ + +static inline int pm_sleep_disable_secondary_cpus(void) +{ + cpuidle_pause(); + return suspend_disable_secondary_cpus(); +} + +static inline void pm_sleep_enable_secondary_cpus(void) +{ + suspend_enable_secondary_cpus(); + cpuidle_resume(); +} diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index 6d475281c730..562aa0e450ed 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c @@ -29,7 +29,7 @@ static void handle_poweroff(int key) schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work); } -static struct sysrq_key_op sysrq_poweroff_op = { +static const struct sysrq_key_op sysrq_poweroff_op = { .handler = handle_poweroff, .help_msg = "poweroff(o)", .action_msg = "Power Off", diff --git a/kernel/power/process.c b/kernel/power/process.c index 4b6a54da7e65..ddd9988327fe 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -1,14 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* - * drivers/power/process.c - Functions for starting/stopping processes on + * drivers/power/process.c - Functions for starting/stopping processes on * suspend transitions. * * Originally from swsusp. 
*/ - -#undef DEBUG - #include <linux/interrupt.h> #include <linux/oom.h> #include <linux/suspend.h> @@ -53,8 +50,7 @@ static int try_to_freeze_tasks(bool user_only) if (p == current || !freeze_task(p)) continue; - if (!freezer_should_skip(p)) - todo++; + todo++; } read_unlock(&tasklist_lock); @@ -94,13 +90,12 @@ static int try_to_freeze_tasks(bool user_only) todo - wq_busy, wq_busy); if (wq_busy) - show_workqueue_state(); + show_all_workqueues(); if (!wakeup || pm_debug_messages_on) { read_lock(&tasklist_lock); for_each_process_thread(g, p) { - if (p != current && !freezer_should_skip(p) - && freezing(p) && !frozen(p)) + if (p != current && freezing(p) && !frozen(p)) sched_show_task(p); } read_unlock(&tasklist_lock); @@ -132,9 +127,9 @@ int freeze_processes(void) current->flags |= PF_SUSPEND_TASK; if (!pm_freezing) - atomic_inc(&system_freezing_cnt); + static_branch_inc(&freezer_active); - pm_wakeup_clear(true); + pm_wakeup_clear(0); pr_info("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); @@ -146,7 +141,7 @@ int freeze_processes(void) BUG_ON(in_atomic()); /* - * Now that the whole userspace is frozen we need to disbale + * Now that the whole userspace is frozen we need to disable * the OOM killer to disallow any further interference with * killable tasks. There is no guarantee oom victims will * ever reach a point they go away we have to wait with a timeout. @@ -193,7 +188,7 @@ void thaw_processes(void) trace_suspend_resume(TPS("thaw_processes"), 0, true); if (pm_freezing) - atomic_dec(&system_freezing_cnt); + static_branch_dec(&freezer_active); pm_freezing = false; pm_nosig_freezing = false; @@ -235,7 +230,7 @@ void thaw_kernel_threads(void) read_lock(&tasklist_lock); for_each_process_thread(g, p) { - if (p->flags & (PF_KTHREAD | PF_WQ_WORKER)) + if (p->flags & PF_KTHREAD) __thaw_task(p); } read_unlock(&tasklist_lock); diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 83edf8698118..af51ed6d45ef 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -1,31 +1,21 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * This module exposes the interface to kernel space for specifying - * QoS dependencies. It provides infrastructure for registration of: + * Power Management Quality of Service (PM QoS) support base. * - * Dependents on a QoS value : register requests - * Watchers of QoS value : get notified when target QoS value changes + * Copyright (C) 2020 Intel Corporation * - * This QoS design is best effort based. Dependents register their QoS needs. - * Watchers register to keep track of the current QoS needs of the system. + * Authors: + * Mark Gross <mgross@linux.intel.com> + * Rafael J. Wysocki <rafael.j.wysocki@intel.com> * - * There are 3 basic classes of QoS parameter: latency, timeout, throughput - * each have defined units: - * latency: usec - * timeout: usec <-- currently not used. - * throughput: kbs (kilo byte / sec) + * Provided here is an interface for specifying PM QoS dependencies. It allows + * entities depending on QoS constraints to register their requests which are + * aggregated as appropriate to produce effective constraints (target values) + * that can be monitored by entities needing to respect them, either by polling + * or through a built-in notification mechanism. * - * There are lists of pm_qos_objects each one wrapping requests, notifiers - * - * User mode requests on a QOS parameter register themselves to the - * subsystem by opening the device node /dev/... and writing there request to - * the node. 
As long as the process holds a file handle open to the node the - * client continues to be accounted for. Upon file release the usermode - * request is removed and a new qos target is computed. This way when the - * request that the application has is cleaned up when closes the file - * pointer or exits the pm_qos_object will get an opportunity to clean up. - * - * Mark Gross <mgross@linux.intel.com> + * In addition to the basic functionality, more specific interfaces for managing + * global CPU latency QoS requests and frequency QoS requests are provided. */ /*#define DEBUG*/ @@ -54,56 +44,19 @@ * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock * held, taken with _irqsave. One lock to rule them all */ -struct pm_qos_object { - struct pm_qos_constraints *constraints; - struct miscdevice pm_qos_power_miscdev; - char *name; -}; - static DEFINE_SPINLOCK(pm_qos_lock); -static struct pm_qos_object null_pm_qos; - -static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); -static struct pm_qos_constraints cpu_dma_constraints = { - .list = PLIST_HEAD_INIT(cpu_dma_constraints.list), - .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, - .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, - .no_constraint_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, - .type = PM_QOS_MIN, - .notifiers = &cpu_dma_lat_notifier, -}; -static struct pm_qos_object cpu_dma_pm_qos = { - .constraints = &cpu_dma_constraints, - .name = "cpu_dma_latency", -}; - -static struct pm_qos_object *pm_qos_array[] = { - &null_pm_qos, - &cpu_dma_pm_qos, -}; - -static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, - size_t count, loff_t *f_pos); -static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, - size_t count, loff_t *f_pos); -static int pm_qos_power_open(struct inode *inode, struct file *filp); -static int pm_qos_power_release(struct inode *inode, struct file *filp); - -static const struct file_operations pm_qos_power_fops = { - .write = pm_qos_power_write, - .read = pm_qos_power_read, - .open = pm_qos_power_open, - .release = pm_qos_power_release, - .llseek = noop_llseek, -}; - -/* unlocked internal variant */ -static inline int pm_qos_get_value(struct pm_qos_constraints *c) +/** + * pm_qos_read_value - Return the current effective constraint value. + * @c: List of PM QoS constraint requests. 
+ */ +s32 pm_qos_read_value(struct pm_qos_constraints *c) { - struct plist_node *node; - int total_value = 0; + return READ_ONCE(c->target_value); +} +static int pm_qos_get_value(struct pm_qos_constraints *c) +{ if (plist_head_empty(&c->list)) return c->no_constraint_value; @@ -114,111 +67,42 @@ static inline int pm_qos_get_value(struct pm_qos_constraints *c) case PM_QOS_MAX: return plist_last(&c->list)->prio; - case PM_QOS_SUM: - plist_for_each(node, &c->list) - total_value += node->prio; - - return total_value; - default: - /* runtime check for not using enum */ - BUG(); + WARN(1, "Unknown PM QoS type in %s\n", __func__); return PM_QOS_DEFAULT_VALUE; } } -s32 pm_qos_read_value(struct pm_qos_constraints *c) -{ - return c->target_value; -} - -static inline void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) +static void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) { - c->target_value = value; + WRITE_ONCE(c->target_value, value); } -static int pm_qos_debug_show(struct seq_file *s, void *unused) -{ - struct pm_qos_object *qos = (struct pm_qos_object *)s->private; - struct pm_qos_constraints *c; - struct pm_qos_request *req; - char *type; - unsigned long flags; - int tot_reqs = 0; - int active_reqs = 0; - - if (IS_ERR_OR_NULL(qos)) { - pr_err("%s: bad qos param!\n", __func__); - return -EINVAL; - } - c = qos->constraints; - if (IS_ERR_OR_NULL(c)) { - pr_err("%s: Bad constraints on qos?\n", __func__); - return -EINVAL; - } - - /* Lock to ensure we have a snapshot */ - spin_lock_irqsave(&pm_qos_lock, flags); - if (plist_head_empty(&c->list)) { - seq_puts(s, "Empty!\n"); - goto out; - } - - switch (c->type) { - case PM_QOS_MIN: - type = "Minimum"; - break; - case PM_QOS_MAX: - type = "Maximum"; - break; - case PM_QOS_SUM: - type = "Sum"; - break; - default: - type = "Unknown"; - } - - plist_for_each_entry(req, &c->list, node) { - char *state = "Default"; - - if ((req->node).prio != c->default_value) { - active_reqs++; - state = "Active"; - } - tot_reqs++; - seq_printf(s, "%d: %d: %s\n", tot_reqs, - (req->node).prio, state); - } - - seq_printf(s, "Type=%s, Value=%d, Requests: active=%d / total=%d\n", - type, pm_qos_get_value(c), active_reqs, tot_reqs); - -out: - spin_unlock_irqrestore(&pm_qos_lock, flags); - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(pm_qos_debug); - /** - * pm_qos_update_target - manages the constraints list and calls the notifiers - * if needed - * @c: constraints data struct - * @node: request to add to the list, to update or to remove - * @action: action to take on the constraints list - * @value: value of the request to add or update + * pm_qos_update_target - Update a list of PM QoS constraint requests. + * @c: List of PM QoS requests. + * @node: Target list entry. + * @action: Action to carry out (add, update or remove). + * @value: New request value for the target list entry. * - * This function returns 1 if the aggregated constraint value has changed, 0 - * otherwise. + * Update the given list of PM QoS constraint requests, @c, by carrying an + * @action involving the @node list entry and @value on it. + * + * The recognized values of @action are PM_QOS_ADD_REQ (store @value in @node + * and add it to the list), PM_QOS_UPDATE_REQ (remove @node from the list, store + * @value in it and add it to the list again), and PM_QOS_REMOVE_REQ (remove + * @node from the list, ignore @value). + * + * Return: 1 if the aggregate constraint value has changed, 0 otherwise. 
*/ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, enum pm_qos_req_action action, int value) { - unsigned long flags; int prev_value, curr_value, new_value; - int ret; + unsigned long flags; spin_lock_irqsave(&pm_qos_lock, flags); + prev_value = pm_qos_get_value(c); if (value == PM_QOS_DEFAULT_VALUE) new_value = c->default_value; @@ -231,12 +115,11 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, break; case PM_QOS_UPDATE_REQ: /* - * to change the list, we atomically remove, reinit - * with new value and add, then see if the extremal - * changed + * To change the list, atomically remove, reinit with new value + * and add, then see if the aggregate has changed. */ plist_del(node, &c->list); - /* fall through */ + fallthrough; case PM_QOS_ADD_REQ: plist_node_init(node, new_value); plist_add(node, &c->list); @@ -252,16 +135,14 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, spin_unlock_irqrestore(&pm_qos_lock, flags); trace_pm_qos_update_target(action, prev_value, curr_value); - if (prev_value != curr_value) { - ret = 1; - if (c->notifiers) - blocking_notifier_call_chain(c->notifiers, - (unsigned long)curr_value, - NULL); - } else { - ret = 0; - } - return ret; + + if (prev_value == curr_value) + return 0; + + if (c->notifiers) + blocking_notifier_call_chain(c->notifiers, curr_value, NULL); + + return 1; } /** @@ -283,14 +164,12 @@ static void pm_qos_flags_remove_req(struct pm_qos_flags *pqf, /** * pm_qos_update_flags - Update a set of PM QoS flags. - * @pqf: Set of flags to update. + * @pqf: Set of PM QoS flags to update. * @req: Request to add to the set, to modify, or to remove from the set. * @action: Action to take on the set. * @val: Value of the request to add or modify. * - * Update the given set of PM QoS flags and call notifiers if the aggregate - * value has changed. Returns 1 if the aggregate constraint value has changed, - * 0 otherwise. + * Return: 1 if the aggregate constraint value has changed, 0 otherwise. */ bool pm_qos_update_flags(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req, @@ -309,7 +188,7 @@ bool pm_qos_update_flags(struct pm_qos_flags *pqf, break; case PM_QOS_UPDATE_REQ: pm_qos_flags_remove_req(pqf, req); - /* fall through */ + fallthrough; case PM_QOS_ADD_REQ: req->flags = val; INIT_LIST_HEAD(&req->node); @@ -326,288 +205,180 @@ bool pm_qos_update_flags(struct pm_qos_flags *pqf, spin_unlock_irqrestore(&pm_qos_lock, irqflags); trace_pm_qos_update_flags(action, prev_value, curr_value); - return prev_value != curr_value; -} - -/** - * pm_qos_request - returns current system wide qos expectation - * @pm_qos_class: identification of which qos value is requested - * - * This function returns the current target value. - */ -int pm_qos_request(int pm_qos_class) -{ - return pm_qos_read_value(pm_qos_array[pm_qos_class]->constraints); -} -EXPORT_SYMBOL_GPL(pm_qos_request); -int pm_qos_request_active(struct pm_qos_request *req) -{ - return req->pm_qos_class != 0; + return prev_value != curr_value; } -EXPORT_SYMBOL_GPL(pm_qos_request_active); -static void __pm_qos_update_request(struct pm_qos_request *req, - s32 new_value) -{ - trace_pm_qos_update_request(req->pm_qos_class, new_value); +#ifdef CONFIG_CPU_IDLE +/* Definitions related to the CPU latency QoS. 
*/ - if (new_value != req->node.prio) - pm_qos_update_target( - pm_qos_array[req->pm_qos_class]->constraints, - &req->node, PM_QOS_UPDATE_REQ, new_value); -} +static struct pm_qos_constraints cpu_latency_constraints = { + .list = PLIST_HEAD_INIT(cpu_latency_constraints.list), + .target_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, + .default_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, + .no_constraint_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, + .type = PM_QOS_MIN, +}; /** - * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout - * @work: work struct for the delayed work (timeout) - * - * This cancels the timeout request by falling back to the default at timeout. + * cpu_latency_qos_limit - Return current system-wide CPU latency QoS limit. */ -static void pm_qos_work_fn(struct work_struct *work) +s32 cpu_latency_qos_limit(void) { - struct pm_qos_request *req = container_of(to_delayed_work(work), - struct pm_qos_request, - work); - - __pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE); + return pm_qos_read_value(&cpu_latency_constraints); } /** - * pm_qos_add_request - inserts new qos request into the list - * @req: pointer to a preallocated handle - * @pm_qos_class: identifies which list of qos request to use - * @value: defines the qos request + * cpu_latency_qos_request_active - Check the given PM QoS request. + * @req: PM QoS request to check. * - * This function inserts a new entry in the pm_qos_class list of requested qos - * performance characteristics. It recomputes the aggregate QoS expectations - * for the pm_qos_class of parameters and initializes the pm_qos_request - * handle. Caller needs to save this handle for later use in updates and - * removal. + * Return: 'true' if @req has been added to the CPU latency QoS list, 'false' + * otherwise. */ - -void pm_qos_add_request(struct pm_qos_request *req, - int pm_qos_class, s32 value) +bool cpu_latency_qos_request_active(struct pm_qos_request *req) { - if (!req) /*guard against callers passing in null */ - return; + return req->qos == &cpu_latency_constraints; +} +EXPORT_SYMBOL_GPL(cpu_latency_qos_request_active); - if (pm_qos_request_active(req)) { - WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n"); - return; - } - req->pm_qos_class = pm_qos_class; - INIT_DELAYED_WORK(&req->work, pm_qos_work_fn); - trace_pm_qos_add_request(pm_qos_class, value); - pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, - &req->node, PM_QOS_ADD_REQ, value); +static void cpu_latency_qos_apply(struct pm_qos_request *req, + enum pm_qos_req_action action, s32 value) +{ + int ret = pm_qos_update_target(req->qos, &req->node, action, value); + if (ret > 0) + wake_up_all_idle_cpus(); } -EXPORT_SYMBOL_GPL(pm_qos_add_request); /** - * pm_qos_update_request - modifies an existing qos request - * @req : handle to list element holding a pm_qos request to use - * @value: defines the qos request + * cpu_latency_qos_add_request - Add new CPU latency QoS request. + * @req: Pointer to a preallocated handle. + * @value: Requested constraint value. * - * Updates an existing qos request for the pm_qos_class of parameters along - * with updating the target pm_qos_class value. + * Use @value to initialize the request handle pointed to by @req, insert it as + * a new entry to the CPU latency QoS list and recompute the effective QoS + * constraint for that list. * - * Attempts are made to make this code callable on hot code paths. 
+ * Callers need to save the handle for later use in updates and removal of the + * QoS request represented by it. */ -void pm_qos_update_request(struct pm_qos_request *req, - s32 new_value) +void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value) { - if (!req) /*guard against callers passing in null */ + if (!req) return; - if (!pm_qos_request_active(req)) { - WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n"); + if (cpu_latency_qos_request_active(req)) { + WARN(1, KERN_ERR "%s called for already added request\n", __func__); return; } - cancel_delayed_work_sync(&req->work); - __pm_qos_update_request(req, new_value); + trace_pm_qos_add_request(value); + + req->qos = &cpu_latency_constraints; + cpu_latency_qos_apply(req, PM_QOS_ADD_REQ, value); } -EXPORT_SYMBOL_GPL(pm_qos_update_request); +EXPORT_SYMBOL_GPL(cpu_latency_qos_add_request); /** - * pm_qos_update_request_timeout - modifies an existing qos request temporarily. - * @req : handle to list element holding a pm_qos request to use - * @new_value: defines the temporal qos request - * @timeout_us: the effective duration of this qos request in usecs. + * cpu_latency_qos_update_request - Modify existing CPU latency QoS request. + * @req : QoS request to update. + * @new_value: New requested constraint value. * - * After timeout_us, this qos request is cancelled automatically. + * Use @new_value to update the QoS request represented by @req in the CPU + * latency QoS list along with updating the effective constraint value for that + * list. */ -void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value, - unsigned long timeout_us) +void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value) { if (!req) return; - if (WARN(!pm_qos_request_active(req), - "%s called for unknown object.", __func__)) + + if (!cpu_latency_qos_request_active(req)) { + WARN(1, KERN_ERR "%s called for unknown object\n", __func__); return; + } - cancel_delayed_work_sync(&req->work); + trace_pm_qos_update_request(new_value); - trace_pm_qos_update_request_timeout(req->pm_qos_class, - new_value, timeout_us); - if (new_value != req->node.prio) - pm_qos_update_target( - pm_qos_array[req->pm_qos_class]->constraints, - &req->node, PM_QOS_UPDATE_REQ, new_value); + if (new_value == req->node.prio) + return; - schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us)); + cpu_latency_qos_apply(req, PM_QOS_UPDATE_REQ, new_value); } +EXPORT_SYMBOL_GPL(cpu_latency_qos_update_request); /** - * pm_qos_remove_request - modifies an existing qos request - * @req: handle to request list element + * cpu_latency_qos_remove_request - Remove existing CPU latency QoS request. + * @req: QoS request to remove. * - * Will remove pm qos request from the list of constraints and - * recompute the current target value for the pm_qos_class. Call this - * on slow code paths. + * Remove the CPU latency QoS request represented by @req from the CPU latency + * QoS list along with updating the effective constraint value for that list. 
*/ -void pm_qos_remove_request(struct pm_qos_request *req) +void cpu_latency_qos_remove_request(struct pm_qos_request *req) { - if (!req) /*guard against callers passing in null */ + if (!req) return; - /* silent return to keep pcm code cleaner */ - if (!pm_qos_request_active(req)) { - WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n"); + if (!cpu_latency_qos_request_active(req)) { + WARN(1, KERN_ERR "%s called for unknown object\n", __func__); return; } - cancel_delayed_work_sync(&req->work); + trace_pm_qos_remove_request(PM_QOS_DEFAULT_VALUE); - trace_pm_qos_remove_request(req->pm_qos_class, PM_QOS_DEFAULT_VALUE); - pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, - &req->node, PM_QOS_REMOVE_REQ, - PM_QOS_DEFAULT_VALUE); + cpu_latency_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } -EXPORT_SYMBOL_GPL(pm_qos_remove_request); - -/** - * pm_qos_add_notifier - sets notification entry for changes to target value - * @pm_qos_class: identifies which qos target changes should be notified. - * @notifier: notifier block managed by caller. - * - * will register the notifier into a notification chain that gets called - * upon changes to the pm_qos_class target value. - */ -int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) -{ - int retval; +EXPORT_SYMBOL_GPL(cpu_latency_qos_remove_request); - retval = blocking_notifier_chain_register( - pm_qos_array[pm_qos_class]->constraints->notifiers, - notifier); +/* User space interface to the CPU latency QoS via misc device. */ - return retval; -} -EXPORT_SYMBOL_GPL(pm_qos_add_notifier); - -/** - * pm_qos_remove_notifier - deletes notification entry from chain. - * @pm_qos_class: identifies which qos target changes are notified. - * @notifier: notifier block to be removed. - * - * will remove the notifier from the notification chain that gets called - * upon changes to the pm_qos_class target value. 
- */ -int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) +static int cpu_latency_qos_open(struct inode *inode, struct file *filp) { - int retval; - - retval = blocking_notifier_chain_unregister( - pm_qos_array[pm_qos_class]->constraints->notifiers, - notifier); - - return retval; -} -EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); - -/* User space interface to PM QoS classes via misc devices */ -static int register_pm_qos_misc(struct pm_qos_object *qos, struct dentry *d) -{ - qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; - qos->pm_qos_power_miscdev.name = qos->name; - qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; - - debugfs_create_file(qos->name, S_IRUGO, d, (void *)qos, - &pm_qos_debug_fops); + struct pm_qos_request *req; - return misc_register(&qos->pm_qos_power_miscdev); -} + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; -static int find_pm_qos_object_by_minor(int minor) -{ - int pm_qos_class; + cpu_latency_qos_add_request(req, PM_QOS_DEFAULT_VALUE); + filp->private_data = req; - for (pm_qos_class = PM_QOS_CPU_DMA_LATENCY; - pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { - if (minor == - pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) - return pm_qos_class; - } - return -1; + return 0; } -static int pm_qos_power_open(struct inode *inode, struct file *filp) +static int cpu_latency_qos_release(struct inode *inode, struct file *filp) { - long pm_qos_class; - - pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); - if (pm_qos_class >= PM_QOS_CPU_DMA_LATENCY) { - struct pm_qos_request *req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) - return -ENOMEM; - - pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE); - filp->private_data = req; + struct pm_qos_request *req = filp->private_data; - return 0; - } - return -EPERM; -} + filp->private_data = NULL; -static int pm_qos_power_release(struct inode *inode, struct file *filp) -{ - struct pm_qos_request *req; - - req = filp->private_data; - pm_qos_remove_request(req); + cpu_latency_qos_remove_request(req); kfree(req); return 0; } - -static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, - size_t count, loff_t *f_pos) +static ssize_t cpu_latency_qos_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos) { - s32 value; - unsigned long flags; struct pm_qos_request *req = filp->private_data; + unsigned long flags; + s32 value; - if (!req) - return -EINVAL; - if (!pm_qos_request_active(req)) + if (!req || !cpu_latency_qos_request_active(req)) return -EINVAL; spin_lock_irqsave(&pm_qos_lock, flags); - value = pm_qos_get_value(pm_qos_array[req->pm_qos_class]->constraints); + value = pm_qos_get_value(&cpu_latency_constraints); spin_unlock_irqrestore(&pm_qos_lock, flags); return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); } -static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, - size_t count, loff_t *f_pos) +static ssize_t cpu_latency_qos_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) { s32 value; - struct pm_qos_request *req; if (count == sizeof(s32)) { if (copy_from_user(&value, buf, sizeof(s32))) @@ -620,36 +391,38 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, return ret; } - req = filp->private_data; - pm_qos_update_request(req, value); + cpu_latency_qos_update_request(filp->private_data, value); return count; } +static const struct file_operations cpu_latency_qos_fops = { + .write = cpu_latency_qos_write, + .read = 
cpu_latency_qos_read, + .open = cpu_latency_qos_open, + .release = cpu_latency_qos_release, + .llseek = noop_llseek, +}; -static int __init pm_qos_power_init(void) -{ - int ret = 0; - int i; - struct dentry *d; - - BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES); +static struct miscdevice cpu_latency_qos_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "cpu_dma_latency", + .fops = &cpu_latency_qos_fops, +}; - d = debugfs_create_dir("pm_qos", NULL); +static int __init cpu_latency_qos_init(void) +{ + int ret; - for (i = PM_QOS_CPU_DMA_LATENCY; i < PM_QOS_NUM_CLASSES; i++) { - ret = register_pm_qos_misc(pm_qos_array[i], d); - if (ret < 0) { - pr_err("%s: %s setup failed\n", - __func__, pm_qos_array[i]->name); - return ret; - } - } + ret = misc_register(&cpu_latency_qos_miscdev); + if (ret < 0) + pr_err("%s: %s setup failed\n", __func__, + cpu_latency_qos_miscdev.name); return ret; } - -late_initcall(pm_qos_power_init); +late_initcall(cpu_latency_qos_init); +#endif /* CONFIG_CPU_IDLE */ /* Definitions related to the frequency QoS below. */ @@ -758,7 +531,7 @@ int freq_qos_add_request(struct freq_constraints *qos, { int ret; - if (IS_ERR_OR_NULL(qos) || !req) + if (IS_ERR_OR_NULL(qos) || !req || value < 0) return -EINVAL; if (WARN(freq_qos_request_active(req), @@ -790,7 +563,7 @@ EXPORT_SYMBOL_GPL(freq_qos_add_request); */ int freq_qos_update_request(struct freq_qos_request *req, s32 new_value) { - if (!req) + if (!req || new_value < 0) return -EINVAL; if (WARN(!freq_qos_request_active(req), diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index d82b7b88d616..2a406753af90 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -34,7 +34,6 @@ #include <linux/uaccess.h> #include <asm/mmu_context.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/io.h> @@ -77,6 +76,40 @@ static inline void hibernate_restore_protect_page(void *page_address) {} static inline void hibernate_restore_unprotect_page(void *page_address) {} #endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */ + +/* + * The calls to set_direct_map_*() should not fail because remapping a page + * here means that we only update protection bits in an existing PTE. + * It is still worth to have a warning here if something changes and this + * will no longer be the case. + */ +static inline void hibernate_map_page(struct page *page) +{ + if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) { + int ret = set_direct_map_default_noflush(page); + + if (ret) + pr_warn_once("Failed to remap page\n"); + } else { + debug_pagealloc_map_pages(page, 1); + } +} + +static inline void hibernate_unmap_page(struct page *page) +{ + if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) { + unsigned long addr = (unsigned long)page_address(page); + int ret = set_direct_map_invalid_noflush(page); + + if (ret) + pr_warn_once("Failed to remap page\n"); + + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + } else { + debug_pagealloc_unmap_pages(page, 1); + } +} + static int swsusp_page_is_free(struct page *); static void swsusp_set_page_forbidden(struct page *); static void swsusp_unset_page_forbidden(struct page *); @@ -293,12 +326,12 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) return ret; } -/** +/* * Data types related to memory bitmaps. * - * Memory bitmap is a structure consiting of many linked lists of + * Memory bitmap is a structure consisting of many linked lists of * objects. 
The main list's elements are of type struct zone_bitmap - * and each of them corresonds to one zone. For each zone bitmap + * and each of them corresponds to one zone. For each zone bitmap * object there is a list of objects of type struct bm_block that * represent each blocks of bitmap in which information is stored. * @@ -394,6 +427,10 @@ struct memory_bitmap { /** * alloc_rtree_node - Allocate a new node and add it to the radix tree. + * @gfp_mask: GFP mask for the allocation. + * @safe_needed: Get pages not used before hibernation (restore only) + * @ca: Pointer to a linked list of pages ("a chain") to allocate from + * @list: Radix Tree node to add. * * This function is used to allocate inner nodes as well as the * leave nodes of the radix tree. It also adds the node to the @@ -736,7 +773,7 @@ zone_found: */ /* - * If the zone we wish to scan is the the current zone and the + * If the zone we wish to scan is the current zone and the * pfn falls into the current node then we do not need to walk * the tree. */ @@ -869,7 +906,7 @@ static bool rtree_next_node(struct memory_bitmap *bm) } /** - * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap. + * memory_bm_next_pfn - Find the next set bit in a memory bitmap. * @bm: Memory bitmap. * * Starting from the last returned position this function searches for the next @@ -945,8 +982,7 @@ static void memory_bm_recycle(struct memory_bitmap *bm) * Register a range of page frames the contents of which should not be saved * during hibernation (to be used in the early initialization code). */ -void __init __register_nosave_region(unsigned long start_pfn, - unsigned long end_pfn, int use_kmalloc) +void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) { struct nosave_region *region; @@ -962,18 +998,12 @@ void __init __register_nosave_region(unsigned long start_pfn, goto Report; } } - if (use_kmalloc) { - /* During init, this shouldn't fail */ - region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); - BUG_ON(!region); - } else { - /* This allocation cannot fail */ - region = memblock_alloc(sizeof(struct nosave_region), - SMP_CACHE_BYTES); - if (!region) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct nosave_region)); - } + /* This allocation cannot fail */ + region = memblock_alloc(sizeof(struct nosave_region), + SMP_CACHE_BYTES); + if (!region) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct nosave_region)); region->start_pfn = start_pfn; region->end_pfn = end_pfn; list_add_tail(®ion->list, &nosave_regions); @@ -1113,7 +1143,7 @@ int create_basic_memory_bitmaps(void) Free_second_object: kfree(bm2); Free_first_bitmap: - memory_bm_free(bm1, PG_UNSAFE_CLEAR); + memory_bm_free(bm1, PG_UNSAFE_CLEAR); Free_first_object: kfree(bm1); return -ENOMEM; @@ -1145,7 +1175,15 @@ void free_basic_memory_bitmaps(void) pr_debug("Basic memory bitmaps freed\n"); } -void clear_free_pages(void) +static void clear_or_poison_free_page(struct page *page) +{ + if (page_poisoning_enabled_static()) + __kernel_poison_pages(page, 1); + else if (want_init_on_free()) + clear_highpage(page); +} + +void clear_or_poison_free_pages(void) { struct memory_bitmap *bm = free_pages_map; unsigned long pfn; @@ -1153,12 +1191,12 @@ void clear_free_pages(void) if (WARN_ON(!(free_pages_map))) return; - if (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) || want_init_on_free()) { + if (page_poisoning_enabled() || want_init_on_free()) { memory_bm_position_reset(bm); pfn = memory_bm_next_pfn(bm); while (pfn != 
BM_END_OF_MAP) { if (pfn_valid(pfn)) - clear_highpage(pfn_to_page(pfn)); + clear_or_poison_free_page(pfn_to_page(pfn)); pfn = memory_bm_next_pfn(bm); } @@ -1356,9 +1394,9 @@ static void safe_copy_page(void *dst, struct page *s_page) if (kernel_page_present(s_page)) { do_copy_page(dst, page_address(s_page)); } else { - kernel_map_pages(s_page, 1, 1); + hibernate_map_page(s_page); do_copy_page(dst, page_address(s_page)); - kernel_map_pages(s_page, 1, 0); + hibernate_unmap_page(s_page); } } @@ -1459,7 +1497,7 @@ static struct memory_bitmap copy_bm; /** * swsusp_free - Free pages allocated for hibernation image. * - * Image pages are alocated before snapshot creation, so they need to be + * Image pages are allocated before snapshot creation, so they need to be * released after resume. */ void swsusp_free(void) @@ -1664,7 +1702,7 @@ static unsigned long minimum_image_size(unsigned long saveable) { unsigned long size; - size = global_node_page_state(NR_SLAB_RECLAIMABLE) + size = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) + global_node_page_state(NR_ACTIVE_ANON) + global_node_page_state(NR_INACTIVE_ANON) + global_node_page_state(NR_ACTIVE_FILE) @@ -1744,9 +1782,6 @@ int hibernate_preallocate_memory(void) count += highmem; count -= totalreserve_pages; - /* Add number of pages required for page keys (s390 only). */ - size += page_key_additional_pages(saveable); - /* Compute the maximum number of saveable pages to leave in memory. */ max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); @@ -1906,7 +1941,7 @@ static inline int get_highmem_buffer(int safe_needed) } /** - * alloc_highmem_image_pages - Allocate some highmem pages for the image. + * alloc_highmem_pages - Allocate some highmem pages for the image. * * Try to allocate as many pages as needed, but if the number of free highmem * pages is less than that, allocate them all. @@ -2027,7 +2062,7 @@ static int init_header_complete(struct swsusp_info *info) return 0; } -static char *check_image_kernel(struct swsusp_info *info) +static const char *check_image_kernel(struct swsusp_info *info) { if (info->version_code != LINUX_VERSION_CODE) return "kernel version"; @@ -2075,8 +2110,6 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) buf[j] = memory_bm_next_pfn(bm); if (unlikely(buf[j] == BM_END_OF_MAP)) break; - /* Save page key for data page (s390 only). */ - page_key_read(buf + j); } } @@ -2182,7 +2215,7 @@ static void mark_unsafe_pages(struct memory_bitmap *bm) static int check_header(struct swsusp_info *info) { - char *reason; + const char *reason; reason = check_image_kernel(info); if (!reason && info->num_physpages != get_num_physpages()) @@ -2195,7 +2228,7 @@ static int check_header(struct swsusp_info *info) } /** - * load header - Check the image header and copy the data from it. + * load_header - Check the image header and copy the data from it. */ static int load_header(struct swsusp_info *info) { @@ -2226,9 +2259,6 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) if (unlikely(buf[j] == BM_END_OF_MAP)) break; - /* Extract and buffer page key for data page (s390 only). */ - page_key_memorize(buf + j); - if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) memory_bm_set_bit(bm, buf[j]); else @@ -2293,7 +2323,7 @@ static struct memory_bitmap *safe_highmem_bm; * (@nr_highmem_p points to the variable containing the number of highmem image * pages). The pages that are "safe" (ie. 
will not be overwritten when the * hibernation image is restored entirely) have the corresponding bits set in - * @bm (it must be unitialized). + * @bm (it must be uninitialized). * * NOTE: This function should not be called if there are no highmem image pages. */ @@ -2450,7 +2480,7 @@ static inline void free_highmem_data(void) {} /** * prepare_image - Make room for loading hibernation image. - * @new_bm: Unitialized memory bitmap structure. + * @new_bm: Uninitialized memory bitmap structure. * @bm: Memory bitmap with unsafe pages marked. * * Use @bm to mark the pages that will be overwritten in the process of @@ -2623,11 +2653,6 @@ int snapshot_write_next(struct snapshot_handle *handle) if (error) return error; - /* Allocate buffer for page keys. */ - error = page_key_alloc(nr_copy_pages); - if (error) - return error; - hibernate_restore_protection_begin(); } else if (handle->cur <= nr_meta_pages + 1) { error = unpack_orig_pfns(buffer, ©_bm); @@ -2649,8 +2674,6 @@ int snapshot_write_next(struct snapshot_handle *handle) } } else { copy_last_highmem_page(); - /* Restore page key for data page (s390 only). */ - page_key_write(handle->buffer); hibernate_restore_protect_page(handle->buffer); handle->buffer = get_buffer(&orig_bm, &ca); if (IS_ERR(handle->buffer)) @@ -2673,9 +2696,6 @@ int snapshot_write_next(struct snapshot_handle *handle) void snapshot_write_finalize(struct snapshot_handle *handle) { copy_last_highmem_page(); - /* Restore page key for data page (s390 only). */ - page_key_write(handle->buffer); - page_key_free(); hibernate_restore_protect_page(handle->buffer); /* Do that only if we have loaded the image entirely */ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 8b1bb5ee7e5d..fa3bf161d13f 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -75,9 +75,11 @@ EXPORT_SYMBOL_GPL(pm_suspend_default_s2idle); void s2idle_set_ops(const struct platform_s2idle_ops *ops) { - lock_system_sleep(); + unsigned int sleep_flags; + + sleep_flags = lock_system_sleep(); s2idle_ops = ops; - unlock_system_sleep(); + unlock_system_sleep(sleep_flags); } static void s2idle_begin(void) @@ -96,8 +98,7 @@ static void s2idle_enter(void) s2idle_state = S2IDLE_STATE_ENTER; raw_spin_unlock_irq(&s2idle_lock); - get_online_cpus(); - cpuidle_resume(); + cpus_read_lock(); /* Push all the CPUs into the idle loop. */ wake_up_all_idle_cpus(); @@ -105,8 +106,7 @@ static void s2idle_enter(void) swait_event_exclusive(s2idle_wait_head, s2idle_state == S2IDLE_STATE_WAKE); - cpuidle_pause(); - put_online_cpus(); + cpus_read_unlock(); raw_spin_lock_irq(&s2idle_lock); @@ -138,7 +138,8 @@ static void s2idle_loop(void) break; } - pm_wakeup_clear(false); + if (s2idle_ops && s2idle_ops->check) + s2idle_ops->check(); s2idle_enter(); } @@ -162,11 +163,13 @@ EXPORT_SYMBOL_GPL(s2idle_wake); static bool valid_state(suspend_state_t state) { /* - * PM_SUSPEND_STANDBY and PM_SUSPEND_MEM states need low level - * support and need to be valid to the low level - * implementation, no valid callback implies that none are valid. + * The PM_SUSPEND_STANDBY and PM_SUSPEND_MEM states require low-level + * support and need to be valid to the low-level implementation. + * + * No ->valid() or ->enter() callback implies that none are valid. 
*/ - return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); + return suspend_ops && suspend_ops->valid && suspend_ops->valid(state) && + suspend_ops->enter; } void __init pm_states_init(void) @@ -202,7 +205,9 @@ __setup("mem_sleep_default=", mem_sleep_default_setup); */ void suspend_set_ops(const struct platform_suspend_ops *ops) { - lock_system_sleep(); + unsigned int sleep_flags; + + sleep_flags = lock_system_sleep(); suspend_ops = ops; @@ -218,12 +223,13 @@ void suspend_set_ops(const struct platform_suspend_ops *ops) mem_sleep_current = PM_SUSPEND_MEM; } - unlock_system_sleep(); + unlock_system_sleep(sleep_flags); } EXPORT_SYMBOL_GPL(suspend_set_ops); /** * suspend_valid_only_mem - Generic memory-only valid callback. + * @state: Target system sleep state. * * Platform drivers that implement mem suspend only and only need to check for * that in their .valid() callback can use this instead of rolling their own @@ -237,7 +243,8 @@ EXPORT_SYMBOL_GPL(suspend_valid_only_mem); static bool sleep_state_supported(suspend_state_t state) { - return state == PM_SUSPEND_TO_IDLE || (suspend_ops && suspend_ops->enter); + return state == PM_SUSPEND_TO_IDLE || + (valid_state(state) && !cxl_mem_active()); } static int platform_suspend_prepare(suspend_state_t state) @@ -335,6 +342,7 @@ static int suspend_test(int level) /** * suspend_prepare - Prepare for entering system sleep state. + * @state: Target system sleep state. * * Common code run for every system sleep state that can be entered (except for * hibernation). Run suspend notifiers, allocate the "suspend" console and @@ -342,18 +350,16 @@ static int suspend_test(int level) */ static int suspend_prepare(suspend_state_t state) { - int error, nr_calls = 0; + int error; if (!sleep_state_supported(state)) return -EPERM; pm_prepare_console(); - error = __pm_notifier_call_chain(PM_SUSPEND_PREPARE, -1, &nr_calls); - if (error) { - nr_calls--; - goto Finish; - } + error = pm_notifier_call_chain_robust(PM_SUSPEND_PREPARE, PM_POST_SUSPEND); + if (error) + goto Restore; trace_suspend_resume(TPS("freeze_processes"), 0, true); error = suspend_freeze_processes(); @@ -363,8 +369,8 @@ static int suspend_prepare(suspend_state_t state) suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); - Finish: - __pm_notifier_call_chain(PM_POST_SUSPEND, nr_calls, NULL); + pm_notifier_call_chain(PM_POST_SUSPEND); + Restore: pm_restore_console(); return error; } @@ -422,7 +428,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) goto Platform_wake; } - error = suspend_disable_secondary_cpus(); + error = pm_sleep_disable_secondary_cpus(); if (error || suspend_test(TEST_CPUS)) goto Enable_cpus; @@ -452,7 +458,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) BUG_ON(irqs_disabled()); Enable_cpus: - suspend_enable_secondary_cpus(); + pm_sleep_enable_secondary_cpus(); Platform_wake: platform_resume_noirq(state); diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c index e1ed58adb69e..b663a97f5867 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c @@ -129,7 +129,7 @@ static int __init has_wakealarm(struct device *dev, const void *data) { struct rtc_device *candidate = to_rtc_device(dev); - if (!candidate->ops->set_alarm) + if (!test_bit(RTC_FEATURE_ALARM, candidate->features)) return 0; if (!device_may_wakeup(candidate->dev.parent)) return 0; @@ -157,22 +157,22 @@ static int __init setup_test_suspend(char *value) value++; suspend_type = strsep(&value, ","); if (!suspend_type) - return 
0; + return 1; repeat = strsep(&value, ","); if (repeat) { if (kstrtou32(repeat, 0, &test_repeat_count_max)) - return 0; + return 1; } for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) if (!strcmp(pm_labels[i], suspend_type)) { test_state_label = pm_labels[i]; - return 0; + return 1; } printk(warn_bad_state, suspend_type); - return 0; + return 1; } __setup("test_suspend", setup_test_suspend); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index ca0fcb5ced71..277434b6c0bf 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -16,7 +16,6 @@ #include <linux/file.h> #include <linux/delay.h> #include <linux/bitops.h> -#include <linux/genhd.h> #include <linux/device.h> #include <linux/bio.h> #include <linux/blkdev.h> @@ -36,6 +35,8 @@ #define HIBERNATE_SIG "S1SUSPEND" +u32 swsusp_hardware_signature; + /* * When reading an {un,}compressed image, we may restore pages in place, * in which case some architectures need these pages cleaning before they @@ -87,7 +88,7 @@ struct swap_map_page_list { struct swap_map_page_list *next; }; -/** +/* * The swap_map_handle structure is used for handling swap in * a file-alike way */ @@ -104,7 +105,8 @@ struct swap_map_handle { struct swsusp_header { char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) - - sizeof(u32)]; + sizeof(u32) - sizeof(u32)]; + u32 hw_sig; u32 crc32; sector_t image; unsigned int flags; /* Flags to pass to the "boot" kernel */ @@ -114,7 +116,7 @@ struct swsusp_header { static struct swsusp_header *swsusp_header; -/** +/* * The following functions are used for tracing the allocated * swap pages, so that they can be freed in case of an error. */ @@ -168,7 +170,7 @@ static int swsusp_extents_insert(unsigned long swap_offset) return 0; } -/** +/* * alloc_swapdev_block - allocate a swap page and register that it has * been allocated, so that it can be freed in case of an error. */ @@ -187,7 +189,7 @@ sector_t alloc_swapdev_block(int swap) return 0; } -/** +/* * free_all_swap_pages - free swap pages allocated for saving image data. * It also frees the extents used to register which swap entries had been * allocated. 
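Aside: the swsusp_header hunk earlier in this file only makes sense together with the header's full layout, which the diff context does not show. Below is a minimal userspace sketch (not the kernel's definition; the 4 KiB page size, 64-bit sector_t, and the trailing two 10-byte signature fields implied by the "-20" term are assumptions) of why reserved[] gives up exactly one more u32 when hw_sig is added:

#include <stdint.h>

#define PAGE_SIZE 4096			/* assumption: 4 KiB pages */
typedef uint64_t sector_t;		/* assumption: 64-bit sector numbers */

struct swsusp_header_sketch {
	/* Shrinks by one sizeof(u32) so the new hw_sig field fits below. */
	char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) -
		      sizeof(uint32_t) - sizeof(uint32_t)];
	uint32_t hw_sig;	/* only meaningful when SF_HW_SIG is set in flags */
	uint32_t crc32;
	sector_t image;
	unsigned int flags;
	char orig_sig[10];	/* these two arrays account for the "-20" above */
	char sig[10];
} __attribute__((packed));

/* The header must still occupy exactly one page on the swap device. */
_Static_assert(sizeof(struct swsusp_header_sketch) == PAGE_SIZE,
	       "swsusp header must fill one page");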
@@ -226,6 +228,7 @@ struct hib_bio_batch { atomic_t count; wait_queue_head_t wait; blk_status_t error; + struct blk_plug plug; }; static void hib_init_batch(struct hib_bio_batch *hb) @@ -233,6 +236,12 @@ static void hib_init_batch(struct hib_bio_batch *hb) atomic_set(&hb->count, 0); init_waitqueue_head(&hb->wait); hb->error = BLK_STS_OK; + blk_start_plug(&hb->plug); +} + +static void hib_finish_batch(struct hib_bio_batch *hb) +{ + blk_finish_plug(&hb->plug); } static void hib_end_io(struct bio *bio) @@ -260,17 +269,15 @@ static void hib_end_io(struct bio *bio) bio_put(bio); } -static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr, - struct hib_bio_batch *hb) +static int hib_submit_io(blk_opf_t opf, pgoff_t page_off, void *addr, + struct hib_bio_batch *hb) { struct page *page = virt_to_page(addr); struct bio *bio; int error = 0; - bio = bio_alloc(GFP_NOIO | __GFP_HIGH, 1); + bio = bio_alloc(hib_resume_bdev, 1, opf, GFP_NOIO | __GFP_HIGH); bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); - bio_set_dev(bio, hib_resume_bdev); - bio_set_op_attrs(bio, op, op_flags); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { pr_err("Adding page to bio failed at %llu\n", @@ -292,8 +299,12 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr, return error; } -static blk_status_t hib_wait_io(struct hib_bio_batch *hb) +static int hib_wait_io(struct hib_bio_batch *hb) { + /* + * We are relying on the behavior of blk_plug that a thread with + * a plug will flush the plug list before sleeping. + */ wait_event(hb->wait, atomic_read(&hb->count) == 0); return blk_status_to_errno(hb->error); } @@ -301,22 +312,24 @@ static blk_status_t hib_wait_io(struct hib_bio_batch *hb) /* * Saving part */ - static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) { int error; - hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, - swsusp_header, NULL); + hib_submit_io(REQ_OP_READ, swsusp_resume_block, swsusp_header, NULL); if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); memcpy(swsusp_header->sig, HIBERNATE_SIG, 10); swsusp_header->image = handle->first_sector; + if (swsusp_hardware_signature) { + swsusp_header->hw_sig = swsusp_hardware_signature; + flags |= SF_HW_SIG; + } swsusp_header->flags = flags; if (flags & SF_CRC32_MODE) swsusp_header->crc32 = handle->crc32; - error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, + error = hib_submit_io(REQ_OP_WRITE | REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { pr_err("Swap header not found!\n"); @@ -335,26 +348,23 @@ static int swsusp_swap_check(void) { int res; - res = swap_type_of(swsusp_resume_device, swsusp_resume_block, - &hib_resume_bdev); + if (swsusp_resume_device) + res = swap_type_of(swsusp_resume_device, swsusp_resume_block); + else + res = find_first_swap(&swsusp_resume_device); if (res < 0) return res; - root_swap = res; - res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL); - if (res) - return res; + + hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, FMODE_WRITE, + NULL); + if (IS_ERR(hib_resume_bdev)) + return PTR_ERR(hib_resume_bdev); res = set_blocksize(hib_resume_bdev, PAGE_SIZE); if (res < 0) blkdev_put(hib_resume_bdev, FMODE_WRITE); - /* - * Update the resume device to the one actually used, - * so the test_resume mode can use it in case it is - * invoked from hibernate() to test the snapshot. 
- */ - swsusp_resume_device = hib_resume_bdev->bd_dev; return res; } @@ -396,7 +406,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) } else { src = buf; } - return hib_submit_io(REQ_OP_WRITE, REQ_SYNC, offset, src, hb); + return hib_submit_io(REQ_OP_WRITE | REQ_SYNC, offset, src, hb); } static void release_swap_writer(struct swap_map_handle *handle) @@ -489,10 +499,10 @@ static int swap_writer_finish(struct swap_map_handle *handle, unsigned int flags, int error) { if (!error) { - flush_swap_writer(handle); pr_info("S"); error = mark_swapfiles(handle, flags); pr_cont("|\n"); + flush_swap_writer(handle); } if (error) @@ -561,6 +571,7 @@ static int save_image(struct swap_map_handle *handle, nr_pages++; } err2 = hib_wait_io(&hb); + hib_finish_batch(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -696,22 +707,19 @@ static int save_image_lzo(struct swap_map_handle *handle, goto out_clean; } - data = vmalloc(array_size(nr_threads, sizeof(*data))); + data = vzalloc(array_size(nr_threads, sizeof(*data))); if (!data) { pr_err("Failed to allocate LZO data\n"); ret = -ENOMEM; goto out_clean; } - for (thr = 0; thr < nr_threads; thr++) - memset(&data[thr], 0, offsetof(struct cmp_data, go)); - crc = kmalloc(sizeof(*crc), GFP_KERNEL); + crc = kzalloc(sizeof(*crc), GFP_KERNEL); if (!crc) { pr_err("Failed to allocate crc\n"); ret = -ENOMEM; goto out_clean; } - memset(crc, 0, offsetof(struct crc_data, go)); /* * Start the compression threads. @@ -854,6 +862,7 @@ out_finish: pr_info("Image saving done\n"); swsusp_show_speed(start, stop, nr_to_write, "Wrote"); out_clean: + hib_finish_batch(&hb); if (crc) { if (crc->thr) kthread_stop(crc->thr); @@ -874,7 +883,7 @@ out_clean: * enough_swap - Make sure we have enough swap to save the image. * * Returns TRUE or FALSE after checking the total amount of swap - * space avaiable from the resume partition. + * space available from the resume partition. */ static int enough_swap(unsigned int nr_pages) @@ -992,7 +1001,7 @@ static int get_swap_reader(struct swap_map_handle *handle, return -ENOMEM; } - error = hib_submit_io(REQ_OP_READ, 0, offset, tmp->map, NULL); + error = hib_submit_io(REQ_OP_READ, offset, tmp->map, NULL); if (error) { release_swap_reader(handle); return error; @@ -1016,7 +1025,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, offset = handle->cur->entries[handle->k]; if (!offset) return -EFAULT; - error = hib_submit_io(REQ_OP_READ, 0, offset, buf, hb); + error = hib_submit_io(REQ_OP_READ, offset, buf, hb); if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { @@ -1084,6 +1093,7 @@ static int load_image(struct swap_map_handle *handle, nr_pages++; } err2 = hib_wait_io(&hb); + hib_finish_batch(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -1114,7 +1124,7 @@ struct dec_data { }; /** - * Deompression function that runs in its own thread. + * Decompression function that runs in its own thread. 
+ * Decompression function that runs in its own thread.
*/ static int lzo_decompress_threadfn(void *data) { @@ -1187,22 +1197,19 @@ static int load_image_lzo(struct swap_map_handle *handle, goto out_clean; } - data = vmalloc(array_size(nr_threads, sizeof(*data))); + data = vzalloc(array_size(nr_threads, sizeof(*data))); if (!data) { pr_err("Failed to allocate LZO data\n"); ret = -ENOMEM; goto out_clean; } - for (thr = 0; thr < nr_threads; thr++) - memset(&data[thr], 0, offsetof(struct dec_data, go)); - crc = kmalloc(sizeof(*crc), GFP_KERNEL); + crc = kzalloc(sizeof(*crc), GFP_KERNEL); if (!crc) { pr_err("Failed to allocate crc\n"); ret = -ENOMEM; goto out_clean; } - memset(crc, 0, offsetof(struct crc_data, go)); clean_pages_on_decompress = true; @@ -1447,6 +1454,7 @@ out_finish: } swsusp_show_speed(start, stop, nr_to_read, "Read"); out_clean: + hib_finish_batch(&hb); for (i = 0; i < ring_size; i++) free_page((unsigned long)page[i]); if (crc) { @@ -1509,14 +1517,14 @@ end: int swsusp_check(void) { int error; + void *holder; hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, - FMODE_READ, NULL); + FMODE_READ | FMODE_EXCL, &holder); if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); - error = hib_submit_io(REQ_OP_READ, 0, - swsusp_resume_block, + error = hib_submit_io(REQ_OP_READ, swsusp_resume_block, swsusp_header, NULL); if (error) goto put; @@ -1524,16 +1532,22 @@ int swsusp_check(void) if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ - error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, + error = hib_submit_io(REQ_OP_WRITE | REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { error = -EINVAL; } + if (!error && swsusp_header->flags & SF_HW_SIG && + swsusp_header->hw_sig != swsusp_hardware_signature) { + pr_info("Suspend image hardware signature mismatch (%08x now %08x); aborting resume.\n", + swsusp_header->hw_sig, swsusp_hardware_signature); + error = -EINVAL; + } put: if (error) - blkdev_put(hib_resume_bdev, FMODE_READ); + blkdev_put(hib_resume_bdev, FMODE_READ | FMODE_EXCL); else pr_debug("Image signature found, resuming\n"); } else { @@ -1569,11 +1583,11 @@ int swsusp_unmark(void) { int error; - hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, - swsusp_header, NULL); + hib_submit_io(REQ_OP_READ, swsusp_resume_block, + swsusp_header, NULL); if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) { memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10); - error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, + error = hib_submit_io(REQ_OP_WRITE | REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { @@ -1590,7 +1604,7 @@ int swsusp_unmark(void) } #endif -static int swsusp_header_init(void) +static int __init swsusp_header_init(void) { swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL); if (!swsusp_header) diff --git a/kernel/power/user.c b/kernel/power/user.c index 77438954cc2b..3a4e70366f35 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -26,8 +26,7 @@ #include "power.h" - -#define SNAPSHOT_MINOR 231 +static bool need_wait; static struct snapshot_data { struct snapshot_handle handle; @@ -37,27 +36,32 @@ static struct snapshot_data { bool ready; bool platform_support; bool free_bitmaps; + dev_t dev; } snapshot_state; -atomic_t snapshot_device_available = ATOMIC_INIT(1); +int is_hibernate_resume_dev(dev_t dev) +{ + return hibernation_available() && snapshot_state.dev == dev; +} static int snapshot_open(struct inode *inode, struct file *filp) { struct snapshot_data 
*data; - int error, nr_calls = 0; + unsigned int sleep_flags; + int error; if (!hibernation_available()) return -EPERM; - lock_system_sleep(); + sleep_flags = lock_system_sleep(); - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + if (!hibernate_acquire()) { error = -EBUSY; goto Unlock; } if ((filp->f_flags & O_ACCMODE) == O_RDWR) { - atomic_inc(&snapshot_device_available); + hibernate_release(); error = -ENOSYS; goto Unlock; } @@ -67,41 +71,35 @@ static int snapshot_open(struct inode *inode, struct file *filp) memset(&data->handle, 0, sizeof(struct snapshot_handle)); if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { /* Hibernating. The image device should be accessible. */ - data->swap = swsusp_resume_device ? - swap_type_of(swsusp_resume_device, 0, NULL) : -1; + data->swap = swap_type_of(swsusp_resume_device, 0); data->mode = O_RDONLY; data->free_bitmaps = false; - error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); - if (error) - __pm_notifier_call_chain(PM_POST_HIBERNATION, --nr_calls, NULL); + error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); } else { /* * Resuming. We may need to wait for the image device to * appear. */ - wait_for_device_probe(); + need_wait = true; data->swap = -1; data->mode = O_WRONLY; - error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); + error = pm_notifier_call_chain_robust(PM_RESTORE_PREPARE, PM_POST_RESTORE); if (!error) { error = create_basic_memory_bitmaps(); data->free_bitmaps = !error; - } else - nr_calls--; - - if (error) - __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); + } } if (error) - atomic_inc(&snapshot_device_available); + hibernate_release(); data->frozen = false; data->ready = false; data->platform_support = false; + data->dev = 0; Unlock: - unlock_system_sleep(); + unlock_system_sleep(sleep_flags); return error; } @@ -109,11 +107,13 @@ static int snapshot_open(struct inode *inode, struct file *filp) static int snapshot_release(struct inode *inode, struct file *filp) { struct snapshot_data *data; + unsigned int sleep_flags; - lock_system_sleep(); + sleep_flags = lock_system_sleep(); swsusp_free(); data = filp->private_data; + data->dev = 0; free_all_swap_pages(data->swap); if (data->frozen) { pm_restore_gfp_mask(); @@ -124,9 +124,9 @@ static int snapshot_release(struct inode *inode, struct file *filp) } pm_notifier_call_chain(data->mode == O_RDONLY ? 
PM_POST_HIBERNATION : PM_POST_RESTORE); - atomic_inc(&snapshot_device_available); + hibernate_release(); - unlock_system_sleep(); + unlock_system_sleep(sleep_flags); return 0; } @@ -134,11 +134,12 @@ static int snapshot_release(struct inode *inode, struct file *filp) static ssize_t snapshot_read(struct file *filp, char __user *buf, size_t count, loff_t *offp) { + loff_t pg_offp = *offp & ~PAGE_MASK; struct snapshot_data *data; + unsigned int sleep_flags; ssize_t res; - loff_t pg_offp = *offp & ~PAGE_MASK; - lock_system_sleep(); + sleep_flags = lock_system_sleep(); data = filp->private_data; if (!data->ready) { @@ -159,7 +160,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, *offp += res; Unlock: - unlock_system_sleep(); + unlock_system_sleep(sleep_flags); return res; } @@ -167,11 +168,17 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, static ssize_t snapshot_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp) { + loff_t pg_offp = *offp & ~PAGE_MASK; struct snapshot_data *data; + unsigned long sleep_flags; ssize_t res; - loff_t pg_offp = *offp & ~PAGE_MASK; - lock_system_sleep(); + if (need_wait) { + wait_for_device_probe(); + need_wait = false; + } + + sleep_flags = lock_system_sleep(); data = filp->private_data; @@ -180,7 +187,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, if (res <= 0) goto unlock; } else { - res = PAGE_SIZE - pg_offp; + res = PAGE_SIZE; } if (!data_of(data->handle)) { @@ -193,11 +200,52 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, if (res > 0) *offp += res; unlock: - unlock_system_sleep(); + unlock_system_sleep(sleep_flags); return res; } +struct compat_resume_swap_area { + compat_loff_t offset; + u32 dev; +} __packed; + +static int snapshot_set_swap_area(struct snapshot_data *data, + void __user *argp) +{ + sector_t offset; + dev_t swdev; + + if (swsusp_swap_in_use()) + return -EPERM; + + if (in_compat_syscall()) { + struct compat_resume_swap_area swap_area; + + if (copy_from_user(&swap_area, argp, sizeof(swap_area))) + return -EFAULT; + swdev = new_decode_dev(swap_area.dev); + offset = swap_area.offset; + } else { + struct resume_swap_area swap_area; + + if (copy_from_user(&swap_area, argp, sizeof(swap_area))) + return -EFAULT; + swdev = new_decode_dev(swap_area.dev); + offset = swap_area.offset; + } + + /* + * User space encodes device types as two-byte values, + * so we need to recode them + */ + data->swap = swap_type_of(swdev, offset); + if (data->swap < 0) + return swdev ? 
-ENODEV : -EINVAL; + data->dev = swdev; + return 0; +} + static long snapshot_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -206,6 +254,11 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, loff_t size; sector_t offset; + if (need_wait) { + wait_for_device_probe(); + need_wait = false; + } + if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC) return -ENOTTY; if (_IOC_NR(cmd) > SNAPSHOT_IOC_MAXNR) @@ -353,34 +406,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; case SNAPSHOT_SET_SWAP_AREA: - if (swsusp_swap_in_use()) { - error = -EPERM; - } else { - struct resume_swap_area swap_area; - dev_t swdev; - - error = copy_from_user(&swap_area, (void __user *)arg, - sizeof(struct resume_swap_area)); - if (error) { - error = -EFAULT; - break; - } - - /* - * User space encodes device types as two-byte values, - * so we need to recode them - */ - swdev = new_decode_dev(swap_area.dev); - if (swdev) { - offset = swap_area.offset; - data->swap = swap_type_of(swdev, offset, NULL); - if (data->swap < 0) - error = -ENODEV; - } else { - data->swap = -1; - error = -EINVAL; - } - } + error = snapshot_set_swap_area(data, (void __user *)arg); break; default: @@ -395,12 +421,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, } #ifdef CONFIG_COMPAT - -struct compat_resume_swap_area { - compat_loff_t offset; - u32 dev; -} __packed; - static long snapshot_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -409,49 +429,15 @@ snapshot_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) switch (cmd) { case SNAPSHOT_GET_IMAGE_SIZE: case SNAPSHOT_AVAIL_SWAP_SIZE: - case SNAPSHOT_ALLOC_SWAP_PAGE: { - compat_loff_t __user *uoffset = compat_ptr(arg); - loff_t offset; - mm_segment_t old_fs; - int err; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = snapshot_ioctl(file, cmd, (unsigned long) &offset); - set_fs(old_fs); - if (!err && put_user(offset, uoffset)) - err = -EFAULT; - return err; - } - + case SNAPSHOT_ALLOC_SWAP_PAGE: case SNAPSHOT_CREATE_IMAGE: + case SNAPSHOT_SET_SWAP_AREA: return snapshot_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); - - case SNAPSHOT_SET_SWAP_AREA: { - struct compat_resume_swap_area __user *u_swap_area = - compat_ptr(arg); - struct resume_swap_area swap_area; - mm_segment_t old_fs; - int err; - - err = get_user(swap_area.offset, &u_swap_area->offset); - err |= get_user(swap_area.dev, &u_swap_area->dev); - if (err) - return -EFAULT; - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = snapshot_ioctl(file, SNAPSHOT_SET_SWAP_AREA, - (unsigned long) &swap_area); - set_fs(old_fs); - return err; - } - default: return snapshot_ioctl(file, cmd, arg); } } - #endif /* CONFIG_COMPAT */ static const struct file_operations snapshot_fops = { diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 105df4dfc783..52571dcad768 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -39,23 +39,20 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) { struct rb_node *node; struct wakelock *wl; - char *str = buf; - char *end = buf + PAGE_SIZE; + int len = 0; mutex_lock(&wakelocks_lock); for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { wl = rb_entry(node, struct wakelock, node); if (wl->ws->active == show_active) - str += scnprintf(str, end - str, "%s ", wl->name); + len += sysfs_emit_at(buf, len, "%s ", wl->name); } - if (str > buf) - str--; - str += scnprintf(str, end - str, "\n"); + len += sysfs_emit_at(buf, len, "\n"); 
 	mutex_unlock(&wakelocks_lock);
 
-	return (str - buf);
+	return len;
 }
 
 #if CONFIG_PM_WAKELOCKS_LIMIT > 0
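Usage note on the wakelock hunk above: sysfs_emit_at() tracks the write offset itself and guarantees the output never runs past the one-page buffer that sysfs hands to show() callbacks, which is what lets the manual str/end pointer arithmetic be dropped. A minimal sketch of the same pattern in a hypothetical show() callback (the device attribute and lock names are made up for illustration, not part of the patch):

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sysfs.h>

static ssize_t demo_locks_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	static const char * const names[] = { "lock_a", "lock_b" };
	int len = 0;
	int i;

	/* sysfs_emit_at() appends at offset 'len' and clamps to PAGE_SIZE. */
	for (i = 0; i < ARRAY_SIZE(names); i++)
		len += sysfs_emit_at(buf, len, "%s ", names[i]);

	len += sysfs_emit_at(buf, len, "\n");
	return len;
}
static DEVICE_ATTR_RO(demo_locks);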