aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c20
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/cgroup_freezer.c21
-rw-r--r--kernel/compat.c25
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/groups.c6
-rw-r--r--kernel/hrtimer.c67
-rw-r--r--kernel/irq/handle.c3
-rw-r--r--kernel/irq/manage.c89
-rw-r--r--kernel/irq/proc.c60
-rw-r--r--kernel/pm_qos_params.c218
-rw-r--r--kernel/posix-cpu-timers.c298
-rw-r--r--kernel/power/Makefile3
-rw-r--r--kernel/power/block_io.c103
-rw-r--r--kernel/power/power.h27
-rw-r--r--kernel/power/snapshot.c145
-rw-r--r--kernel/power/swap.c333
-rw-r--r--kernel/power/user.c37
-rw-r--r--kernel/sys.c31
-rw-r--r--kernel/sysctl.c28
-rw-r--r--kernel/time.c11
-rw-r--r--kernel/time/clocksource.c48
-rw-r--r--kernel/time/ntp.c2
-rw-r--r--kernel/time/timekeeping.c35
-rw-r--r--kernel/timer.c137
-rw-r--r--kernel/workqueue.c36
26 files changed, 964 insertions, 823 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index e4c0e1fee9b0..385b88461c29 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -216,7 +216,6 @@ static int acct_on(char *name)
{
struct file *file;
struct vfsmount *mnt;
- int error;
struct pid_namespace *ns;
struct bsd_acct_struct *acct = NULL;
@@ -244,13 +243,6 @@ static int acct_on(char *name)
}
}
- error = security_acct(file);
- if (error) {
- kfree(acct);
- filp_close(file, NULL);
- return error;
- }
-
spin_lock(&acct_lock);
if (ns->bacct == NULL) {
ns->bacct = acct;
@@ -281,7 +273,7 @@ static int acct_on(char *name)
*/
SYSCALL_DEFINE1(acct, const char __user *, name)
{
- int error;
+ int error = 0;
if (!capable(CAP_SYS_PACCT))
return -EPERM;
@@ -299,13 +291,11 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
if (acct == NULL)
return 0;
- error = security_acct(NULL);
- if (!error) {
- spin_lock(&acct_lock);
- acct_file_reopen(acct, NULL, NULL);
- spin_unlock(&acct_lock);
- }
+ spin_lock(&acct_lock);
+ acct_file_reopen(acct, NULL, NULL);
+ spin_unlock(&acct_lock);
}
+
return error;
}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e9ec642932ee..291775021b2e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3615,7 +3615,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
* @ss: the subsystem to load
*
* This function should be called in a modular subsystem's initcall. If the
- * subsytem is built as a module, it will be assigned a new subsys_id and set
+ * subsystem is built as a module, it will be assigned a new subsys_id and set
* up for use. If the subsystem is built-in anyway, work is delegated to the
* simpler cgroup_init_subsys.
*/
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e5c0244962b0..ce71ed53e88f 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -89,10 +89,10 @@ struct cgroup_subsys freezer_subsys;
/* Locks taken and their ordering
* ------------------------------
- * css_set_lock
* cgroup_mutex (AKA cgroup_lock)
- * task->alloc_lock (AKA task_lock)
* freezer->lock
+ * css_set_lock
+ * task->alloc_lock (AKA task_lock)
* task->sighand->siglock
*
* cgroup code forces css_set_lock to be taken before task->alloc_lock
@@ -100,33 +100,38 @@ struct cgroup_subsys freezer_subsys;
* freezer_create(), freezer_destroy():
* cgroup_mutex [ by cgroup core ]
*
- * can_attach():
- * cgroup_mutex
+ * freezer_can_attach():
+ * cgroup_mutex (held by caller of can_attach)
*
- * cgroup_frozen():
+ * cgroup_freezing_or_frozen():
* task->alloc_lock (to get task's cgroup)
*
* freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
- * task->alloc_lock (to get task's cgroup)
* freezer->lock
* sighand->siglock (if the cgroup is freezing)
*
* freezer_read():
* cgroup_mutex
* freezer->lock
+ * write_lock css_set_lock (cgroup iterator start)
+ * task->alloc_lock
* read_lock css_set_lock (cgroup iterator start)
*
* freezer_write() (freeze):
* cgroup_mutex
* freezer->lock
+ * write_lock css_set_lock (cgroup iterator start)
+ * task->alloc_lock
* read_lock css_set_lock (cgroup iterator start)
- * sighand->siglock
+ * sighand->siglock (fake signal delivery inside freeze_task())
*
* freezer_write() (unfreeze):
* cgroup_mutex
* freezer->lock
+ * write_lock css_set_lock (cgroup iterator start)
+ * task->alloc_lock
* read_lock css_set_lock (cgroup iterator start)
- * task->alloc_lock (to prevent races with freeze_task())
+ * task->alloc_lock (inside thaw_process(), prevents race with refrigerator())
* sighand->siglock
*/
static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
diff --git a/kernel/compat.c b/kernel/compat.c
index 7f40e9275fd9..5adab05a3172 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -495,29 +495,26 @@ asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len,
{
int ret;
cpumask_var_t mask;
- unsigned long *k;
- unsigned int min_length = cpumask_size();
-
- if (nr_cpu_ids <= BITS_PER_COMPAT_LONG)
- min_length = sizeof(compat_ulong_t);
- if (len < min_length)
+ if ((len * BITS_PER_BYTE) < nr_cpu_ids)
+ return -EINVAL;
+ if (len & (sizeof(compat_ulong_t)-1))
return -EINVAL;
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
ret = sched_getaffinity(pid, mask);
- if (ret < 0)
- goto out;
+ if (ret == 0) {
+ size_t retlen = min_t(size_t, len, cpumask_size());
- k = cpumask_bits(mask);
- ret = compat_put_bitmap(user_mask_ptr, k, min_length * 8);
- if (ret == 0)
- ret = min_length;
-
-out:
+ if (compat_put_bitmap(user_mask_ptr, cpumask_bits(mask), retlen * 8))
+ ret = -EFAULT;
+ else
+ ret = retlen;
+ }
free_cpumask_var(mask);
+
return ret;
}
diff --git a/kernel/cred.c b/kernel/cred.c
index 8f3672a58a1e..2c24870c55d1 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -522,8 +522,6 @@ int commit_creds(struct cred *new)
#endif
BUG_ON(atomic_read(&new->usage) < 1);
- security_commit_creds(new, old);
-
get_cred(new); /* we will require a ref for the subj creds too */
/* dumpability changes */
diff --git a/kernel/groups.c b/kernel/groups.c
index 2b45b2ee3964..53b1916c9492 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -164,12 +164,6 @@ int groups_search(const struct group_info *group_info, gid_t grp)
*/
int set_groups(struct cred *new, struct group_info *group_info)
{
- int retval;
-
- retval = security_task_setgroups(group_info);
- if (retval)
- return retval;
-
put_group_info(new->group_info);
groups_sort(group_info);
get_group_info(group_info);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0086628b6e97..b9b134b35088 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void)
}
/**
- * schedule_hrtimeout_range - sleep until timeout
+ * schedule_hrtimeout_range_clock - sleep until timeout
* @expires: timeout value (ktime_t)
* @delta: slack in expires timeout (ktime_t)
* @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
- *
- * Make the current task sleep until the given expiry time has
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * The @delta argument gives the kernel the freedom to schedule the
- * actual wakeup to a time that is both power and performance friendly.
- * The kernel give the normal best effort behavior for "@expires+@delta",
- * but may decide to fire the timer earlier, but no earlier than @expires.
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Returns 0 when the timer has expired otherwise -EINTR
+ * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
*/
-int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
- const enum hrtimer_mode mode)
+int __sched
+schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
+ const enum hrtimer_mode mode, int clock)
{
struct hrtimer_sleeper t;
@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
return -EINTR;
}
- hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
+ hrtimer_init_on_stack(&t.timer, clock, mode);
hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
hrtimer_init_sleeper(&t, current);
@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
return !t.task ? 0 : -EINTR;
}
+
+/**
+ * schedule_hrtimeout_range - sleep until timeout
+ * @expires: timeout value (ktime_t)
+ * @delta: slack in expires timeout (ktime_t)
+ * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * The @delta argument gives the kernel the freedom to schedule the
+ * actual wakeup to a time that is both power and performance friendly.
+ * The kernel give the normal best effort behavior for "@expires+@delta",
+ * but may decide to fire the timer earlier, but no earlier than @expires.
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
+ * pass before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns 0 when the timer has expired otherwise -EINTR
+ */
+int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+ const enum hrtimer_mode mode)
+{
+ return schedule_hrtimeout_range_clock(expires, delta, mode,
+ CLOCK_MONOTONIC);
+}
EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
/**
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 76d5a671bfe1..27e5c6911223 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -370,9 +370,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
irqreturn_t ret, retval = IRQ_NONE;
unsigned int status = 0;
- if (!(action->flags & IRQF_DISABLED))
- local_irq_enable_in_hardirq();
-
do {
trace_irq_handler_entry(irq, action);
ret = action->handler(irq, action->dev_id);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 704e488730a5..3164ba7ce151 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -138,6 +138,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
return 0;
}
+int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ unsigned long flags;
+
+ if (!desc)
+ return -EINVAL;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ desc->affinity_hint = m;
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
+
#ifndef CONFIG_AUTO_IRQ_AFFINITY
/*
* Generic version of the affinity autoselector.
@@ -757,16 +773,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (new->flags & IRQF_ONESHOT)
desc->status |= IRQ_ONESHOT;
- /*
- * Force MSI interrupts to run with interrupts
- * disabled. The multi vector cards can cause stack
- * overflows due to nested interrupts when enough of
- * them are directed to a core and fire at the same
- * time.
- */
- if (desc->msi_desc)
- new->flags |= IRQF_DISABLED;
-
if (!(desc->status & IRQ_NOAUTOEN)) {
desc->depth = 0;
desc->status &= ~IRQ_DISABLED;
@@ -916,6 +922,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
desc->chip->disable(irq);
}
+#ifdef CONFIG_SMP
+ /* make sure affinity_hint is cleaned up */
+ if (WARN_ON_ONCE(desc->affinity_hint))
+ desc->affinity_hint = NULL;
+#endif
+
raw_spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
@@ -1027,7 +1039,6 @@ EXPORT_SYMBOL(free_irq);
* Flags:
*
* IRQF_SHARED Interrupt is shared
- * IRQF_DISABLED Disable local interrupts while processing
* IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
* IRQF_TRIGGER_* Specify active edge(s) or level
*
@@ -1041,25 +1052,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
int retval;
/*
- * handle_IRQ_event() always ignores IRQF_DISABLED except for
- * the _first_ irqaction (sigh). That can cause oopsing, but
- * the behavior is classified as "will not fix" so we need to
- * start nudging drivers away from using that idiom.
- */
- if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) ==
- (IRQF_SHARED|IRQF_DISABLED)) {
- pr_warning(
- "IRQ %d/%s: IRQF_DISABLED is not guaranteed on shared IRQs\n",
- irq, devname);
- }
-
-#ifdef CONFIG_LOCKDEP
- /*
- * Lockdep wants atomic interrupt handlers:
- */
- irqflags |= IRQF_DISABLED;
-#endif
- /*
* Sanity-check: shared interrupts must pass in a real dev-ID,
* otherwise we'll have trouble later trying to figure out
* which interrupt is which (messes up the interrupt freeing
@@ -1120,3 +1112,40 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
return retval;
}
EXPORT_SYMBOL(request_threaded_irq);
+
+/**
+ * request_any_context_irq - allocate an interrupt line
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs.
+ * Threaded handler for threaded interrupts.
+ * @flags: Interrupt type flags
+ * @name: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the
+ * interrupt line and IRQ handling. It selects either a
+ * hardirq or threaded handling method depending on the
+ * context.
+ *
+ * On failure, it returns a negative value. On success,
+ * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED.
+ */
+int request_any_context_irq(unsigned int irq, irq_handler_t handler,
+ unsigned long flags, const char *name, void *dev_id)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ int ret;
+
+ if (!desc)
+ return -EINVAL;
+
+ if (desc->status & IRQ_NESTED_THREAD) {
+ ret = request_threaded_irq(irq, NULL, handler,
+ flags, name, dev_id);
+ return !ret ? IRQC_IS_NESTED : ret;
+ }
+
+ ret = request_irq(irq, handler, flags, name, dev_id);
+ return !ret ? IRQC_IS_HARDIRQ : ret;
+}
+EXPORT_SYMBOL_GPL(request_any_context_irq);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 7a6eb04ef6b5..09a2ee540bd2 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -32,6 +32,27 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
return 0;
}
+static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
+{
+ struct irq_desc *desc = irq_to_desc((long)m->private);
+ unsigned long flags;
+ cpumask_var_t mask;
+
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ if (desc->affinity_hint)
+ cpumask_copy(mask, desc->affinity_hint);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+ seq_cpumask(m, mask);
+ seq_putc(m, '\n');
+ free_cpumask_var(mask);
+
+ return 0;
+}
+
#ifndef is_affinity_mask_valid
#define is_affinity_mask_valid(val) 1
#endif
@@ -84,6 +105,11 @@ static int irq_affinity_proc_open(struct inode *inode, struct file *file)
return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
}
+static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
+}
+
static const struct file_operations irq_affinity_proc_fops = {
.open = irq_affinity_proc_open,
.read = seq_read,
@@ -92,6 +118,13 @@ static const struct file_operations irq_affinity_proc_fops = {
.write = irq_affinity_proc_write,
};
+static const struct file_operations irq_affinity_hint_proc_fops = {
+ .open = irq_affinity_hint_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int default_affinity_show(struct seq_file *m, void *v)
{
seq_cpumask(m, irq_default_affinity);
@@ -147,6 +180,26 @@ static const struct file_operations default_affinity_proc_fops = {
.release = single_release,
.write = default_affinity_write,
};
+
+static int irq_node_proc_show(struct seq_file *m, void *v)
+{
+ struct irq_desc *desc = irq_to_desc((long) m->private);
+
+ seq_printf(m, "%d\n", desc->node);
+ return 0;
+}
+
+static int irq_node_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_node_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations irq_node_proc_fops = {
+ .open = irq_node_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
#endif
static int irq_spurious_proc_show(struct seq_file *m, void *v)
@@ -231,6 +284,13 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
/* create /proc/irq/<irq>/smp_affinity */
proc_create_data("smp_affinity", 0600, desc->dir,
&irq_affinity_proc_fops, (void *)(long)irq);
+
+ /* create /proc/irq/<irq>/affinity_hint */
+ proc_create_data("affinity_hint", 0400, desc->dir,
+ &irq_affinity_hint_proc_fops, (void *)(long)irq);
+
+ proc_create_data("node", 0444, desc->dir,
+ &irq_node_proc_fops, (void *)(long)irq);
#endif
proc_create_data("spurious", 0444, desc->dir,
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 3db49b9ca374..f42d3f737a33 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -2,7 +2,7 @@
* This module exposes the interface to kernel space for specifying
* QoS dependencies. It provides infrastructure for registration of:
*
- * Dependents on a QoS value : register requirements
+ * Dependents on a QoS value : register requests
* Watchers of QoS value : get notified when target QoS value changes
*
* This QoS design is best effort based. Dependents register their QoS needs.
@@ -14,19 +14,21 @@
* timeout: usec <-- currently not used.
* throughput: kbs (kilo byte / sec)
*
- * There are lists of pm_qos_objects each one wrapping requirements, notifiers
+ * There are lists of pm_qos_objects each one wrapping requests, notifiers
*
- * User mode requirements on a QOS parameter register themselves to the
+ * User mode requests on a QOS parameter register themselves to the
* subsystem by opening the device node /dev/... and writing there request to
* the node. As long as the process holds a file handle open to the node the
* client continues to be accounted for. Upon file release the usermode
- * requirement is removed and a new qos target is computed. This way when the
- * requirement that the application has is cleaned up when closes the file
+ * request is removed and a new qos target is computed. This way when the
+ * request that the application has is cleaned up when closes the file
* pointer or exits the pm_qos_object will get an opportunity to clean up.
*
* Mark Gross <mgross@linux.intel.com>
*/
+/*#define DEBUG*/
+
#include <linux/pm_qos_params.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
@@ -42,25 +44,25 @@
#include <linux/uaccess.h>
/*
- * locking rule: all changes to requirements or notifiers lists
+ * locking rule: all changes to requests or notifiers lists
* or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
* held, taken with _irqsave. One lock to rule them all
*/
-struct requirement_list {
+struct pm_qos_request_list {
struct list_head list;
union {
s32 value;
s32 usec;
s32 kbps;
};
- char *name;
+ int pm_qos_class;
};
static s32 max_compare(s32 v1, s32 v2);
static s32 min_compare(s32 v1, s32 v2);
struct pm_qos_object {
- struct requirement_list requirements;
+ struct pm_qos_request_list requests;
struct blocking_notifier_head *notifiers;
struct miscdevice pm_qos_power_miscdev;
char *name;
@@ -72,7 +74,7 @@ struct pm_qos_object {
static struct pm_qos_object null_pm_qos;
static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
static struct pm_qos_object cpu_dma_pm_qos = {
- .requirements = {LIST_HEAD_INIT(cpu_dma_pm_qos.requirements.list)},
+ .requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)},
.notifiers = &cpu_dma_lat_notifier,
.name = "cpu_dma_latency",
.default_value = 2000 * USEC_PER_SEC,
@@ -82,7 +84,7 @@ static struct pm_qos_object cpu_dma_pm_qos = {
static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
static struct pm_qos_object network_lat_pm_qos = {
- .requirements = {LIST_HEAD_INIT(network_lat_pm_qos.requirements.list)},
+ .requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)},
.notifiers = &network_lat_notifier,
.name = "network_latency",
.default_value = 2000 * USEC_PER_SEC,
@@ -93,8 +95,7 @@ static struct pm_qos_object network_lat_pm_qos = {
static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
static struct pm_qos_object network_throughput_pm_qos = {
- .requirements =
- {LIST_HEAD_INIT(network_throughput_pm_qos.requirements.list)},
+ .requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)},
.notifiers = &network_throughput_notifier,
.name = "network_throughput",
.default_value = 0,
@@ -135,31 +136,34 @@ static s32 min_compare(s32 v1, s32 v2)
}
-static void update_target(int target)
+static void update_target(int pm_qos_class)
{
s32 extreme_value;
- struct requirement_list *node;
+ struct pm_qos_request_list *node;
unsigned long flags;
int call_notifier = 0;
spin_lock_irqsave(&pm_qos_lock, flags);
- extreme_value = pm_qos_array[target]->default_value;
+ extreme_value = pm_qos_array[pm_qos_class]->default_value;
list_for_each_entry(node,
- &pm_qos_array[target]->requirements.list, list) {
- extreme_value = pm_qos_array[target]->comparitor(
+ &pm_qos_array[pm_qos_class]->requests.list, list) {
+ extreme_value = pm_qos_array[pm_qos_class]->comparitor(
extreme_value, node->value);
}
- if (atomic_read(&pm_qos_array[target]->target_value) != extreme_value) {
+ if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) !=
+ extreme_value) {
call_notifier = 1;
- atomic_set(&pm_qos_array[target]->target_value, extreme_value);
- pr_debug(KERN_ERR "new target for qos %d is %d\n", target,
- atomic_read(&pm_qos_array[target]->target_value));
+ atomic_set(&pm_qos_array[pm_qos_class]->target_value,
+ extreme_value);
+ pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class,
+ atomic_read(&pm_qos_array[pm_qos_class]->target_value));
}
spin_unlock_irqrestore(&pm_qos_lock, flags);
if (call_notifier)
- blocking_notifier_call_chain(pm_qos_array[target]->notifiers,
- (unsigned long) extreme_value, NULL);
+ blocking_notifier_call_chain(
+ pm_qos_array[pm_qos_class]->notifiers,
+ (unsigned long) extreme_value, NULL);
}
static int register_pm_qos_misc(struct pm_qos_object *qos)
@@ -185,125 +189,112 @@ static int find_pm_qos_object_by_minor(int minor)
}
/**
- * pm_qos_requirement - returns current system wide qos expectation
+ * pm_qos_request - returns current system wide qos expectation
* @pm_qos_class: identification of which qos value is requested
*
* This function returns the current target value in an atomic manner.
*/
-int pm_qos_requirement(int pm_qos_class)
+int pm_qos_request(int pm_qos_class)
{
return atomic_read(&pm_qos_array[pm_qos_class]->target_value);
}
-EXPORT_SYMBOL_GPL(pm_qos_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_request);
/**
- * pm_qos_add_requirement - inserts new qos request into the list
+ * pm_qos_add_request - inserts new qos request into the list
* @pm_qos_class: identifies which list of qos request to us
- * @name: identifies the request
* @value: defines the qos request
*
* This function inserts a new entry in the pm_qos_class list of requested qos
* performance characteristics. It recomputes the aggregate QoS expectations
- * for the pm_qos_class of parameters.
+ * for the pm_qos_class of parameters, and returns the pm_qos_request list
+ * element as a handle for use in updating and removal. Call needs to save
+ * this handle for later use.
*/
-int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value)
+struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value)
{
- struct requirement_list *dep;
+ struct pm_qos_request_list *dep;
unsigned long flags;
- dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL);
+ dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL);
if (dep) {
if (value == PM_QOS_DEFAULT_VALUE)
dep->value = pm_qos_array[pm_qos_class]->default_value;
else
dep->value = value;
- dep->name = kstrdup(name, GFP_KERNEL);
- if (!dep->name)
- goto cleanup;
+ dep->pm_qos_class = pm_qos_class;
spin_lock_irqsave(&pm_qos_lock, flags);
list_add(&dep->list,
- &pm_qos_array[pm_qos_class]->requirements.list);
+ &pm_qos_array[pm_qos_class]->requests.list);
spin_unlock_irqrestore(&pm_qos_lock, flags);
update_target(pm_qos_class);
-
- return 0;
}
-cleanup:
- kfree(dep);
- return -ENOMEM;
+ return dep;
}
-EXPORT_SYMBOL_GPL(pm_qos_add_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_add_request);
/**
- * pm_qos_update_requirement - modifies an existing qos request
- * @pm_qos_class: identifies which list of qos request to us
- * @name: identifies the request
+ * pm_qos_update_request - modifies an existing qos request
+ * @pm_qos_req : handle to list element holding a pm_qos request to use
* @value: defines the qos request
*
- * Updates an existing qos requirement for the pm_qos_class of parameters along
+ * Updates an existing qos request for the pm_qos_class of parameters along
* with updating the target pm_qos_class value.
*
- * If the named request isn't in the list then no change is made.
+ * Attempts are made to make this code callable on hot code paths.
*/
-int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value)
+void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
+ s32 new_value)
{
unsigned long flags;
- struct requirement_list *node;
int pending_update = 0;
+ s32 temp;
- spin_lock_irqsave(&pm_qos_lock, flags);
- list_for_each_entry(node,
- &pm_qos_array[pm_qos_class]->requirements.list, list) {
- if (strcmp(node->name, name) == 0) {
- if (new_value == PM_QOS_DEFAULT_VALUE)
- node->value =
- pm_qos_array[pm_qos_class]->default_value;
- else
- node->value = new_value;
+ if (pm_qos_req) { /*guard against callers passing in null */
+ spin_lock_irqsave(&pm_qos_lock, flags);
+ if (new_value == PM_QOS_DEFAULT_VALUE)
+ temp = pm_qos_array[pm_qos_req->pm_qos_class]->default_value;
+ else
+ temp = new_value;
+
+ if (temp != pm_qos_req->value) {
pending_update = 1;
- break;
+ pm_qos_req->value = temp;
}
+ spin_unlock_irqrestore(&pm_qos_lock, flags);
+ if (pending_update)
+ update_target(pm_qos_req->pm_qos_class);
}
- spin_unlock_irqrestore(&pm_qos_lock, flags);
- if (pending_update)
- update_target(pm_qos_class);
-
- return 0;
}
-EXPORT_SYMBOL_GPL(pm_qos_update_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_update_request);
/**
- * pm_qos_remove_requirement - modifies an existing qos request
- * @pm_qos_class: identifies which list of qos request to us
- * @name: identifies the request
+ * pm_qos_remove_request - modifies an existing qos request
+ * @pm_qos_req: handle to request list element
*
- * Will remove named qos request from pm_qos_class list of parameters and
- * recompute the current target value for the pm_qos_class.
+ * Will remove pm qos request from the list of requests and
+ * recompute the current target value for the pm_qos_class. Call this
+ * on slow code paths.
*/
-void pm_qos_remove_requirement(int pm_qos_class, char *name)
+void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req)
{
unsigned long flags;
- struct requirement_list *node;
- int pending_update = 0;
+ int qos_class;
+ if (pm_qos_req == NULL)
+ return;
+ /* silent return to keep pcm code cleaner */
+
+ qos_class = pm_qos_req->pm_qos_class;
spin_lock_irqsave(&pm_qos_lock, flags);
- list_for_each_entry(node,
- &pm_qos_array[pm_qos_class]->requirements.list, list) {
- if (strcmp(node->name, name) == 0) {
- kfree(node->name);
- list_del(&node->list);
- kfree(node);
- pending_update = 1;
- break;
- }
- }
+ list_del(&pm_qos_req->list);
+ kfree(pm_qos_req);
spin_unlock_irqrestore(&pm_qos_lock, flags);
- if (pending_update)
- update_target(pm_qos_class);
+ update_target(qos_class);
}
-EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_remove_request);
/**
* pm_qos_add_notifier - sets notification entry for changes to target value
@@ -313,7 +304,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
* will register the notifier into a notification chain that gets called
* upon changes to the pm_qos_class target value.
*/
- int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
+int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
{
int retval;
@@ -343,21 +334,16 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
}
EXPORT_SYMBOL_GPL(pm_qos_remove_notifier);
-#define PID_NAME_LEN 32
-
static int pm_qos_power_open(struct inode *inode, struct file *filp)
{
- int ret;
long pm_qos_class;
- char name[PID_NAME_LEN];
pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
if (pm_qos_class >= 0) {
- filp->private_data = (void *)pm_qos_class;
- snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
- ret = pm_qos_add_requirement(pm_qos_class, name,
- PM_QOS_DEFAULT_VALUE);
- if (ret >= 0)
+ filp->private_data = (void *) pm_qos_add_request(pm_qos_class,
+ PM_QOS_DEFAULT_VALUE);
+
+ if (filp->private_data)
return 0;
}
return -EPERM;
@@ -365,32 +351,40 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
static int pm_qos_power_release(struct inode *inode, struct file *filp)
{
- int pm_qos_class;
- char name[PID_NAME_LEN];
+ struct pm_qos_request_list *req;
- pm_qos_class = (long)filp->private_data;
- snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
- pm_qos_remove_requirement(pm_qos_class, name);
+ req = (struct pm_qos_request_list *)filp->private_data;
+ pm_qos_remove_request(req);
return 0;
}
+
static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
size_t count, loff_t *f_pos)
{
s32 value;
- int pm_qos_class;
- char name[PID_NAME_LEN];
-
- pm_qos_class = (long)filp->private_data;
- if (count != sizeof(s32))
+ int x;
+ char ascii_value[11];
+ struct pm_qos_request_list *pm_qos_req;
+
+ if (count == sizeof(s32)) {
+ if (copy_from_user(&value, buf, sizeof(s32)))
+ return -EFAULT;
+ } else if (count == 11) { /* len('0x12345678/0') */
+ if (copy_from_user(ascii_value, buf, 11))
+ return -EFAULT;
+ x = sscanf(ascii_value, "%x", &value);
+ if (x != 1)
+ return -EINVAL;
+ pr_debug(KERN_ERR "%s, %d, 0x%x\n", ascii_value, x, value);
+ } else
return -EINVAL;
- if (copy_from_user(&value, buf, sizeof(s32)))
- return -EFAULT;
- snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
- pm_qos_update_requirement(pm_qos_class, name, value);
- return sizeof(s32);
+ pm_qos_req = (struct pm_qos_request_list *)filp->private_data;
+ pm_qos_update_request(pm_qos_req, value);
+
+ return count;
}
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bc7704b3a443..00bb252f29a2 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -11,19 +11,18 @@
#include <trace/events/timer.h>
/*
- * Called after updating RLIMIT_CPU to set timer expiration if necessary.
+ * Called after updating RLIMIT_CPU to run cpu timer and update
+ * tsk->signal->cputime_expires expiration cache if necessary. Needs
+ * siglock protection since other code may update expiration cache as
+ * well.
*/
void update_rlimit_cpu(unsigned long rlim_new)
{
cputime_t cputime = secs_to_cputime(rlim_new);
- struct signal_struct *const sig = current->signal;
- if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) ||
- cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) {
- spin_lock_irq(&current->sighand->siglock);
- set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
- spin_unlock_irq(&current->sighand->siglock);
- }
+ spin_lock_irq(&current->sighand->siglock);
+ set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+ spin_unlock_irq(&current->sighand->siglock);
}
static int check_clock(const clockid_t which_clock)
@@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
cputime_gt(expires, new_exp);
}
-static inline int expires_le(cputime_t expires, cputime_t new_exp)
-{
- return !cputime_eq(expires, cputime_zero) &&
- cputime_le(expires, new_exp);
-}
/*
* Insert the timer on the appropriate list before any timers that
* expire later. This must be called with the tasklist_lock held
- * for reading, and interrupts disabled.
+ * for reading, interrupts disabled and p->sighand->siglock taken.
*/
-static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
+static void arm_timer(struct k_itimer *timer)
{
struct task_struct *p = timer->it.cpu.task;
struct list_head *head, *listpos;
+ struct task_cputime *cputime_expires;
struct cpu_timer_list *const nt = &timer->it.cpu;
struct cpu_timer_list *next;
- unsigned long i;
- head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
- p->cpu_timers : p->signal->cpu_timers);
+ if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
+ head = p->cpu_timers;
+ cputime_expires = &p->cputime_expires;
+ } else {
+ head = p->signal->cpu_timers;
+ cputime_expires = &p->signal->cputime_expires;
+ }
head += CPUCLOCK_WHICH(timer->it_clock);
- BUG_ON(!irqs_disabled());
- spin_lock(&p->sighand->siglock);
-
listpos = head;
- if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
- list_for_each_entry(next, head, entry) {
- if (next->expires.sched > nt->expires.sched)
- break;
- listpos = &next->entry;
- }
- } else {
- list_for_each_entry(next, head, entry) {
- if (cputime_gt(next->expires.cpu, nt->expires.cpu))
- break;
- listpos = &next->entry;
- }
+ list_for_each_entry(next, head, entry) {
+ if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
+ break;
+ listpos = &next->entry;
}
list_add(&nt->entry, listpos);
if (listpos == head) {
+ union cpu_time_count *exp = &nt->expires;
+
/*
- * We are the new earliest-expiring timer.
- * If we are a thread timer, there can always
- * be a process timer telling us to stop earlier.
+ * We are the new earliest-expiring POSIX 1.b timer, hence
+ * need to update expiration cache. Take into account that
+ * for process timers we share expiration cache with itimers
+ * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
*/
- if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
- union cpu_time_count *exp = &nt->expires;
-
- switch (CPUCLOCK_WHICH(timer->it_clock)) {
- default:
- BUG();
- case CPUCLOCK_PROF:
- if (expires_gt(p->cputime_expires.prof_exp,
- exp->cpu))
- p->cputime_expires.prof_exp = exp->cpu;
- break;
- case CPUCLOCK_VIRT:
- if (expires_gt(p->cputime_expires.virt_exp,
- exp->cpu))
- p->cputime_expires.virt_exp = exp->cpu;
- break;
- case CPUCLOCK_SCHED:
- if (p->cputime_expires.sched_exp == 0 ||
- p->cputime_expires.sched_exp > exp->sched)
- p->cputime_expires.sched_exp =
- exp->sched;
- break;
- }
- } else {
- struct signal_struct *const sig = p->signal;
- union cpu_time_count *exp = &timer->it.cpu.expires;
-
- /*
- * For a process timer, set the cached expiration time.
- */
- switch (CPUCLOCK_WHICH(timer->it_clock)) {
- default:
- BUG();
- case CPUCLOCK_VIRT:
- if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
- exp->cpu))
- break;
- sig->cputime_expires.virt_exp = exp->cpu;
- break;
- case CPUCLOCK_PROF:
- if (expires_le(sig->it[CPUCLOCK_PROF].expires,
- exp->cpu))
- break;
- i = sig->rlim[RLIMIT_CPU].rlim_cur;
- if (i != RLIM_INFINITY &&
- i <= cputime_to_secs(exp->cpu))
- break;
- sig->cputime_expires.prof_exp = exp->cpu;
- break;
- case CPUCLOCK_SCHED:
- sig->cputime_expires.sched_exp = exp->sched;
- break;
- }
+ switch (CPUCLOCK_WHICH(timer->it_clock)) {
+ case CPUCLOCK_PROF:
+ if (expires_gt(cputime_expires->prof_exp, exp->cpu))
+ cputime_expires->prof_exp = exp->cpu;
+ break;
+ case CPUCLOCK_VIRT:
+ if (expires_gt(cputime_expires->virt_exp, exp->cpu))
+ cputime_expires->virt_exp = exp->cpu;
+ break;
+ case CPUCLOCK_SCHED:
+ if (cputime_expires->sched_exp == 0 ||
+ cputime_expires->sched_exp > exp->sched)
+ cputime_expires->sched_exp = exp->sched;
+ break;
}
}
-
- spin_unlock(&p->sighand->siglock);
}
/*
@@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
*/
static void cpu_timer_fire(struct k_itimer *timer)
{
- if (unlikely(timer->sigq == NULL)) {
+ if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
+ /*
+ * User don't want any signal.
+ */
+ timer->it.cpu.expires.sched = 0;
+ } else if (unlikely(timer->sigq == NULL)) {
/*
* This a special case for clock_nanosleep,
* not a normal timer from sys_timer_create.
@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
struct itimerspec *new, struct itimerspec *old)
{
struct task_struct *p = timer->it.cpu.task;
- union cpu_time_count old_expires, new_expires, val;
+ union cpu_time_count old_expires, new_expires, old_incr, val;
int ret;
if (unlikely(p == NULL)) {
@@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
BUG_ON(!irqs_disabled());
ret = 0;
+ old_incr = timer->it.cpu.incr;
spin_lock(&p->sighand->siglock);
old_expires = timer->it.cpu.expires;
if (unlikely(timer->it.cpu.firing)) {
@@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
ret = TIMER_RETRY;
} else
list_del_init(&timer->it.cpu.entry);
- spin_unlock(&p->sighand->siglock);
/*
* We need to sample the current value to convert the new
@@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
* disable this firing since we are already reporting
* it as an overrun (thanks to bump_cpu_timer above).
*/
+ spin_unlock(&p->sighand->siglock);
read_unlock(&tasklist_lock);
goto out;
}
@@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
*/
timer->it.cpu.expires = new_expires;
if (new_expires.sched != 0 &&
- (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
cpu_time_before(timer->it_clock, val, new_expires)) {
- arm_timer(timer, val);
+ arm_timer(timer);
}
+ spin_unlock(&p->sighand->siglock);
read_unlock(&tasklist_lock);
/*
@@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
timer->it_overrun = -1;
if (new_expires.sched != 0 &&
- (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
!cpu_time_before(timer->it_clock, val, new_expires)) {
/*
* The designated time already passed, so we notify
@@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
out:
if (old) {
sample_to_timespec(timer->it_clock,
- timer->it.cpu.incr, &old->it_interval);
+ old_incr, &old->it_interval);
}
return ret;
}
@@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
read_unlock(&tasklist_lock);
}
- if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
- if (timer->it.cpu.incr.sched == 0 &&
- cpu_time_before(timer->it_clock,
- timer->it.cpu.expires, now)) {
- /*
- * Do-nothing timer expired and has no reload,
- * so it's as if it was never set.
- */
- timer->it.cpu.expires.sched = 0;
- itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
- return;
- }
- /*
- * Account for any expirations and reloads that should
- * have happened.
- */
- bump_cpu_timer(timer, now);
- }
-
if (unlikely(clear_dead)) {
/*
* We've noticed that the thread is dead, but
@@ -1066,16 +1002,9 @@ static void stop_process_timers(struct signal_struct *sig)
struct thread_group_cputimer *cputimer = &sig->cputimer;
unsigned long flags;
- if (!cputimer->running)
- return;
-
spin_lock_irqsave(&cputimer->lock, flags);
cputimer->running = 0;
spin_unlock_irqrestore(&cputimer->lock, flags);
-
- sig->cputime_expires.prof_exp = cputime_zero;
- sig->cputime_expires.virt_exp = cputime_zero;
- sig->cputime_expires.sched_exp = 0;
}
static u32 onecputick;
@@ -1112,6 +1041,23 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
}
}
+/**
+ * task_cputime_zero - Check a task_cputime struct for all zero fields.
+ *
+ * @cputime: The struct to compare.
+ *
+ * Checks @cputime to see if all fields are zero. Returns true if all fields
+ * are zero, false if any field is nonzero.
+ */
+static inline int task_cputime_zero(const struct task_cputime *cputime)
+{
+ if (cputime_eq(cputime->utime, cputime_zero) &&
+ cputime_eq(cputime->stime, cputime_zero) &&
+ cputime->sum_exec_runtime == 0)
+ return 1;
+ return 0;
+}
+
/*
* Check for any per-thread CPU timers that have fired and move them
* off the tsk->*_timers list onto the firing list. Per-thread timers
@@ -1129,19 +1075,6 @@ static void check_process_timers(struct task_struct *tsk,
unsigned long soft;
/*
- * Don't sample the current process CPU clocks if there are no timers.
- */
- if (list_empty(&timers[CPUCLOCK_PROF]) &&
- cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) &&
- sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
- list_empty(&timers[CPUCLOCK_VIRT]) &&
- cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
- list_empty(&timers[CPUCLOCK_SCHED])) {
- stop_process_timers(sig);
- return;
- }
-
- /*
* Collect the current process totals.
*/
thread_group_cputimer(tsk, &cputime);
@@ -1230,18 +1163,11 @@ static void check_process_timers(struct task_struct *tsk,
}
}
- if (!cputime_eq(prof_expires, cputime_zero) &&
- (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
- cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
- sig->cputime_expires.prof_exp = prof_expires;
- if (!cputime_eq(virt_expires, cputime_zero) &&
- (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
- cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
- sig->cputime_expires.virt_exp = virt_expires;
- if (sched_expires != 0 &&
- (sig->cputime_expires.sched_exp == 0 ||
- sig->cputime_expires.sched_exp > sched_expires))
- sig->cputime_expires.sched_exp = sched_expires;
+ sig->cputime_expires.prof_exp = prof_expires;
+ sig->cputime_expires.virt_exp = virt_expires;
+ sig->cputime_expires.sched_exp = sched_expires;
+ if (task_cputime_zero(&sig->cputime_expires))
+ stop_process_timers(sig);
}
/*
@@ -1270,6 +1196,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
goto out;
}
read_lock(&tasklist_lock); /* arm_timer needs it. */
+ spin_lock(&p->sighand->siglock);
} else {
read_lock(&tasklist_lock);
if (unlikely(p->signal == NULL)) {
@@ -1290,6 +1217,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
clear_dead_task(timer, now);
goto out_unlock;
}
+ spin_lock(&p->sighand->siglock);
cpu_timer_sample_group(timer->it_clock, p, &now);
bump_cpu_timer(timer, now);
/* Leave the tasklist_lock locked for the call below. */
@@ -1298,7 +1226,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
/*
* Now re-arm for the new expiry time.
*/
- arm_timer(timer, now);
+ BUG_ON(!irqs_disabled());
+ arm_timer(timer);
+ spin_unlock(&p->sighand->siglock);
out_unlock:
read_unlock(&tasklist_lock);
@@ -1310,23 +1240,6 @@ out:
}
/**
- * task_cputime_zero - Check a task_cputime struct for all zero fields.
- *
- * @cputime: The struct to compare.
- *
- * Checks @cputime to see if all fields are zero. Returns true if all fields
- * are zero, false if any field is nonzero.
- */
-static inline int task_cputime_zero(const struct task_cputime *cputime)
-{
- if (cputime_eq(cputime->utime, cputime_zero) &&
- cputime_eq(cputime->stime, cputime_zero) &&
- cputime->sum_exec_runtime == 0)
- return 1;
- return 0;
-}
-
-/**
* task_cputime_expired - Compare two task_cputime entities.
*
* @sample: The task_cputime structure to be checked for expiration.
@@ -1382,7 +1295,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
}
sig = tsk->signal;
- if (!task_cputime_zero(&sig->cputime_expires)) {
+ if (sig->cputimer.running) {
struct task_cputime group_sample;
thread_group_cputimer(tsk, &group_sample);
@@ -1390,7 +1303,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
return 1;
}
- return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
+ return 0;
}
/*
@@ -1419,7 +1332,12 @@ void run_posix_cpu_timers(struct task_struct *tsk)
* put them on the firing list.
*/
check_thread_timers(tsk, &firing);
- check_process_timers(tsk, &firing);
+ /*
+ * If there are any active process wide timers (POSIX 1.b, itimers,
+ * RLIMIT_CPU) cputimer must be running.
+ */
+ if (tsk->signal->cputimer.running)
+ check_process_timers(tsk, &firing);
/*
* We must release these locks before taking any timer's lock.
@@ -1456,21 +1374,23 @@ void run_posix_cpu_timers(struct task_struct *tsk)
}
/*
- * Set one of the process-wide special case CPU timers.
+ * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
* The tsk->sighand->siglock must be held by the caller.
- * The *newval argument is relative and we update it to be absolute, *oldval
- * is absolute and we update it to be relative.
*/
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval)
{
union cpu_time_count now;
- struct list_head *head;
BUG_ON(clock_idx == CPUCLOCK_SCHED);
cpu_timer_sample_group(clock_idx, tsk, &now);
if (oldval) {
+ /*
+ * We are setting itimer. The *oldval is absolute and we update
+ * it to be relative, *newval argument is relative and we update
+ * it to be absolute.
+ */
if (!cputime_eq(*oldval, cputime_zero)) {
if (cputime_le(*oldval, now.cpu)) {
/* Just about to fire. */
@@ -1483,33 +1403,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
if (cputime_eq(*newval, cputime_zero))
return;
*newval = cputime_add(*newval, now.cpu);
-
- /*
- * If the RLIMIT_CPU timer will expire before the
- * ITIMER_PROF timer, we have nothing else to do.
- */
- if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
- < cputime_to_secs(*newval))
- return;
}
/*
- * Check whether there are any process timers already set to fire
- * before this one. If so, we don't have anything more to do.
+ * Update expiration cache if we are the earliest timer, or eventually
+ * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
*/
- head = &tsk->signal->cpu_timers[clock_idx];
- if (list_empty(head) ||
- cputime_ge(list_first_entry(head,
- struct cpu_timer_list, entry)->expires.cpu,
- *newval)) {
- switch (clock_idx) {
- case CPUCLOCK_PROF:
+ switch (clock_idx) {
+ case CPUCLOCK_PROF:
+ if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
tsk->signal->cputime_expires.prof_exp = *newval;
- break;
- case CPUCLOCK_VIRT:
+ break;
+ case CPUCLOCK_VIRT:
+ if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
tsk->signal->cputime_expires.virt_exp = *newval;
- break;
- }
+ break;
}
}
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 43191815f874..524e058dcf06 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -8,7 +8,8 @@ obj-$(CONFIG_PM_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
-obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
+ block_io.o
obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
new file mode 100644
index 000000000000..97024fd40cd5
--- /dev/null
+++ b/kernel/power/block_io.c
@@ -0,0 +1,103 @@
+/*
+ * This file provides functions for block I/O operations on swap/file.
+ *
+ * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/bio.h>
+#include <linux/kernel.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+
+#include "power.h"
+
+/**
+ * submit - submit BIO request.
+ * @rw: READ or WRITE.
+ * @off physical offset of page.
+ * @page: page we're reading or writing.
+ * @bio_chain: list of pending biod (for async reading)
+ *
+ * Straight from the textbook - allocate and initialize the bio.
+ * If we're reading, make sure the page is marked as dirty.
+ * Then submit it and, if @bio_chain == NULL, wait.
+ */
+static int submit(int rw, struct block_device *bdev, sector_t sector,
+ struct page *page, struct bio **bio_chain)
+{
+ const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+ struct bio *bio;
+
+ bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
+ bio->bi_sector = sector;
+ bio->bi_bdev = bdev;
+ bio->bi_end_io = end_swap_bio_read;
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
+ (unsigned long long)sector);
+ bio_put(bio);
+ return -EFAULT;
+ }
+
+ lock_page(page);
+ bio_get(bio);
+
+ if (bio_chain == NULL) {
+ submit_bio(bio_rw, bio);
+ wait_on_page_locked(page);
+ if (rw == READ)
+ bio_set_pages_dirty(bio);
+ bio_put(bio);
+ } else {
+ if (rw == READ)
+ get_page(page); /* These pages are freed later */
+ bio->bi_private = *bio_chain;
+ *bio_chain = bio;
+ submit_bio(bio_rw, bio);
+ }
+ return 0;
+}
+
+int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
+{
+ return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
+ virt_to_page(addr), bio_chain);
+}
+
+int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
+{
+ return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
+ virt_to_page(addr), bio_chain);
+}
+
+int hib_wait_on_bio_chain(struct bio **bio_chain)
+{
+ struct bio *bio;
+ struct bio *next_bio;
+ int ret = 0;
+
+ if (bio_chain == NULL)
+ return 0;
+
+ bio = *bio_chain;
+ if (bio == NULL)
+ return 0;
+ while (bio) {
+ struct page *page;
+
+ next_bio = bio->bi_private;
+ page = bio->bi_io_vec[0].bv_page;
+ wait_on_page_locked(page);
+ if (!PageUptodate(page) || PageError(page))
+ ret = -EIO;
+ put_page(page);
+ bio_put(bio);
+ bio = next_bio;
+ }
+ *bio_chain = NULL;
+ return ret;
+}
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 46c5a26630a3..006270fe382d 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -97,24 +97,12 @@ extern int hibernate_preallocate_memory(void);
*/
struct snapshot_handle {
- loff_t offset; /* number of the last byte ready for reading
- * or writing in the sequence
- */
unsigned int cur; /* number of the block of PAGE_SIZE bytes the
* next operation will refer to (ie. current)
*/
- unsigned int cur_offset; /* offset with respect to the current
- * block (for the next operation)
- */
- unsigned int prev; /* number of the block of PAGE_SIZE bytes that
- * was the current one previously
- */
void *buffer; /* address of the block to read from
* or write to
*/
- unsigned int buf_offset; /* location to read from or write to,
- * given as a displacement from 'buffer'
- */
int sync_read; /* Set to one to notify the caller of
* snapshot_write_next() that it may
* need to call wait_on_bio_chain()
@@ -125,12 +113,12 @@ struct snapshot_handle {
* snapshot_read_next()/snapshot_write_next() is allowed to
* read/write data after the function returns
*/
-#define data_of(handle) ((handle).buffer + (handle).buf_offset)
+#define data_of(handle) ((handle).buffer)
extern unsigned int snapshot_additional_pages(struct zone *zone);
extern unsigned long snapshot_get_image_size(void);
-extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
-extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
+extern int snapshot_read_next(struct snapshot_handle *handle);
+extern int snapshot_write_next(struct snapshot_handle *handle);
extern void snapshot_write_finalize(struct snapshot_handle *handle);
extern int snapshot_image_loaded(struct snapshot_handle *handle);
@@ -154,6 +142,15 @@ extern int swsusp_read(unsigned int *flags_p);
extern int swsusp_write(unsigned int flags);
extern void swsusp_close(fmode_t);
+/* kernel/power/block_io.c */
+extern struct block_device *hib_resume_bdev;
+
+extern int hib_bio_read_page(pgoff_t page_off, void *addr,
+ struct bio **bio_chain);
+extern int hib_bio_write_page(pgoff_t page_off, void *addr,
+ struct bio **bio_chain);
+extern int hib_wait_on_bio_chain(struct bio **bio_chain);
+
struct timeval;
/* kernel/power/swsusp.c */
extern void swsusp_show_speed(struct timeval *, struct timeval *,
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index be861c26dda7..25ce010e9f8b 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1604,14 +1604,9 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
* snapshot_handle structure. The structure gets updated and a pointer
* to it should be passed to this function every next time.
*
- * The @count parameter should contain the number of bytes the caller
- * wants to read from the snapshot. It must not be zero.
- *
* On success the function returns a positive number. Then, the caller
* is allowed to read up to the returned number of bytes from the memory
- * location computed by the data_of() macro. The number returned
- * may be smaller than @count, but this only happens if the read would
- * cross a page boundary otherwise.
+ * location computed by the data_of() macro.
*
* The function returns 0 to indicate the end of data stream condition,
* and a negative number is returned on error. In such cases the
@@ -1619,7 +1614,7 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
* any more.
*/
-int snapshot_read_next(struct snapshot_handle *handle, size_t count)
+int snapshot_read_next(struct snapshot_handle *handle)
{
if (handle->cur > nr_meta_pages + nr_copy_pages)
return 0;
@@ -1630,7 +1625,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
if (!buffer)
return -ENOMEM;
}
- if (!handle->offset) {
+ if (!handle->cur) {
int error;
error = init_header((struct swsusp_info *)buffer);
@@ -1639,42 +1634,30 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
handle->buffer = buffer;
memory_bm_position_reset(&orig_bm);
memory_bm_position_reset(&copy_bm);
- }
- if (handle->prev < handle->cur) {
- if (handle->cur <= nr_meta_pages) {
- memset(buffer, 0, PAGE_SIZE);
- pack_pfns(buffer, &orig_bm);
- } else {
- struct page *page;
+ } else if (handle->cur <= nr_meta_pages) {
+ memset(buffer, 0, PAGE_SIZE);
+ pack_pfns(buffer, &orig_bm);
+ } else {
+ struct page *page;
- page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
- if (PageHighMem(page)) {
- /* Highmem pages are copied to the buffer,
- * because we can't return with a kmapped
- * highmem page (we may not be called again).
- */
- void *kaddr;
+ page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
+ if (PageHighMem(page)) {
+ /* Highmem pages are copied to the buffer,
+ * because we can't return with a kmapped
+ * highmem page (we may not be called again).
+ */
+ void *kaddr;
- kaddr = kmap_atomic(page, KM_USER0);
- memcpy(buffer, kaddr, PAGE_SIZE);
- kunmap_atomic(kaddr, KM_USER0);
- handle->buffer = buffer;
- } else {
- handle->buffer = page_address(page);
- }
+ kaddr = kmap_atomic(page, KM_USER0);
+ memcpy(buffer, kaddr, PAGE_SIZE);
+ kunmap_atomic(kaddr, KM_USER0);
+ handle->buffer = buffer;
+ } else {
+ handle->buffer = page_address(page);
}
- handle->prev = handle->cur;
- }
- handle->buf_offset = handle->cur_offset;
- if (handle->cur_offset + count >= PAGE_SIZE) {
- count = PAGE_SIZE - handle->cur_offset;
- handle->cur_offset = 0;
- handle->cur++;
- } else {
- handle->cur_offset += count;
}
- handle->offset += count;
- return count;
+ handle->cur++;
+ return PAGE_SIZE;
}
/**
@@ -2133,14 +2116,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
* snapshot_handle structure. The structure gets updated and a pointer
* to it should be passed to this function every next time.
*
- * The @count parameter should contain the number of bytes the caller
- * wants to write to the image. It must not be zero.
- *
* On success the function returns a positive number. Then, the caller
* is allowed to write up to the returned number of bytes to the memory
- * location computed by the data_of() macro. The number returned
- * may be smaller than @count, but this only happens if the write would
- * cross a page boundary otherwise.
+ * location computed by the data_of() macro.
*
* The function returns 0 to indicate the "end of file" condition,
* and a negative number is returned on error. In such cases the
@@ -2148,16 +2126,18 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
* any more.
*/
-int snapshot_write_next(struct snapshot_handle *handle, size_t count)
+int snapshot_write_next(struct snapshot_handle *handle)
{
static struct chain_allocator ca;
int error = 0;
/* Check if we have already loaded the entire image */
- if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
+ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
return 0;
- if (handle->offset == 0) {
+ handle->sync_read = 1;
+
+ if (!handle->cur) {
if (!buffer)
/* This makes the buffer be freed by swsusp_free() */
buffer = get_image_page(GFP_ATOMIC, PG_ANY);
@@ -2166,56 +2146,43 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
return -ENOMEM;
handle->buffer = buffer;
- }
- handle->sync_read = 1;
- if (handle->prev < handle->cur) {
- if (handle->prev == 0) {
- error = load_header(buffer);
- if (error)
- return error;
+ } else if (handle->cur == 1) {
+ error = load_header(buffer);
+ if (error)
+ return error;
- error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
- if (error)
- return error;
+ error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
+ if (error)
+ return error;
+
+ } else if (handle->cur <= nr_meta_pages + 1) {
+ error = unpack_orig_pfns(buffer, &copy_bm);
+ if (error)
+ return error;
- } else if (handle->prev <= nr_meta_pages) {
- error = unpack_orig_pfns(buffer, &copy_bm);
+ if (handle->cur == nr_meta_pages + 1) {
+ error = prepare_image(&orig_bm, &copy_bm);
if (error)
return error;
- if (handle->prev == nr_meta_pages) {
- error = prepare_image(&orig_bm, &copy_bm);
- if (error)
- return error;
-
- chain_init(&ca, GFP_ATOMIC, PG_SAFE);
- memory_bm_position_reset(&orig_bm);
- restore_pblist = NULL;
- handle->buffer = get_buffer(&orig_bm, &ca);
- handle->sync_read = 0;
- if (IS_ERR(handle->buffer))
- return PTR_ERR(handle->buffer);
- }
- } else {
- copy_last_highmem_page();
+ chain_init(&ca, GFP_ATOMIC, PG_SAFE);
+ memory_bm_position_reset(&orig_bm);
+ restore_pblist = NULL;
handle->buffer = get_buffer(&orig_bm, &ca);
+ handle->sync_read = 0;
if (IS_ERR(handle->buffer))
return PTR_ERR(handle->buffer);
- if (handle->buffer != buffer)
- handle->sync_read = 0;
}
- handle->prev = handle->cur;
- }
- handle->buf_offset = handle->cur_offset;
- if (handle->cur_offset + count >= PAGE_SIZE) {
- count = PAGE_SIZE - handle->cur_offset;
- handle->cur_offset = 0;
- handle->cur++;
} else {
- handle->cur_offset += count;
+ copy_last_highmem_page();
+ handle->buffer = get_buffer(&orig_bm, &ca);
+ if (IS_ERR(handle->buffer))
+ return PTR_ERR(handle->buffer);
+ if (handle->buffer != buffer)
+ handle->sync_read = 0;
}
- handle->offset += count;
- return count;
+ handle->cur++;
+ return PAGE_SIZE;
}
/**
@@ -2230,7 +2197,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
{
copy_last_highmem_page();
/* Free only if we have loaded the image entirely */
- if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
+ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
free_highmem_data();
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 66824d71983a..b0bb21778391 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -29,6 +29,40 @@
#define SWSUSP_SIG "S1SUSPEND"
+/*
+ * The swap map is a data structure used for keeping track of each page
+ * written to a swap partition. It consists of many swap_map_page
+ * structures that contain each an array of MAP_PAGE_SIZE swap entries.
+ * These structures are stored on the swap and linked together with the
+ * help of the .next_swap member.
+ *
+ * The swap map is created during suspend. The swap map pages are
+ * allocated and populated one at a time, so we only need one memory
+ * page to set up the entire structure.
+ *
+ * During resume we also only need to use one swap_map_page structure
+ * at a time.
+ */
+
+#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
+
+struct swap_map_page {
+ sector_t entries[MAP_PAGE_ENTRIES];
+ sector_t next_swap;
+};
+
+/**
+ * The swap_map_handle structure is used for handling swap in
+ * a file-alike way
+ */
+
+struct swap_map_handle {
+ struct swap_map_page *cur;
+ sector_t cur_swap;
+ sector_t first_sector;
+ unsigned int k;
+};
+
struct swsusp_header {
char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)];
sector_t image;
@@ -145,110 +179,24 @@ int swsusp_swap_in_use(void)
*/
static unsigned short root_swap = 0xffff;
-static struct block_device *resume_bdev;
-
-/**
- * submit - submit BIO request.
- * @rw: READ or WRITE.
- * @off physical offset of page.
- * @page: page we're reading or writing.
- * @bio_chain: list of pending biod (for async reading)
- *
- * Straight from the textbook - allocate and initialize the bio.
- * If we're reading, make sure the page is marked as dirty.
- * Then submit it and, if @bio_chain == NULL, wait.
- */
-static int submit(int rw, pgoff_t page_off, struct page *page,
- struct bio **bio_chain)
-{
- const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
- struct bio *bio;
-
- bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
- bio->bi_sector = page_off * (PAGE_SIZE >> 9);
- bio->bi_bdev = resume_bdev;
- bio->bi_end_io = end_swap_bio_read;
-
- if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
- printk(KERN_ERR "PM: Adding page to bio failed at %ld\n",
- page_off);
- bio_put(bio);
- return -EFAULT;
- }
-
- lock_page(page);
- bio_get(bio);
-
- if (bio_chain == NULL) {
- submit_bio(bio_rw, bio);
- wait_on_page_locked(page);
- if (rw == READ)
- bio_set_pages_dirty(bio);
- bio_put(bio);
- } else {
- if (rw == READ)
- get_page(page); /* These pages are freed later */
- bio->bi_private = *bio_chain;
- *bio_chain = bio;
- submit_bio(bio_rw, bio);
- }
- return 0;
-}
-
-static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
- return submit(READ, page_off, virt_to_page(addr), bio_chain);
-}
-
-static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
- return submit(WRITE, page_off, virt_to_page(addr), bio_chain);
-}
-
-static int wait_on_bio_chain(struct bio **bio_chain)
-{
- struct bio *bio;
- struct bio *next_bio;
- int ret = 0;
-
- if (bio_chain == NULL)
- return 0;
-
- bio = *bio_chain;
- if (bio == NULL)
- return 0;
- while (bio) {
- struct page *page;
-
- next_bio = bio->bi_private;
- page = bio->bi_io_vec[0].bv_page;
- wait_on_page_locked(page);
- if (!PageUptodate(page) || PageError(page))
- ret = -EIO;
- put_page(page);
- bio_put(bio);
- bio = next_bio;
- }
- *bio_chain = NULL;
- return ret;
-}
+struct block_device *hib_resume_bdev;
/*
* Saving part
*/
-static int mark_swapfiles(sector_t start, unsigned int flags)
+static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
{
int error;
- bio_read_page(swsusp_resume_block, swsusp_header, NULL);
+ hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
!memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
memcpy(swsusp_header->sig,SWSUSP_SIG, 10);
- swsusp_header->image = start;
+ swsusp_header->image = handle->first_sector;
swsusp_header->flags = flags;
- error = bio_write_page(swsusp_resume_block,
+ error = hib_bio_write_page(swsusp_resume_block,
swsusp_header, NULL);
} else {
printk(KERN_ERR "PM: Swap header not found!\n");
@@ -260,25 +208,26 @@ static int mark_swapfiles(sector_t start, unsigned int flags)
/**
* swsusp_swap_check - check if the resume device is a swap device
* and get its index (if so)
+ *
+ * This is called before saving image
*/
-
-static int swsusp_swap_check(void) /* This is called before saving image */
+static int swsusp_swap_check(void)
{
int res;
res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
- &resume_bdev);
+ &hib_resume_bdev);
if (res < 0)
return res;
root_swap = res;
- res = blkdev_get(resume_bdev, FMODE_WRITE);
+ res = blkdev_get(hib_resume_bdev, FMODE_WRITE);
if (res)
return res;
- res = set_blocksize(resume_bdev, PAGE_SIZE);
+ res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
if (res < 0)
- blkdev_put(resume_bdev, FMODE_WRITE);
+ blkdev_put(hib_resume_bdev, FMODE_WRITE);
return res;
}
@@ -309,42 +258,9 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
} else {
src = buf;
}
- return bio_write_page(offset, src, bio_chain);
+ return hib_bio_write_page(offset, src, bio_chain);
}
-/*
- * The swap map is a data structure used for keeping track of each page
- * written to a swap partition. It consists of many swap_map_page
- * structures that contain each an array of MAP_PAGE_SIZE swap entries.
- * These structures are stored on the swap and linked together with the
- * help of the .next_swap member.
- *
- * The swap map is created during suspend. The swap map pages are
- * allocated and populated one at a time, so we only need one memory
- * page to set up the entire structure.
- *
- * During resume we also only need to use one swap_map_page structure
- * at a time.
- */
-
-#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
-
-struct swap_map_page {
- sector_t entries[MAP_PAGE_ENTRIES];
- sector_t next_swap;
-};
-
-/**
- * The swap_map_handle structure is used for handling swap in
- * a file-alike way
- */
-
-struct swap_map_handle {
- struct swap_map_page *cur;
- sector_t cur_swap;
- unsigned int k;
-};
-
static void release_swap_writer(struct swap_map_handle *handle)
{
if (handle->cur)
@@ -354,16 +270,33 @@ static void release_swap_writer(struct swap_map_handle *handle)
static int get_swap_writer(struct swap_map_handle *handle)
{
+ int ret;
+
+ ret = swsusp_swap_check();
+ if (ret) {
+ if (ret != -ENOSPC)
+ printk(KERN_ERR "PM: Cannot find swap device, try "
+ "swapon -a.\n");
+ return ret;
+ }
handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
- if (!handle->cur)
- return -ENOMEM;
+ if (!handle->cur) {
+ ret = -ENOMEM;
+ goto err_close;
+ }
handle->cur_swap = alloc_swapdev_block(root_swap);
if (!handle->cur_swap) {
- release_swap_writer(handle);
- return -ENOSPC;
+ ret = -ENOSPC;
+ goto err_rel;
}
handle->k = 0;
+ handle->first_sector = handle->cur_swap;
return 0;
+err_rel:
+ release_swap_writer(handle);
+err_close:
+ swsusp_close(FMODE_WRITE);
+ return ret;
}
static int swap_write_page(struct swap_map_handle *handle, void *buf,
@@ -380,7 +313,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
return error;
handle->cur->entries[handle->k++] = offset;
if (handle->k >= MAP_PAGE_ENTRIES) {
- error = wait_on_bio_chain(bio_chain);
+ error = hib_wait_on_bio_chain(bio_chain);
if (error)
goto out;
offset = alloc_swapdev_block(root_swap);
@@ -406,6 +339,24 @@ static int flush_swap_writer(struct swap_map_handle *handle)
return -EINVAL;
}
+static int swap_writer_finish(struct swap_map_handle *handle,
+ unsigned int flags, int error)
+{
+ if (!error) {
+ flush_swap_writer(handle);
+ printk(KERN_INFO "PM: S");
+ error = mark_swapfiles(handle, flags);
+ printk("|\n");
+ }
+
+ if (error)
+ free_all_swap_pages(root_swap);
+ release_swap_writer(handle);
+ swsusp_close(FMODE_WRITE);
+
+ return error;
+}
+
/**
* save_image - save the suspend image data
*/
@@ -431,7 +382,7 @@ static int save_image(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
while (1) {
- ret = snapshot_read_next(snapshot, PAGE_SIZE);
+ ret = snapshot_read_next(snapshot);
if (ret <= 0)
break;
ret = swap_write_page(handle, data_of(*snapshot), &bio);
@@ -441,7 +392,7 @@ static int save_image(struct swap_map_handle *handle,
printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
nr_pages++;
}
- err2 = wait_on_bio_chain(&bio);
+ err2 = hib_wait_on_bio_chain(&bio);
do_gettimeofday(&stop);
if (!ret)
ret = err2;
@@ -483,50 +434,34 @@ int swsusp_write(unsigned int flags)
struct swap_map_handle handle;
struct snapshot_handle snapshot;
struct swsusp_info *header;
+ unsigned long pages;
int error;
- error = swsusp_swap_check();
+ pages = snapshot_get_image_size();
+ error = get_swap_writer(&handle);
if (error) {
- printk(KERN_ERR "PM: Cannot find swap device, try "
- "swapon -a.\n");
+ printk(KERN_ERR "PM: Cannot get swap writer\n");
return error;
}
+ if (!enough_swap(pages)) {
+ printk(KERN_ERR "PM: Not enough free swap\n");
+ error = -ENOSPC;
+ goto out_finish;
+ }
memset(&snapshot, 0, sizeof(struct snapshot_handle));
- error = snapshot_read_next(&snapshot, PAGE_SIZE);
+ error = snapshot_read_next(&snapshot);
if (error < PAGE_SIZE) {
if (error >= 0)
error = -EFAULT;
- goto out;
+ goto out_finish;
}
header = (struct swsusp_info *)data_of(snapshot);
- if (!enough_swap(header->pages)) {
- printk(KERN_ERR "PM: Not enough free swap\n");
- error = -ENOSPC;
- goto out;
- }
- error = get_swap_writer(&handle);
- if (!error) {
- sector_t start = handle.cur_swap;
-
- error = swap_write_page(&handle, header, NULL);
- if (!error)
- error = save_image(&handle, &snapshot,
- header->pages - 1);
-
- if (!error) {
- flush_swap_writer(&handle);
- printk(KERN_INFO "PM: S");
- error = mark_swapfiles(start, flags);
- printk("|\n");
- }
- }
- if (error)
- free_all_swap_pages(root_swap);
-
- release_swap_writer(&handle);
- out:
- swsusp_close(FMODE_WRITE);
+ error = swap_write_page(&handle, header, NULL);
+ if (!error)
+ error = save_image(&handle, &snapshot, pages - 1);
+out_finish:
+ error = swap_writer_finish(&handle, flags, error);
return error;
}
@@ -542,18 +477,21 @@ static void release_swap_reader(struct swap_map_handle *handle)
handle->cur = NULL;
}
-static int get_swap_reader(struct swap_map_handle *handle, sector_t start)
+static int get_swap_reader(struct swap_map_handle *handle,
+ unsigned int *flags_p)
{
int error;
- if (!start)
+ *flags_p = swsusp_header->flags;
+
+ if (!swsusp_header->image) /* how can this happen? */
return -EINVAL;
handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
if (!handle->cur)
return -ENOMEM;
- error = bio_read_page(start, handle->cur, NULL);
+ error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL);
if (error) {
release_swap_reader(handle);
return error;
@@ -573,21 +511,28 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
offset = handle->cur->entries[handle->k];
if (!offset)
return -EFAULT;
- error = bio_read_page(offset, buf, bio_chain);
+ error = hib_bio_read_page(offset, buf, bio_chain);
if (error)
return error;
if (++handle->k >= MAP_PAGE_ENTRIES) {
- error = wait_on_bio_chain(bio_chain);
+ error = hib_wait_on_bio_chain(bio_chain);
handle->k = 0;
offset = handle->cur->next_swap;
if (!offset)
release_swap_reader(handle);
else if (!error)
- error = bio_read_page(offset, handle->cur, NULL);
+ error = hib_bio_read_page(offset, handle->cur, NULL);
}
return error;
}
+static int swap_reader_finish(struct swap_map_handle *handle)
+{
+ release_swap_reader(handle);
+
+ return 0;
+}
+
/**
* load_image - load the image using the swap map handle
* @handle and the snapshot handle @snapshot
@@ -615,21 +560,21 @@ static int load_image(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
for ( ; ; ) {
- error = snapshot_write_next(snapshot, PAGE_SIZE);
+ error = snapshot_write_next(snapshot);
if (error <= 0)
break;
error = swap_read_page(handle, data_of(*snapshot), &bio);
if (error)
break;
if (snapshot->sync_read)
- error = wait_on_bio_chain(&bio);
+ error = hib_wait_on_bio_chain(&bio);
if (error)
break;
if (!(nr_pages % m))
printk("\b\b\b\b%3d%%", nr_pages / m);
nr_pages++;
}
- err2 = wait_on_bio_chain(&bio);
+ err2 = hib_wait_on_bio_chain(&bio);
do_gettimeofday(&stop);
if (!error)
error = err2;
@@ -657,20 +602,20 @@ int swsusp_read(unsigned int *flags_p)
struct snapshot_handle snapshot;
struct swsusp_info *header;
- *flags_p = swsusp_header->flags;
-
memset(&snapshot, 0, sizeof(struct snapshot_handle));
- error = snapshot_write_next(&snapshot, PAGE_SIZE);
+ error = snapshot_write_next(&snapshot);
if (error < PAGE_SIZE)
return error < 0 ? error : -EFAULT;
header = (struct swsusp_info *)data_of(snapshot);
- error = get_swap_reader(&handle, swsusp_header->image);
+ error = get_swap_reader(&handle, flags_p);
+ if (error)
+ goto end;
if (!error)
error = swap_read_page(&handle, header, NULL);
if (!error)
error = load_image(&handle, &snapshot, header->pages - 1);
- release_swap_reader(&handle);
-
+ swap_reader_finish(&handle);
+end:
if (!error)
pr_debug("PM: Image successfully loaded\n");
else
@@ -686,11 +631,11 @@ int swsusp_check(void)
{
int error;
- resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
- if (!IS_ERR(resume_bdev)) {
- set_blocksize(resume_bdev, PAGE_SIZE);
+ hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
+ if (!IS_ERR(hib_resume_bdev)) {
+ set_blocksize(hib_resume_bdev, PAGE_SIZE);
memset(swsusp_header, 0, PAGE_SIZE);
- error = bio_read_page(swsusp_resume_block,
+ error = hib_bio_read_page(swsusp_resume_block,
swsusp_header, NULL);
if (error)
goto put;
@@ -698,7 +643,7 @@ int swsusp_check(void)
if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) {
memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
/* Reset swap signature now */
- error = bio_write_page(swsusp_resume_block,
+ error = hib_bio_write_page(swsusp_resume_block,
swsusp_header, NULL);
} else {
error = -EINVAL;
@@ -706,11 +651,11 @@ int swsusp_check(void)
put:
if (error)
- blkdev_put(resume_bdev, FMODE_READ);
+ blkdev_put(hib_resume_bdev, FMODE_READ);
else
pr_debug("PM: Signature found, resuming\n");
} else {
- error = PTR_ERR(resume_bdev);
+ error = PTR_ERR(hib_resume_bdev);
}
if (error)
@@ -725,12 +670,12 @@ put:
void swsusp_close(fmode_t mode)
{
- if (IS_ERR(resume_bdev)) {
+ if (IS_ERR(hib_resume_bdev)) {
pr_debug("PM: Image device not initialised\n");
return;
}
- blkdev_put(resume_bdev, mode);
+ blkdev_put(hib_resume_bdev, mode);
}
static int swsusp_header_init(void)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index a8c96212bc1b..e819e17877ca 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -151,6 +151,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
{
struct snapshot_data *data;
ssize_t res;
+ loff_t pg_offp = *offp & ~PAGE_MASK;
mutex_lock(&pm_mutex);
@@ -159,14 +160,19 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
res = -ENODATA;
goto Unlock;
}
- res = snapshot_read_next(&data->handle, count);
- if (res > 0) {
- if (copy_to_user(buf, data_of(data->handle), res))
- res = -EFAULT;
- else
- *offp = data->handle.offset;
+ if (!pg_offp) { /* on page boundary? */
+ res = snapshot_read_next(&data->handle);
+ if (res <= 0)
+ goto Unlock;
+ } else {
+ res = PAGE_SIZE - pg_offp;
}
+ res = simple_read_from_buffer(buf, count, &pg_offp,
+ data_of(data->handle), res);
+ if (res > 0)
+ *offp += res;
+
Unlock:
mutex_unlock(&pm_mutex);
@@ -178,18 +184,25 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
{
struct snapshot_data *data;
ssize_t res;
+ loff_t pg_offp = *offp & ~PAGE_MASK;
mutex_lock(&pm_mutex);
data = filp->private_data;
- res = snapshot_write_next(&data->handle, count);
- if (res > 0) {
- if (copy_from_user(data_of(data->handle), buf, res))
- res = -EFAULT;
- else
- *offp = data->handle.offset;
+
+ if (!pg_offp) {
+ res = snapshot_write_next(&data->handle);
+ if (res <= 0)
+ goto unlock;
+ } else {
+ res = PAGE_SIZE - pg_offp;
}
+ res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp,
+ buf, count);
+ if (res > 0)
+ *offp += res;
+unlock:
mutex_unlock(&pm_mutex);
return res;
diff --git a/kernel/sys.c b/kernel/sys.c
index 7cb426a58965..0d36d889c74d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -492,10 +492,6 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
return -ENOMEM;
old = current_cred();
- retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
- if (retval)
- goto error;
-
retval = -EPERM;
if (rgid != (gid_t) -1) {
if (old->gid == rgid ||
@@ -543,10 +539,6 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
return -ENOMEM;
old = current_cred();
- retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
- if (retval)
- goto error;
-
retval = -EPERM;
if (capable(CAP_SETGID))
new->gid = new->egid = new->sgid = new->fsgid = gid;
@@ -610,10 +602,6 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
return -ENOMEM;
old = current_cred();
- retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
- if (retval)
- goto error;
-
retval = -EPERM;
if (ruid != (uid_t) -1) {
new->uid = ruid;
@@ -675,10 +663,6 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
return -ENOMEM;
old = current_cred();
- retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
- if (retval)
- goto error;
-
retval = -EPERM;
if (capable(CAP_SETUID)) {
new->suid = new->uid = uid;
@@ -719,9 +703,6 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
if (!new)
return -ENOMEM;
- retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
- if (retval)
- goto error;
old = current_cred();
retval = -EPERM;
@@ -788,10 +769,6 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
return -ENOMEM;
old = current_cred();
- retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
- if (retval)
- goto error;
-
retval = -EPERM;
if (!capable(CAP_SETGID)) {
if (rgid != (gid_t) -1 && rgid != old->gid &&
@@ -851,9 +828,6 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
old = current_cred();
old_fsuid = old->fsuid;
- if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
- goto error;
-
if (uid == old->uid || uid == old->euid ||
uid == old->suid || uid == old->fsuid ||
capable(CAP_SETUID)) {
@@ -864,7 +838,6 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
}
}
-error:
abort_creds(new);
return old_fsuid;
@@ -888,9 +861,6 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
old = current_cred();
old_fsgid = old->fsgid;
- if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
- goto error;
-
if (gid == old->gid || gid == old->egid ||
gid == old->sgid || gid == old->fsgid ||
capable(CAP_SETGID)) {
@@ -900,7 +870,6 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
}
}
-error:
abort_creds(new);
return old_fsgid;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bcfb79e94ec7..b12583047757 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -163,6 +163,27 @@ static int proc_taint(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
+#ifdef CONFIG_MAGIC_SYSRQ
+static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */
+
+static int sysrq_sysctl_handler(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int error;
+
+ error = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (error)
+ return error;
+
+ if (write)
+ sysrq_toggle_support(__sysrq_enabled);
+
+ return 0;
+}
+
+#endif
+
static struct ctl_table root_table[];
static struct ctl_table_root sysctl_table_root;
static struct ctl_table_header root_table_header = {
@@ -567,7 +588,7 @@ static struct ctl_table kern_table[] = {
.data = &__sysrq_enabled,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = sysrq_sysctl_handler,
},
#endif
#ifdef CONFIG_PROC_SYSCTL
@@ -621,7 +642,7 @@ static struct ctl_table kern_table[] = {
#endif
{
.procname = "userprocess_debug",
- .data = &sysctl_userprocess_debug,
+ .data = &show_unhandled_signals,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
@@ -1431,7 +1452,8 @@ static struct ctl_table fs_table[] = {
};
static struct ctl_table debug_table[] = {
-#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC)
+#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
+ defined(CONFIG_S390)
{
.procname = "exception-trace",
.data = &show_unhandled_signals,
diff --git a/kernel/time.c b/kernel/time.c
index 656dccfe1cbb..50612faa9baf 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
*/
static inline void warp_clock(void)
{
- write_seqlock_irq(&xtime_lock);
- wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
- xtime.tv_sec += sys_tz.tz_minuteswest * 60;
- update_xtime_cache(0);
- write_sequnlock_irq(&xtime_lock);
- clock_was_set();
+ struct timespec delta, adjust;
+ delta.tv_sec = sys_tz.tz_minuteswest * 60;
+ delta.tv_nsec = 0;
+ adjust = timespec_add_safe(current_kernel_time(), delta);
+ do_settimeofday(&adjust);
}
/*
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1f5dde637457..f08e99c1d561 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -625,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs)
list_add(&cs->list, entry);
}
+
+/*
+ * Maximum time we expect to go between ticks. This includes idle
+ * tickless time. It provides the trade off between selecting a
+ * mult/shift pair that is very precise but can only handle a short
+ * period of time, vs. a mult/shift pair that can handle long periods
+ * of time but isn't as precise.
+ *
+ * This is a subsystem constant, and actual hardware limitations
+ * may override it (ie: clocksources that wrap every 3 seconds).
+ */
+#define MAX_UPDATE_LENGTH 5 /* Seconds */
+
+/**
+ * __clocksource_register_scale - Used to install new clocksources
+ * @t: clocksource to be registered
+ * @scale: Scale factor multiplied against freq to get clocksource hz
+ * @freq: clocksource frequency (cycles per second) divided by scale
+ *
+ * Returns -EBUSY if registration fails, zero otherwise.
+ *
+ * This *SHOULD NOT* be called directly! Please use the
+ * clocksource_register_hz() or clocksource_register_khz helper functions.
+ */
+int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
+{
+
+ /*
+ * Ideally we want to use some of the limits used in
+ * clocksource_max_deferment, to provide a more informed
+ * MAX_UPDATE_LENGTH. But for now this just gets the
+ * register interface working properly.
+ */
+ clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+ NSEC_PER_SEC/scale,
+ MAX_UPDATE_LENGTH*scale);
+ cs->max_idle_ns = clocksource_max_deferment(cs);
+
+ mutex_lock(&clocksource_mutex);
+ clocksource_enqueue(cs);
+ clocksource_select();
+ clocksource_enqueue_watchdog(cs);
+ mutex_unlock(&clocksource_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__clocksource_register_scale);
+
+
/**
* clocksource_register - Used to install new clocksources
* @t: clocksource to be registered
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7c0f180d6e9d..c63116863a80 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -69,7 +69,7 @@ static s64 time_freq;
/* time at last adjustment (secs): */
static long time_reftime;
-long time_adjust;
+static long time_adjust;
/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
static s64 ntp_tick_adj;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 39f6177fafac..caf8d4d4f5c8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,13 +165,6 @@ struct timespec raw_time;
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
-static struct timespec xtime_cache __attribute__ ((aligned (16)));
-void update_xtime_cache(u64 nsec)
-{
- xtime_cache = xtime;
- timespec_add_ns(&xtime_cache, nsec);
-}
-
/* must hold xtime_lock */
void timekeeping_leap_insert(int leapsecond)
{
@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv)
xtime = *tv;
- update_xtime_cache(0);
-
timekeeper.ntp_error = 0;
ntp_clear();
@@ -559,7 +550,6 @@ void __init timekeeping_init(void)
}
set_normalized_timespec(&wall_to_monotonic,
-boot.tv_sec, -boot.tv_nsec);
- update_xtime_cache(0);
total_sleep_time.tv_sec = 0;
total_sleep_time.tv_nsec = 0;
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
total_sleep_time = timespec_add_safe(total_sleep_time, ts);
}
- update_xtime_cache(0);
/* re-base the last cycle value */
timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
timekeeper.ntp_error = 0;
@@ -788,7 +777,6 @@ void update_wall_time(void)
{
struct clocksource *clock;
cycle_t offset;
- u64 nsecs;
int shift = 0, maxshift;
/* Make sure we're fully resumed: */
@@ -847,7 +835,9 @@ void update_wall_time(void)
timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
}
- /* store full nanoseconds into xtime after rounding it up and
+
+ /*
+ * Store full nanoseconds into xtime after rounding it up and
* add the remainder to the error difference.
*/
xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
@@ -855,8 +845,15 @@ void update_wall_time(void)
timekeeper.ntp_error += timekeeper.xtime_nsec <<
timekeeper.ntp_error_shift;
- nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
- update_xtime_cache(nsecs);
+ /*
+ * Finally, make sure that after the rounding
+ * xtime.tv_nsec isn't larger then NSEC_PER_SEC
+ */
+ if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
+ xtime.tv_nsec -= NSEC_PER_SEC;
+ xtime.tv_sec++;
+ second_overflow();
+ }
/* check to see if there is a new clocksource to use */
update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
@@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
unsigned long get_seconds(void)
{
- return xtime_cache.tv_sec;
+ return xtime.tv_sec;
}
EXPORT_SYMBOL(get_seconds);
struct timespec __current_kernel_time(void)
{
- return xtime_cache;
+ return xtime;
}
struct timespec current_kernel_time(void)
@@ -913,7 +910,7 @@ struct timespec current_kernel_time(void)
do {
seq = read_seqbegin(&xtime_lock);
- now = xtime_cache;
+ now = xtime;
} while (read_seqretry(&xtime_lock, seq));
return now;
@@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void)
do {
seq = read_seqbegin(&xtime_lock);
- now = xtime_cache;
+ now = xtime;
mono = wall_to_monotonic;
} while (read_seqretry(&xtime_lock, seq));
diff --git a/kernel/timer.c b/kernel/timer.c
index aeb6a54f2771..9199f3c52215 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j)
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
+/**
+ * set_timer_slack - set the allowed slack for a timer
+ * @slack_hz: the amount of time (in jiffies) allowed for rounding
+ *
+ * Set the amount of time, in jiffies, that a certain timer has
+ * in terms of slack. By setting this value, the timer subsystem
+ * will schedule the actual timer somewhere between
+ * the time mod_timer() asks for, and that time plus the slack.
+ *
+ * By setting the slack to -1, a percentage of the delay is used
+ * instead.
+ */
+void set_timer_slack(struct timer_list *timer, int slack_hz)
+{
+ timer->slack = slack_hz;
+}
+EXPORT_SYMBOL_GPL(set_timer_slack);
+
static inline void set_running_timer(struct tvec_base *base,
struct timer_list *timer)
@@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer,
{
timer->entry.next = NULL;
timer->base = __raw_get_cpu_var(tvec_bases);
+ timer->slack = -1;
#ifdef CONFIG_TIMER_STATS
timer->start_site = NULL;
timer->start_pid = -1;
@@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
}
EXPORT_SYMBOL(mod_timer_pending);
+/*
+ * Decide where to put the timer while taking the slack into account
+ *
+ * Algorithm:
+ * 1) calculate the maximum (absolute) time
+ * 2) calculate the highest bit where the expires and new max are different
+ * 3) use this bit to make a mask
+ * 4) use the bitmask to round down the maximum time, so that all last
+ * bits are zeros
+ */
+static inline
+unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
+{
+ unsigned long expires_limit, mask;
+ int bit;
+
+ expires_limit = expires + timer->slack;
+
+ if (timer->slack < 0) /* auto slack: use 0.4% */
+ expires_limit = expires + (expires - jiffies)/256;
+
+ mask = expires ^ expires_limit;
+
+ if (mask == 0)
+ return expires;
+
+ bit = find_last_bit(&mask, BITS_PER_LONG);
+
+ mask = (1 << bit) - 1;
+
+ expires_limit = expires_limit & ~(mask);
+
+ return expires_limit;
+}
+
/**
* mod_timer - modify a timer's timeout
* @timer: the timer to be modified
@@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
if (timer_pending(timer) && timer->expires == expires)
return 1;
+ expires = apply_slack(timer, expires);
+
return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer);
@@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
return index;
}
+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
+ unsigned long data)
+{
+ int preempt_count = preempt_count();
+
+#ifdef CONFIG_LOCKDEP
+ /*
+ * It is permissible to free the timer from inside the
+ * function that is called from it, this we need to take into
+ * account for lockdep too. To avoid bogus "held lock freed"
+ * warnings as well as problems when looking into
+ * timer->lockdep_map, make a copy and use that here.
+ */
+ struct lockdep_map lockdep_map = timer->lockdep_map;
+#endif
+ /*
+ * Couple the lock chain with the lock chain at
+ * del_timer_sync() by acquiring the lock_map around the fn()
+ * call here and in del_timer_sync().
+ */
+ lock_map_acquire(&lockdep_map);
+
+ trace_timer_expire_entry(timer);
+ fn(data);
+ trace_timer_expire_exit(timer);
+
+ lock_map_release(&lockdep_map);
+
+ if (preempt_count != preempt_count()) {
+ WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+ fn, preempt_count, preempt_count());
+ /*
+ * Restore the preempt count. That gives us a decent
+ * chance to survive and extract information. If the
+ * callback kept a lock held, bad luck, but not worse
+ * than the BUG() we had.
+ */
+ preempt_count() = preempt_count;
+ }
+}
+
#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
/**
@@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base)
detach_timer(timer, 1);
spin_unlock_irq(&base->lock);
- {
- int preempt_count = preempt_count();
-
-#ifdef CONFIG_LOCKDEP
- /*
- * It is permissible to free the timer from
- * inside the function that is called from
- * it, this we need to take into account for
- * lockdep too. To avoid bogus "held lock
- * freed" warnings as well as problems when
- * looking into timer->lockdep_map, make a
- * copy and use that here.
- */
- struct lockdep_map lockdep_map =
- timer->lockdep_map;
-#endif
- /*
- * Couple the lock chain with the lock chain at
- * del_timer_sync() by acquiring the lock_map
- * around the fn() call here and in
- * del_timer_sync().
- */
- lock_map_acquire(&lockdep_map);
-
- trace_timer_expire_entry(timer);
- fn(data);
- trace_timer_expire_exit(timer);
-
- lock_map_release(&lockdep_map);
-
- if (preempt_count != preempt_count()) {
- printk(KERN_ERR "huh, entered %p "
- "with preempt_count %08x, exited"
- " with %08x?\n",
- fn, preempt_count,
- preempt_count());
- BUG();
- }
- }
+ call_timer_fn(timer, fn, data);
spin_lock_irq(&base->lock);
}
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5bfb213984b2..77dabbf64b8f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -229,6 +229,16 @@ static inline void set_wq_data(struct work_struct *work,
atomic_long_set(&work->data, new);
}
+/*
+ * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
+ */
+static inline void clear_wq_data(struct work_struct *work)
+{
+ unsigned long flags = *work_data_bits(work) &
+ (1UL << WORK_STRUCT_STATIC);
+ atomic_long_set(&work->data, flags);
+}
+
static inline
struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
{
@@ -671,7 +681,7 @@ static int __cancel_work_timer(struct work_struct *work,
wait_on_work(work);
} while (unlikely(ret < 0));
- work_clear_pending(work);
+ clear_wq_data(work);
return ret;
}
@@ -845,6 +855,30 @@ int schedule_on_each_cpu(work_func_t func)
return 0;
}
+/**
+ * flush_scheduled_work - ensure that any scheduled work has run to completion.
+ *
+ * Forces execution of the kernel-global workqueue and blocks until its
+ * completion.
+ *
+ * Think twice before calling this function! It's very easy to get into
+ * trouble if you don't take great care. Either of the following situations
+ * will lead to deadlock:
+ *
+ * One of the work items currently on the workqueue needs to acquire
+ * a lock held by your code or its caller.
+ *
+ * Your code is running in the context of a work routine.
+ *
+ * They will be detected by lockdep when they occur, but the first might not
+ * occur very often. It depends on what work items are on the workqueue and
+ * what locks they need, which you have no control over.
+ *
+ * In most situations flushing the entire workqueue is overkill; you merely
+ * need to know that a particular work item isn't queued and isn't running.
+ * In such cases you should use cancel_delayed_work_sync() or
+ * cancel_work_sync() instead.
+ */
void flush_scheduled_work(void)
{
flush_workqueue(keventd_wq);