aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/cpu.c10
-rw-r--r--kernel/cpuset.c26
-rw-r--r--kernel/exit.c7
-rw-r--r--kernel/fork.c15
-rw-r--r--kernel/kprobes.c58
-rw-r--r--kernel/module.c16
-rw-r--r--kernel/mutex-debug.c12
-rw-r--r--kernel/mutex-debug.h25
-rw-r--r--kernel/mutex.c21
-rw-r--r--kernel/mutex.h6
-rw-r--r--kernel/ptrace.c23
-rw-r--r--kernel/sched.c11
-rw-r--r--kernel/signal.c35
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/time.c2
-rw-r--r--kernel/time/Makefile1
-rw-r--r--kernel/time/clocksource.c349
-rw-r--r--kernel/time/jiffies.c73
-rw-r--r--kernel/timer.c396
-rw-r--r--kernel/unwind.c918
21 files changed, 1890 insertions, 127 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index f6ef00f4f90f..752bd7d383af 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o
+obj-y += time/
obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
obj-$(CONFIG_FUTEX) += futex.o
ifeq ($(CONFIG_COMPAT),y)
@@ -21,6 +22,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
+obj-$(CONFIG_STACK_UNWIND) += unwind.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
diff --git a/kernel/cpu.c b/kernel/cpu.c
index fe2b8d0bfe4c..03dcd981846a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -13,10 +13,10 @@
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
-#include <asm/semaphore.h>
+#include <linux/mutex.h>
/* This protects CPUs going up and down... */
-static DECLARE_MUTEX(cpucontrol);
+static DEFINE_MUTEX(cpucontrol);
static BLOCKING_NOTIFIER_HEAD(cpu_chain);
@@ -30,9 +30,9 @@ static int __lock_cpu_hotplug(int interruptible)
if (lock_cpu_hotplug_owner != current) {
if (interruptible)
- ret = down_interruptible(&cpucontrol);
+ ret = mutex_lock_interruptible(&cpucontrol);
else
- down(&cpucontrol);
+ mutex_lock(&cpucontrol);
}
/*
@@ -56,7 +56,7 @@ void unlock_cpu_hotplug(void)
{
if (--lock_cpu_hotplug_depth == 0) {
lock_cpu_hotplug_owner = NULL;
- up(&cpucontrol);
+ mutex_unlock(&cpucontrol);
}
}
EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b602f73fb38d..1535af3a912d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2442,31 +2442,43 @@ void __cpuset_memory_pressure_bump(void)
*/
static int proc_cpuset_show(struct seq_file *m, void *v)
{
+ struct pid *pid;
struct task_struct *tsk;
char *buf;
- int retval = 0;
+ int retval;
+ retval = -ENOMEM;
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!buf)
- return -ENOMEM;
+ goto out;
+
+ retval = -ESRCH;
+ pid = m->private;
+ tsk = get_pid_task(pid, PIDTYPE_PID);
+ if (!tsk)
+ goto out_free;
- tsk = m->private;
+ retval = -EINVAL;
mutex_lock(&manage_mutex);
+
retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE);
if (retval < 0)
- goto out;
+ goto out_unlock;
seq_puts(m, buf);
seq_putc(m, '\n');
-out:
+out_unlock:
mutex_unlock(&manage_mutex);
+ put_task_struct(tsk);
+out_free:
kfree(buf);
+out:
return retval;
}
static int cpuset_open(struct inode *inode, struct file *file)
{
- struct task_struct *tsk = PROC_I(inode)->task;
- return single_open(file, proc_cpuset_show, tsk);
+ struct pid *pid = PROC_I(inode)->pid;
+ return single_open(file, proc_cpuset_show, pid);
}
struct file_operations proc_cpuset_operations = {
diff --git a/kernel/exit.c b/kernel/exit.c
index e76bd02e930e..304ef637be6c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -137,12 +137,8 @@ void release_task(struct task_struct * p)
{
int zap_leader;
task_t *leader;
- struct dentry *proc_dentry;
-
repeat:
atomic_dec(&p->user->processes);
- spin_lock(&p->proc_lock);
- proc_dentry = proc_pid_unhash(p);
write_lock_irq(&tasklist_lock);
ptrace_unlink(p);
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
@@ -171,8 +167,7 @@ repeat:
sched_exit(p);
write_unlock_irq(&tasklist_lock);
- spin_unlock(&p->proc_lock);
- proc_pid_flush(proc_dentry);
+ proc_flush_task(p);
release_thread(p);
call_rcu(&p->rcu, delayed_put_task_struct);
diff --git a/kernel/fork.c b/kernel/fork.c
index dfd10cb370c3..9b4e54ef0225 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -993,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags,
if (put_user(p->pid, parent_tidptr))
goto bad_fork_cleanup;
- p->proc_dentry = NULL;
-
INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
p->vfork_done = NULL;
spin_lock_init(&p->alloc_lock);
- spin_lock_init(&p->proc_lock);
clear_tsk_thread_flag(p, TIF_SIGPENDING);
init_sigpending(&p->pending);
@@ -1159,18 +1156,6 @@ static task_t *copy_process(unsigned long clone_flags,
}
if (clone_flags & CLONE_THREAD) {
- /*
- * Important: if an exit-all has been started then
- * do not create this new thread - the whole thread
- * group is supposed to exit anyway.
- */
- if (current->signal->flags & SIGNAL_GROUP_EXIT) {
- spin_unlock(&current->sighand->siglock);
- write_unlock_irq(&tasklist_lock);
- retval = -EAGAIN;
- goto bad_fork_cleanup_namespace;
- }
-
p->group_leader = current->group_leader;
list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1fbf466a29aa..64aab081153b 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -47,11 +47,17 @@
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
+static atomic_t kprobe_count;
DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
+static struct notifier_block kprobe_page_fault_nb = {
+ .notifier_call = kprobe_exceptions_notify,
+ .priority = 0x7fffffff /* we need to notified first */
+};
+
#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
/*
* kprobe->ainsn.insn points to the copy of the instruction to be
@@ -368,16 +374,15 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
*/
static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
{
- struct kprobe *kp;
-
if (p->break_handler) {
- list_for_each_entry_rcu(kp, &old_p->list, list) {
- if (kp->break_handler)
- return -EEXIST;
- }
+ if (old_p->break_handler)
+ return -EEXIST;
list_add_tail_rcu(&p->list, &old_p->list);
+ old_p->break_handler = aggr_break_handler;
} else
list_add_rcu(&p->list, &old_p->list);
+ if (p->post_handler && !old_p->post_handler)
+ old_p->post_handler = aggr_post_handler;
return 0;
}
@@ -390,9 +395,11 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
copy_kprobe(p, ap);
ap->addr = p->addr;
ap->pre_handler = aggr_pre_handler;
- ap->post_handler = aggr_post_handler;
ap->fault_handler = aggr_fault_handler;
- ap->break_handler = aggr_break_handler;
+ if (p->post_handler)
+ ap->post_handler = aggr_post_handler;
+ if (p->break_handler)
+ ap->break_handler = aggr_break_handler;
INIT_LIST_HEAD(&ap->list);
list_add_rcu(&p->list, &ap->list);
@@ -464,6 +471,8 @@ static int __kprobes __register_kprobe(struct kprobe *p,
old_p = get_kprobe(p->addr);
if (old_p) {
ret = register_aggr_kprobe(old_p, p);
+ if (!ret)
+ atomic_inc(&kprobe_count);
goto out;
}
@@ -474,6 +483,10 @@ static int __kprobes __register_kprobe(struct kprobe *p,
hlist_add_head_rcu(&p->hlist,
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
+ if (atomic_add_return(1, &kprobe_count) == \
+ (ARCH_INACTIVE_KPROBE_COUNT + 1))
+ register_page_fault_notifier(&kprobe_page_fault_nb);
+
arch_arm_kprobe(p);
out:
@@ -536,14 +549,40 @@ valid_p:
kfree(old_p);
}
arch_remove_kprobe(p);
+ } else {
+ mutex_lock(&kprobe_mutex);
+ if (p->break_handler)
+ old_p->break_handler = NULL;
+ if (p->post_handler){
+ list_for_each_entry_rcu(list_p, &old_p->list, list){
+ if (list_p->post_handler){
+ cleanup_p = 2;
+ break;
+ }
+ }
+ if (cleanup_p == 0)
+ old_p->post_handler = NULL;
+ }
+ mutex_unlock(&kprobe_mutex);
}
+
+ /* Call unregister_page_fault_notifier()
+ * if no probes are active
+ */
+ mutex_lock(&kprobe_mutex);
+ if (atomic_add_return(-1, &kprobe_count) == \
+ ARCH_INACTIVE_KPROBE_COUNT)
+ unregister_page_fault_notifier(&kprobe_page_fault_nb);
+ mutex_unlock(&kprobe_mutex);
+ return;
}
static struct notifier_block kprobe_exceptions_nb = {
.notifier_call = kprobe_exceptions_notify,
- .priority = 0x7fffffff /* we need to notified first */
+ .priority = 0x7fffffff /* we need to be notified first */
};
+
int __kprobes register_jprobe(struct jprobe *jp)
{
/* Todo: Verify probepoint is a function entry point */
@@ -652,6 +691,7 @@ static int __init init_kprobes(void)
INIT_HLIST_HEAD(&kprobe_table[i]);
INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
}
+ atomic_set(&kprobe_count, 0);
err = arch_init_kprobes();
if (!err)
diff --git a/kernel/module.c b/kernel/module.c
index d75275de1c28..08811e26ac9d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -40,6 +40,7 @@
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/mutex.h>
+#include <linux/unwind.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <asm/cacheflush.h>
@@ -1051,6 +1052,8 @@ static void free_module(struct module *mod)
remove_sect_attrs(mod);
mod_kobject_remove(mod);
+ unwind_remove_table(mod->unwind_info, 0);
+
/* Arch-specific cleanup. */
module_arch_cleanup(mod);
@@ -1412,7 +1415,7 @@ static struct module *load_module(void __user *umod,
unsigned int i, symindex = 0, strindex = 0, setupindex, exindex,
exportindex, modindex, obsparmindex, infoindex, gplindex,
crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex,
- gplfuturecrcindex;
+ gplfuturecrcindex, unwindex = 0;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1502,6 +1505,9 @@ static struct module *load_module(void __user *umod,
versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
+#ifdef ARCH_UNWIND_SECTION_NAME
+ unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
+#endif
/* Don't keep modinfo section */
sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1510,6 +1516,8 @@ static struct module *load_module(void __user *umod,
sechdrs[symindex].sh_flags |= SHF_ALLOC;
sechdrs[strindex].sh_flags |= SHF_ALLOC;
#endif
+ if (unwindex)
+ sechdrs[unwindex].sh_flags |= SHF_ALLOC;
/* Check module struct version now, before we try to use module. */
if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -1738,6 +1746,11 @@ static struct module *load_module(void __user *umod,
goto arch_cleanup;
add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
+ /* Size of section 0 is 0, so this works well if no unwind info. */
+ mod->unwind_info = unwind_add_table(mod,
+ (void *)sechdrs[unwindex].sh_addr,
+ sechdrs[unwindex].sh_size);
+
/* Get rid of temporary copy */
vfree(hdr);
@@ -1836,6 +1849,7 @@ sys_init_module(void __user *umod,
mod->state = MODULE_STATE_LIVE;
/* Drop initial reference. */
module_put(mod);
+ unwind_remove_table(mod->unwind_info, 1);
module_free(mod, mod->module_init);
mod->module_init = NULL;
mod->init_size = 0;
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index f4913c376950..036b6285b15c 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -153,13 +153,13 @@ next:
continue;
count++;
cursor = curr->next;
- debug_spin_lock_restore(&debug_mutex_lock, flags);
+ debug_spin_unlock_restore(&debug_mutex_lock, flags);
printk("\n#%03d: ", count);
printk_lock(lock, filter ? 0 : 1);
goto next;
}
- debug_spin_lock_restore(&debug_mutex_lock, flags);
+ debug_spin_unlock_restore(&debug_mutex_lock, flags);
printk("\n");
}
@@ -316,7 +316,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task)
continue;
list_del_init(curr);
DEBUG_OFF();
- debug_spin_lock_restore(&debug_mutex_lock, flags);
+ debug_spin_unlock_restore(&debug_mutex_lock, flags);
printk("BUG: %s/%d, lock held at task exit time!\n",
task->comm, task->pid);
@@ -325,7 +325,7 @@ void mutex_debug_check_no_locks_held(struct task_struct *task)
printk("exiting task is not even the owner??\n");
return;
}
- debug_spin_lock_restore(&debug_mutex_lock, flags);
+ debug_spin_unlock_restore(&debug_mutex_lock, flags);
}
/*
@@ -352,7 +352,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
continue;
list_del_init(curr);
DEBUG_OFF();
- debug_spin_lock_restore(&debug_mutex_lock, flags);
+ debug_spin_unlock_restore(&debug_mutex_lock, flags);
printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
current->comm, current->pid, lock, from, to);
@@ -362,7 +362,7 @@ void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
printk("freeing task is not even the owner??\n");
return;
}
- debug_spin_lock_restore(&debug_mutex_lock, flags);
+ debug_spin_unlock_restore(&debug_mutex_lock, flags);
}
/*
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index fd384050acb1..a5196c36a5fd 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -46,21 +46,6 @@ extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
extern void debug_mutex_unlock(struct mutex *lock);
extern void debug_mutex_init(struct mutex *lock, const char *name);
-#define debug_spin_lock(lock) \
- do { \
- local_irq_disable(); \
- if (debug_mutex_on) \
- spin_lock(lock); \
- } while (0)
-
-#define debug_spin_unlock(lock) \
- do { \
- if (debug_mutex_on) \
- spin_unlock(lock); \
- local_irq_enable(); \
- preempt_check_resched(); \
- } while (0)
-
#define debug_spin_lock_save(lock, flags) \
do { \
local_irq_save(flags); \
@@ -68,7 +53,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name);
spin_lock(lock); \
} while (0)
-#define debug_spin_lock_restore(lock, flags) \
+#define debug_spin_unlock_restore(lock, flags) \
do { \
if (debug_mutex_on) \
spin_unlock(lock); \
@@ -76,20 +61,20 @@ extern void debug_mutex_init(struct mutex *lock, const char *name);
preempt_check_resched(); \
} while (0)
-#define spin_lock_mutex(lock) \
+#define spin_lock_mutex(lock, flags) \
do { \
struct mutex *l = container_of(lock, struct mutex, wait_lock); \
\
DEBUG_WARN_ON(in_interrupt()); \
- debug_spin_lock(&debug_mutex_lock); \
+ debug_spin_lock_save(&debug_mutex_lock, flags); \
spin_lock(lock); \
DEBUG_WARN_ON(l->magic != l); \
} while (0)
-#define spin_unlock_mutex(lock) \
+#define spin_unlock_mutex(lock, flags) \
do { \
spin_unlock(lock); \
- debug_spin_unlock(&debug_mutex_lock); \
+ debug_spin_unlock_restore(&debug_mutex_lock, flags); \
} while (0)
#define DEBUG_OFF() \
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 5449b210d9ed..7043db21bbce 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -125,10 +125,11 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
struct task_struct *task = current;
struct mutex_waiter waiter;
unsigned int old_val;
+ unsigned long flags;
debug_mutex_init_waiter(&waiter);
- spin_lock_mutex(&lock->wait_lock);
+ spin_lock_mutex(&lock->wait_lock, flags);
debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip);
@@ -157,7 +158,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
if (unlikely(state == TASK_INTERRUPTIBLE &&
signal_pending(task))) {
mutex_remove_waiter(lock, &waiter, task->thread_info);
- spin_unlock_mutex(&lock->wait_lock);
+ spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
return -EINTR;
@@ -165,9 +166,9 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
__set_task_state(task, state);
/* didnt get the lock, go to sleep: */
- spin_unlock_mutex(&lock->wait_lock);
+ spin_unlock_mutex(&lock->wait_lock, flags);
schedule();
- spin_lock_mutex(&lock->wait_lock);
+ spin_lock_mutex(&lock->wait_lock, flags);
}
/* got the lock - rejoice! */
@@ -178,7 +179,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
if (likely(list_empty(&lock->wait_list)))
atomic_set(&lock->count, 0);
- spin_unlock_mutex(&lock->wait_lock);
+ spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
@@ -203,10 +204,11 @@ static fastcall noinline void
__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
+ unsigned long flags;
DEBUG_WARN_ON(lock->owner != current_thread_info());
- spin_lock_mutex(&lock->wait_lock);
+ spin_lock_mutex(&lock->wait_lock, flags);
/*
* some architectures leave the lock unlocked in the fastpath failure
@@ -231,7 +233,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
debug_mutex_clear_owner(lock);
- spin_unlock_mutex(&lock->wait_lock);
+ spin_unlock_mutex(&lock->wait_lock, flags);
}
/*
@@ -276,9 +278,10 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__)
static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
+ unsigned long flags;
int prev;
- spin_lock_mutex(&lock->wait_lock);
+ spin_lock_mutex(&lock->wait_lock, flags);
prev = atomic_xchg(&lock->count, -1);
if (likely(prev == 1))
@@ -287,7 +290,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
if (likely(list_empty(&lock->wait_list)))
atomic_set(&lock->count, 0);
- spin_unlock_mutex(&lock->wait_lock);
+ spin_unlock_mutex(&lock->wait_lock, flags);
return prev == 1;
}
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 00fe84e7b672..069189947257 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -9,8 +9,10 @@
* !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
*/
-#define spin_lock_mutex(lock) spin_lock(lock)
-#define spin_unlock_mutex(lock) spin_unlock(lock)
+#define spin_lock_mutex(lock, flags) \
+ do { spin_lock(lock); (void)(flags); } while (0)
+#define spin_unlock_mutex(lock, flags) \
+ do { spin_unlock(lock); (void)(flags); } while (0)
#define mutex_remove_waiter(lock, waiter, ti) \
__list_del((waiter)->list.prev, (waiter)->list.next)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 921c22ad16e4..335c5b932e14 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -120,8 +120,18 @@ int ptrace_check_attach(struct task_struct *child, int kill)
static int may_attach(struct task_struct *task)
{
- if (!task->mm)
- return -EPERM;
+ /* May we inspect the given task?
+ * This check is used both for attaching with ptrace
+ * and for allowing access to sensitive information in /proc.
+ *
+ * ptrace_attach denies several cases that /proc allows
+ * because setting up the necessary parent/child relationship
+ * or halting the specified task is impossible.
+ */
+ int dumpable = 0;
+ /* Don't let security modules deny introspection */
+ if (task == current)
+ return 0;
if (((current->uid != task->euid) ||
(current->uid != task->suid) ||
(current->uid != task->uid) ||
@@ -130,7 +140,9 @@ static int may_attach(struct task_struct *task)
(current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
return -EPERM;
smp_rmb();
- if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
+ if (task->mm)
+ dumpable = task->mm->dumpable;
+ if (!dumpable && !capable(CAP_SYS_PTRACE))
return -EPERM;
return security_ptrace(current, task);
@@ -176,6 +188,8 @@ repeat:
goto repeat;
}
+ if (!task->mm)
+ goto bad;
/* the same process cannot be attached many times */
if (task->ptrace & PT_PTRACED)
goto bad;
@@ -200,7 +214,7 @@ out:
return retval;
}
-void __ptrace_detach(struct task_struct *child, unsigned int data)
+static inline void __ptrace_detach(struct task_struct *child, unsigned int data)
{
child->exit_code = data;
/* .. re-parent .. */
@@ -219,6 +233,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
ptrace_disable(child);
write_lock_irq(&tasklist_lock);
+ /* protect against de_thread()->release_task() */
if (child->ptrace)
__ptrace_detach(child, data);
write_unlock_irq(&tasklist_lock);
diff --git a/kernel/sched.c b/kernel/sched.c
index f06d059edef5..a856040c200a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -818,6 +818,11 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
* the target CPU.
*/
#ifdef CONFIG_SMP
+
+#ifndef tsk_is_polling
+#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
+#endif
+
static void resched_task(task_t *p)
{
int cpu;
@@ -833,9 +838,9 @@ static void resched_task(task_t *p)
if (cpu == smp_processor_id())
return;
- /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */
+ /* NEED_RESCHED must be visible before we test polling */
smp_mb();
- if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG))
+ if (!tsk_is_polling(p))
smp_send_reschedule(cpu);
}
#else
@@ -4247,7 +4252,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
if (retval)
goto out_unlock;
- jiffies_to_timespec(p->policy & SCHED_FIFO ?
+ jiffies_to_timespec(p->policy == SCHED_FIFO ?
0 : task_timeslice(p), &t);
read_unlock(&tasklist_lock);
retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
diff --git a/kernel/signal.c b/kernel/signal.c
index 1b3c921737e2..52adf53929f6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1531,6 +1531,35 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
spin_unlock_irqrestore(&sighand->siglock, flags);
}
+static inline int may_ptrace_stop(void)
+{
+ if (!likely(current->ptrace & PT_PTRACED))
+ return 0;
+
+ if (unlikely(current->parent == current->real_parent &&
+ (current->ptrace & PT_ATTACHED)))
+ return 0;
+
+ if (unlikely(current->signal == current->parent->signal) &&
+ unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))
+ return 0;
+
+ /*
+ * Are we in the middle of do_coredump?
+ * If so and our tracer is also part of the coredump stopping
+ * is a deadlock situation, and pointless because our tracer
+ * is dead so don't allow us to stop.
+ * If SIGKILL was already sent before the caller unlocked
+ * ->siglock we must see ->core_waiters != 0. Otherwise it
+ * is safe to enter schedule().
+ */
+ if (unlikely(current->mm->core_waiters) &&
+ unlikely(current->mm == current->parent->mm))
+ return 0;
+
+ return 1;
+}
+
/*
* This must be called with current->sighand->siglock held.
*
@@ -1559,11 +1588,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
spin_unlock_irq(&current->sighand->siglock);
try_to_freeze();
read_lock(&tasklist_lock);
- if (likely(current->ptrace & PT_PTRACED) &&
- likely(current->parent != current->real_parent ||
- !(current->ptrace & PT_ATTACHED)) &&
- (likely(current->parent->signal != current->signal) ||
- !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) {
+ if (may_ptrace_stop()) {
do_notify_parent_cldstop(current, CLD_TRAPPED);
read_unlock(&tasklist_lock);
schedule();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2c0e65819448..f1a4eb1a655e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -73,6 +73,7 @@ extern int printk_ratelimit_burst;
extern int pid_max_min, pid_max_max;
extern int sysctl_drop_caches;
extern int percpu_pagelist_fraction;
+extern int compat_log;
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
int unknown_nmi_panic;
@@ -677,6 +678,16 @@ static ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+#ifdef CONFIG_COMPAT
+ {
+ .ctl_name = KERN_COMPAT_LOG,
+ .procname = "compat-log",
+ .data = &compat_log,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
{ .ctl_name = 0 }
};
diff --git a/kernel/time.c b/kernel/time.c
index b00ddc71cedb..5bd489747643 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -523,6 +523,7 @@ EXPORT_SYMBOL(do_gettimeofday);
#else
+#ifndef CONFIG_GENERIC_TIME
/*
* Simulate gettimeofday using do_gettimeofday which only allows a timeval
* and therefore only yields usec accuracy
@@ -537,6 +538,7 @@ void getnstimeofday(struct timespec *tv)
}
EXPORT_SYMBOL_GPL(getnstimeofday);
#endif
+#endif
/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
* Assumes input in normal date format, i.e. 1980-12-31 23:59:59
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
new file mode 100644
index 000000000000..e1dfd8e86cce
--- /dev/null
+++ b/kernel/time/Makefile
@@ -0,0 +1 @@
+obj-y += clocksource.o jiffies.o
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
new file mode 100644
index 000000000000..74eca5939bd9
--- /dev/null
+++ b/kernel/time/clocksource.c
@@ -0,0 +1,349 @@
+/*
+ * linux/kernel/time/clocksource.c
+ *
+ * This file contains the functions which manage clocksource drivers.
+ *
+ * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * TODO WishList:
+ * o Allow clocksource drivers to be unregistered
+ * o get rid of clocksource_jiffies extern
+ */
+
+#include <linux/clocksource.h>
+#include <linux/sysdev.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+/* XXX - Would like a better way for initializing curr_clocksource */
+extern struct clocksource clocksource_jiffies;
+
+/*[Clocksource internal variables]---------
+ * curr_clocksource:
+ * currently selected clocksource. Initialized to clocksource_jiffies.
+ * next_clocksource:
+ * pending next selected clocksource.
+ * clocksource_list:
+ * linked list with the registered clocksources
+ * clocksource_lock:
+ * protects manipulations to curr_clocksource and next_clocksource
+ * and the clocksource_list
+ * override_name:
+ * Name of the user-specified clocksource.
+ */
+static struct clocksource *curr_clocksource = &clocksource_jiffies;
+static struct clocksource *next_clocksource;
+static LIST_HEAD(clocksource_list);
+static DEFINE_SPINLOCK(clocksource_lock);
+static char override_name[32];
+static int finished_booting;
+
+/* clocksource_done_booting - Called near the end of bootup
+ *
+ * Hack to avoid lots of clocksource churn at boot time
+ */
+static int __init clocksource_done_booting(void)
+{
+ finished_booting = 1;
+ return 0;
+}
+
+late_initcall(clocksource_done_booting);
+
+/**
+ * clocksource_get_next - Returns the selected clocksource
+ *
+ */
+struct clocksource *clocksource_get_next(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&clocksource_lock, flags);
+ if (next_clocksource && finished_booting) {
+ curr_clocksource = next_clocksource;
+ next_clocksource = NULL;
+ }
+ spin_unlock_irqrestore(&clocksource_lock, flags);
+
+ return curr_clocksource;
+}
+
+/**
+ * select_clocksource - Finds the best registered clocksource.
+ *
+ * Private function. Must hold clocksource_lock when called.
+ *
+ * Looks through the list of registered clocksources, returning
+ * the one with the highest rating value. If there is a clocksource
+ * name that matches the override string, it returns that clocksource.
+ */
+static struct clocksource *select_clocksource(void)
+{
+ struct clocksource *best = NULL;
+ struct list_head *tmp;
+
+ list_for_each(tmp, &clocksource_list) {
+ struct clocksource *src;
+
+ src = list_entry(tmp, struct clocksource, list);
+ if (!best)
+ best = src;
+
+ /* check for override: */
+ if (strlen(src->name) == strlen(override_name) &&
+ !strcmp(src->name, override_name)) {
+ best = src;
+ break;
+ }
+ /* pick the highest rating: */
+ if (src->rating > best->rating)
+ best = src;
+ }
+
+ return best;
+}
+
+/**
+ * is_registered_source - Checks if clocksource is registered
+ * @c: pointer to a clocksource
+ *
+ * Private helper function. Must hold clocksource_lock when called.
+ *
+ * Returns one if the clocksource is already registered, zero otherwise.
+ */
+static int is_registered_source(struct clocksource *c)
+{
+ int len = strlen(c->name);
+ struct list_head *tmp;
+
+ list_for_each(tmp, &clocksource_list) {
+ struct clocksource *src;
+
+ src = list_entry(tmp, struct clocksource, list);
+ if (strlen(src->name) == len && !strcmp(src->name, c->name))
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * clocksource_register - Used to install new clocksources
+ * @t: clocksource to be registered
+ *
+ * Returns -EBUSY if registration fails, zero otherwise.
+ */
+int clocksource_register(struct clocksource *c)
+{
+ int ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&clocksource_lock, flags);
+ /* check if clocksource is already registered */
+ if (is_registered_source(c)) {
+ printk("register_clocksource: Cannot register %s. "
+ "Already registered!", c->name);
+ ret = -EBUSY;
+ } else {
+ /* register it */
+ list_add(&c->list, &clocksource_list);
+ /* scan the registered clocksources, and pick the best one */
+ next_clocksource = select_clocksource();
+ }
+ spin_unlock_irqrestore(&clocksource_lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(clocksource_register);
+
+/**
+ * clocksource_reselect - Rescan list for next clocksource
+ *
+ * A quick helper function to be used if a clocksource changes its
+ * rating. Forces the clocksource list to be re-scanned for the best
+ * clocksource.
+ */
+void clocksource_reselect(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&clocksource_lock, flags);
+ next_clocksource = select_clocksource();
+ spin_unlock_irqrestore(&clocksource_lock, flags);
+}
+EXPORT_SYMBOL(clocksource_reselect);
+
+/**
+ * sysfs_show_current_clocksources - sysfs interface for current clocksource
+ * @dev: unused
+ * @buf: char buffer to be filled with clocksource list
+ *
+ * Provides sysfs interface for listing current clocksource.
+ */
+static ssize_t
+sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
+{
+ char *curr = buf;
+
+ spin_lock_irq(&clocksource_lock);
+ curr += sprintf(curr, "%s ", curr_clocksource->name);
+ spin_unlock_irq(&clocksource_lock);
+
+ curr += sprintf(curr, "\n");
+
+ return curr - buf;
+}
+
+/**
+ * sysfs_override_clocksource - interface for manually overriding clocksource
+ * @dev: unused
+ * @buf: name of override clocksource
+ * @count: length of buffer
+ *
+ * Takes input from sysfs interface for manually overriding the default
+ * clocksource selction.
+ */
+static ssize_t sysfs_override_clocksource(struct sys_device *dev,
+ const char *buf, size_t count)
+{
+ size_t ret = count;
+ /* strings from sysfs write are not 0 terminated! */
+ if (count >= sizeof(override_name))
+ return -EINVAL;
+
+ /* strip of \n: */
+ if (buf[count-1] == '\n')
+ count--;
+ if (count < 1)
+ return -EINVAL;
+
+ spin_lock_irq(&clocksource_lock);
+
+ /* copy the name given: */
+ memcpy(override_name, buf, count);
+ override_name[count] = 0;
+
+ /* try to select it: */
+ next_clocksource = select_clocksource();
+
+ spin_unlock_irq(&clocksource_lock);
+
+ return ret;
+}
+
+/**
+ * sysfs_show_available_clocksources - sysfs interface for listing clocksource
+ * @dev: unused
+ * @buf: char buffer to be filled with clocksource list
+ *
+ * Provides sysfs interface for listing registered clocksources
+ */
+static ssize_t
+sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
+{
+ struct list_head *tmp;
+ char *curr = buf;
+
+ spin_lock_irq(&clocksource_lock);
+ list_for_each(tmp, &clocksource_list) {
+ struct clocksource *src;
+
+ src = list_entry(tmp, struct clocksource, list);
+ curr += sprintf(curr, "%s ", src->name);
+ }
+ spin_unlock_irq(&clocksource_lock);
+
+ curr += sprintf(curr, "\n");
+
+ return curr - buf;
+}
+
+/*
+ * Sysfs setup bits:
+ */
+static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
+ sysfs_override_clocksource);
+
+static SYSDEV_ATTR(available_clocksource, 0600,
+ sysfs_show_available_clocksources, NULL);
+
+static struct sysdev_class clocksource_sysclass = {
+ set_kset_name("clocksource"),
+};
+
+static struct sys_device device_clocksource = {
+ .id = 0,
+ .cls = &clocksource_sysclass,
+};
+
+static int __init init_clocksource_sysfs(void)
+{
+ int error = sysdev_class_register(&clocksource_sysclass);
+
+ if (!error)
+ error = sysdev_register(&device_clocksource);
+ if (!error)
+ error = sysdev_create_file(
+ &device_clocksource,
+ &attr_current_clocksource);
+ if (!error)
+ error = sysdev_create_file(
+ &device_clocksource,
+ &attr_available_clocksource);
+ return error;
+}
+
+device_initcall(init_clocksource_sysfs);
+
+/**
+ * boot_override_clocksource - boot clock override
+ * @str: override name
+ *
+ * Takes a clocksource= boot argument and uses it
+ * as the clocksource override name.
+ */
+static int __init boot_override_clocksource(char* str)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&clocksource_lock, flags);
+ if (str)
+ strlcpy(override_name, str, sizeof(override_name));
+ spin_unlock_irqrestore(&clocksource_lock, flags);
+ return 1;
+}
+
+__setup("clocksource=", boot_override_clocksource);
+
+/**
+ * boot_override_clock - Compatibility layer for deprecated boot option
+ * @str: override name
+ *
+ * DEPRECATED! Takes a clock= boot argument and uses it
+ * as the clocksource override name
+ */
+static int __init boot_override_clock(char* str)
+{
+ if (!strcmp(str, "pmtmr")) {
+ printk("Warning: clock=pmtmr is deprecated. "
+ "Use clocksource=acpi_pm.\n");
+ return boot_override_clocksource("acpi_pm");
+ }
+ printk("Warning! clock= boot option is deprecated. "
+ "Use clocksource=xyz\n");
+ return boot_override_clocksource(str);
+}
+
+__setup("clock=", boot_override_clock);
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
new file mode 100644
index 000000000000..126bb30c4afe
--- /dev/null
+++ b/kernel/time/jiffies.c
@@ -0,0 +1,73 @@
+/***********************************************************************
+* linux/kernel/time/jiffies.c
+*
+* This file contains the jiffies based clocksource.
+*
+* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+************************************************************************/
+#include <linux/clocksource.h>
+#include <linux/jiffies.h>
+#include <linux/init.h>
+
+/* The Jiffies based clocksource is the lowest common
+ * denominator clock source which should function on
+ * all systems. It has the same coarse resolution as
+ * the timer interrupt frequency HZ and it suffers
+ * inaccuracies caused by missed or lost timer
+ * interrupts and the inability for the timer
+ * interrupt hardware to accuratly tick at the
+ * requested HZ value. It is also not reccomended
+ * for "tick-less" systems.
+ */
+#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ))
+
+/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
+ * conversion, the .shift value could be zero. However
+ * this would make NTP adjustments impossible as they are
+ * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
+ * shift both the nominator and denominator the same
+ * amount, and give ntp adjustments in units of 1/2^8
+ *
+ * The value 8 is somewhat carefully chosen, as anything
+ * larger can result in overflows. NSEC_PER_JIFFY grows as
+ * HZ shrinks, so values greater then 8 overflow 32bits when
+ * HZ=100.
+ */
+#define JIFFIES_SHIFT 8
+
+static cycle_t jiffies_read(void)
+{
+ return (cycle_t) jiffies;
+}
+
+struct clocksource clocksource_jiffies = {
+ .name = "jiffies",
+ .rating = 0, /* lowest rating*/
+ .read = jiffies_read,
+ .mask = 0xffffffff, /*32bits*/
+ .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
+ .shift = JIFFIES_SHIFT,
+ .is_continuous = 0, /* tick based, not free running */
+};
+
+static int __init init_jiffies_clocksource(void)
+{
+ return clocksource_register(&clocksource_jiffies);
+}
+
+module_init(init_jiffies_clocksource);
diff --git a/kernel/timer.c b/kernel/timer.c
index eb97371b87d8..5bb6b7976eec 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -597,7 +597,6 @@ long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
long time_precision = 1; /* clock precision (us) */
long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
-static long time_phase; /* phase offset (scaled us) */
long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC;
/* frequency offset (scaled ppm)*/
static long time_adj; /* tick adjust (scaled 1 / HZ) */
@@ -747,27 +746,14 @@ static long adjtime_adjustment(void)
}
/* in the NTP reference this is called "hardclock()" */
-static void update_wall_time_one_tick(void)
+static void update_ntp_one_tick(void)
{
- long time_adjust_step, delta_nsec;
+ long time_adjust_step;
time_adjust_step = adjtime_adjustment();
if (time_adjust_step)
/* Reduce by this step the amount of time left */
time_adjust -= time_adjust_step;
- delta_nsec = tick_nsec + time_adjust_step * 1000;
- /*
- * Advance the phase, once it gets to one microsecond, then
- * advance the tick more.
- */
- time_phase += time_adj;
- if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) {
- long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10));
- time_phase -= ltemp << (SHIFT_SCALE - 10);
- delta_nsec += ltemp;
- }
- xtime.tv_nsec += delta_nsec;
- time_interpolator_update(delta_nsec);
/* Changes by adjtime() do not take effect till next tick. */
if (time_next_adjust != 0) {
@@ -780,36 +766,378 @@ static void update_wall_time_one_tick(void)
* Return how long ticks are at the moment, that is, how much time
* update_wall_time_one_tick will add to xtime next time we call it
* (assuming no calls to do_adjtimex in the meantime).
- * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
- * bits to the right of the binary point.
+ * The return value is in fixed-point nanoseconds shifted by the
+ * specified number of bits to the right of the binary point.
* This function has no side-effects.
*/
u64 current_tick_length(void)
{
long delta_nsec;
+ u64 ret;
+ /* calculate the finest interval NTP will allow.
+ * ie: nanosecond value shifted by (SHIFT_SCALE - 10)
+ */
delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
- return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
+ ret = (u64)delta_nsec << TICK_LENGTH_SHIFT;
+ ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10));
+
+ return ret;
}
-/*
- * Using a loop looks inefficient, but "ticks" is
- * usually just one (we shouldn't be losing ticks,
- * we're doing this this way mainly for interrupt
- * latency reasons, not because we think we'll
- * have lots of lost timer ticks
+/* XXX - all of this timekeeping code should be later moved to time.c */
+#include <linux/clocksource.h>
+static struct clocksource *clock; /* pointer to current clocksource */
+
+#ifdef CONFIG_GENERIC_TIME
+/**
+ * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook
+ *
+ * private function, must hold xtime_lock lock when being
+ * called. Returns the number of nanoseconds since the
+ * last call to update_wall_time() (adjusted by NTP scaling)
+ */
+static inline s64 __get_nsec_offset(void)
+{
+ cycle_t cycle_now, cycle_delta;
+ s64 ns_offset;
+
+ /* read clocksource: */
+ cycle_now = clocksource_read(clock);
+
+ /* calculate the delta since the last update_wall_time: */
+ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+
+ /* convert to nanoseconds: */
+ ns_offset = cyc2ns(clock, cycle_delta);
+
+ return ns_offset;
+}
+
+/**
+ * __get_realtime_clock_ts - Returns the time of day in a timespec
+ * @ts: pointer to the timespec to be set
+ *
+ * Returns the time of day in a timespec. Used by
+ * do_gettimeofday() and get_realtime_clock_ts().
*/
-static void update_wall_time(unsigned long ticks)
+static inline void __get_realtime_clock_ts(struct timespec *ts)
{
+ unsigned long seq;
+ s64 nsecs;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+
+ *ts = xtime;
+ nsecs = __get_nsec_offset();
+
+ } while (read_seqretry(&xtime_lock, seq));
+
+ timespec_add_ns(ts, nsecs);
+}
+
+/**
+ * getnstimeofday - Returns the time of day in a timespec
+ * @ts: pointer to the timespec to be set
+ *
+ * Returns the time of day in a timespec.
+ */
+void getnstimeofday(struct timespec *ts)
+{
+ __get_realtime_clock_ts(ts);
+}
+
+EXPORT_SYMBOL(getnstimeofday);
+
+/**
+ * do_gettimeofday - Returns the time of day in a timeval
+ * @tv: pointer to the timeval to be set
+ *
+ * NOTE: Users should be converted to using get_realtime_clock_ts()
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+ struct timespec now;
+
+ __get_realtime_clock_ts(&now);
+ tv->tv_sec = now.tv_sec;
+ tv->tv_usec = now.tv_nsec/1000;
+}
+
+EXPORT_SYMBOL(do_gettimeofday);
+/**
+ * do_settimeofday - Sets the time of day
+ * @tv: pointer to the timespec variable containing the new time
+ *
+ * Sets the time of day to the new time and update NTP and notify hrtimers
+ */
+int do_settimeofday(struct timespec *tv)
+{
+ unsigned long flags;
+ time_t wtm_sec, sec = tv->tv_sec;
+ long wtm_nsec, nsec = tv->tv_nsec;
+
+ if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+ return -EINVAL;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+
+ nsec -= __get_nsec_offset();
+
+ wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
+ wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+
+ set_normalized_timespec(&xtime, sec, nsec);
+ set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+
+ ntp_clear();
+
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ /* signal hrtimers about time change */
+ clock_was_set();
+
+ return 0;
+}
+
+EXPORT_SYMBOL(do_settimeofday);
+
+/**
+ * change_clocksource - Swaps clocksources if a new one is available
+ *
+ * Accumulates current time interval and initializes new clocksource
+ */
+static int change_clocksource(void)
+{
+ struct clocksource *new;
+ cycle_t now;
+ u64 nsec;
+ new = clocksource_get_next();
+ if (clock != new) {
+ now = clocksource_read(new);
+ nsec = __get_nsec_offset();
+ timespec_add_ns(&xtime, nsec);
+
+ clock = new;
+ clock->cycle_last = now;
+ printk(KERN_INFO "Time: %s clocksource has been installed.\n",
+ clock->name);
+ return 1;
+ } else if (clock->update_callback) {
+ return clock->update_callback();
+ }
+ return 0;
+}
+#else
+#define change_clocksource() (0)
+#endif
+
+/**
+ * timeofday_is_continuous - check to see if timekeeping is free running
+ */
+int timekeeping_is_continuous(void)
+{
+ unsigned long seq;
+ int ret;
+
do {
- ticks--;
- update_wall_time_one_tick();
- if (xtime.tv_nsec >= 1000000000) {
- xtime.tv_nsec -= 1000000000;
+ seq = read_seqbegin(&xtime_lock);
+
+ ret = clock->is_continuous;
+
+ } while (read_seqretry(&xtime_lock, seq));
+
+ return ret;
+}
+
+/*
+ * timekeeping_init - Initializes the clocksource and common timekeeping values
+ */
+void __init timekeeping_init(void)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ clock = clocksource_get_next();
+ clocksource_calculate_interval(clock, tick_nsec);
+ clock->cycle_last = clocksource_read(clock);
+ ntp_clear();
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+}
+
+
+/*
+ * timekeeping_resume - Resumes the generic timekeeping subsystem.
+ * @dev: unused
+ *
+ * This is for the generic clocksource timekeeping.
+ * xtime/wall_to_monotonic/jiffies/wall_jiffies/etc are
+ * still managed by arch specific suspend/resume code.
+ */
+static int timekeeping_resume(struct sys_device *dev)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ /* restart the last cycle value */
+ clock->cycle_last = clocksource_read(clock);
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ return 0;
+}
+
+/* sysfs resume/suspend bits for timekeeping */
+static struct sysdev_class timekeeping_sysclass = {
+ .resume = timekeeping_resume,
+ set_kset_name("timekeeping"),
+};
+
+static struct sys_device device_timer = {
+ .id = 0,
+ .cls = &timekeeping_sysclass,
+};
+
+static int __init timekeeping_init_device(void)
+{
+ int error = sysdev_class_register(&timekeeping_sysclass);
+ if (!error)
+ error = sysdev_register(&device_timer);
+ return error;
+}
+
+device_initcall(timekeeping_init_device);
+
+/*
+ * If the error is already larger, we look ahead another tick,
+ * to compensate for late or lost adjustments.
+ */
+static __always_inline int clocksource_bigadjust(int sign, s64 error, s64 *interval, s64 *offset)
+{
+ int adj;
+
+ /*
+ * As soon as the machine is synchronized to the external time
+ * source this should be the common case.
+ */
+ error >>= 2;
+ if (likely(sign > 0 ? error <= *interval : error >= *interval))
+ return sign;
+
+ /*
+ * An extra look ahead dampens the effect of the current error,
+ * which can grow quite large with continously late updates, as
+ * it would dominate the adjustment value and can lead to
+ * oscillation.
+ */
+ error += current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1);
+ error -= clock->xtime_interval >> 1;
+
+ adj = 0;
+ while (1) {
+ error >>= 1;
+ if (sign > 0 ? error <= *interval : error >= *interval)
+ break;
+ adj++;
+ }
+
+ /*
+ * Add the current adjustments to the error and take the offset
+ * into account, the latter can cause the error to be hardly
+ * reduced at the next tick. Check the error again if there's
+ * room for another adjustment, thus further reducing the error
+ * which otherwise had to be corrected at the next update.
+ */
+ error = (error << 1) - *interval + *offset;
+ if (sign > 0 ? error > *interval : error < *interval)
+ adj++;
+
+ *interval <<= adj;
+ *offset <<= adj;
+ return sign << adj;
+}
+
+/*
+ * Adjust the multiplier to reduce the error value,
+ * this is optimized for the most common adjustments of -1,0,1,
+ * for other values we can do a bit more work.
+ */
+static void clocksource_adjust(struct clocksource *clock, s64 offset)
+{
+ s64 error, interval = clock->cycle_interval;
+ int adj;
+
+ error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1);
+ if (error > interval) {
+ adj = clocksource_bigadjust(1, error, &interval, &offset);
+ } else if (error < -interval) {
+ interval = -interval;
+ offset = -offset;
+ adj = clocksource_bigadjust(-1, error, &interval, &offset);
+ } else
+ return;
+
+ clock->mult += adj;
+ clock->xtime_interval += interval;
+ clock->xtime_nsec -= offset;
+ clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift);
+}
+
+/*
+ * update_wall_time - Uses the current clocksource to increment the wall time
+ *
+ * Called from the timer interrupt, must hold a write on xtime_lock.
+ */
+static void update_wall_time(void)
+{
+ cycle_t offset;
+
+ clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
+
+#ifdef CONFIG_GENERIC_TIME
+ offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
+#else
+ offset = clock->cycle_interval;
+#endif
+
+ /* normally this loop will run just once, however in the
+ * case of lost or late ticks, it will accumulate correctly.
+ */
+ while (offset >= clock->cycle_interval) {
+ /* accumulate one interval */
+ clock->xtime_nsec += clock->xtime_interval;
+ clock->cycle_last += clock->cycle_interval;
+ offset -= clock->cycle_interval;
+
+ if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
+ clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
xtime.tv_sec++;
second_overflow();
}
- } while (ticks);
+
+ /* interpolator bits */
+ time_interpolator_update(clock->xtime_interval
+ >> clock->shift);
+ /* increment the NTP state machine */
+ update_ntp_one_tick();
+
+ /* accumulate error between NTP and clock interval */
+ clock->error += current_tick_length();
+ clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift);
+ }
+
+ /* correct the clock when NTP error is too big */
+ clocksource_adjust(clock, offset);
+
+ /* store full nanoseconds into xtime */
+ xtime.tv_nsec = clock->xtime_nsec >> clock->shift;
+ clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
+
+ /* check to see if there is a new clocksource to use */
+ if (change_clocksource()) {
+ clock->error = 0;
+ clock->xtime_nsec = 0;
+ clocksource_calculate_interval(clock, tick_nsec);
+ }
}
/*
@@ -915,10 +1243,8 @@ static inline void update_times(void)
unsigned long ticks;
ticks = jiffies - wall_jiffies;
- if (ticks) {
- wall_jiffies += ticks;
- update_wall_time(ticks);
- }
+ wall_jiffies += ticks;
+ update_wall_time();
calc_load(ticks);
}
diff --git a/kernel/unwind.c b/kernel/unwind.c
new file mode 100644
index 000000000000..f69c804c8e62
--- /dev/null
+++ b/kernel/unwind.c
@@ -0,0 +1,918 @@
+/*
+ * Copyright (C) 2002-2006 Novell, Inc.
+ * Jan Beulich <jbeulich@novell.com>
+ * This code is released under version 2 of the GNU GPL.
+ *
+ * A simple API for unwinding kernel stacks. This is used for
+ * debugging and error reporting purposes. The kernel doesn't need
+ * full-blown stack unwinding with all the bells and whistles, so there
+ * is not much point in implementing the full Dwarf2 unwind API.
+ */
+
+#include <linux/unwind.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/stop_machine.h>
+#include <asm/sections.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+
+extern char __start_unwind[], __end_unwind[];
+
+#define MAX_STACK_DEPTH 8
+
+#define EXTRA_INFO(f) { \
+ BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
+ % FIELD_SIZEOF(struct unwind_frame_info, f)) \
+ + offsetof(struct unwind_frame_info, f) \
+ / FIELD_SIZEOF(struct unwind_frame_info, f), \
+ FIELD_SIZEOF(struct unwind_frame_info, f) \
+ }
+#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
+
+static const struct {
+ unsigned offs:BITS_PER_LONG / 2;
+ unsigned width:BITS_PER_LONG / 2;
+} reg_info[] = {
+ UNW_REGISTER_INFO
+};
+
+#undef PTREGS_INFO
+#undef EXTRA_INFO
+
+#ifndef REG_INVALID
+#define REG_INVALID(r) (reg_info[r].width == 0)
+#endif
+
+#define DW_CFA_nop 0x00
+#define DW_CFA_set_loc 0x01
+#define DW_CFA_advance_loc1 0x02
+#define DW_CFA_advance_loc2 0x03
+#define DW_CFA_advance_loc4 0x04
+#define DW_CFA_offset_extended 0x05
+#define DW_CFA_restore_extended 0x06
+#define DW_CFA_undefined 0x07
+#define DW_CFA_same_value 0x08
+#define DW_CFA_register 0x09
+#define DW_CFA_remember_state 0x0a
+#define DW_CFA_restore_state 0x0b
+#define DW_CFA_def_cfa 0x0c
+#define DW_CFA_def_cfa_register 0x0d
+#define DW_CFA_def_cfa_offset 0x0e
+#define DW_CFA_def_cfa_expression 0x0f
+#define DW_CFA_expression 0x10
+#define DW_CFA_offset_extended_sf 0x11
+#define DW_CFA_def_cfa_sf 0x12
+#define DW_CFA_def_cfa_offset_sf 0x13
+#define DW_CFA_val_offset 0x14
+#define DW_CFA_val_offset_sf 0x15
+#define DW_CFA_val_expression 0x16
+#define DW_CFA_lo_user 0x1c
+#define DW_CFA_GNU_window_save 0x2d
+#define DW_CFA_GNU_args_size 0x2e
+#define DW_CFA_GNU_negative_offset_extended 0x2f
+#define DW_CFA_hi_user 0x3f
+
+#define DW_EH_PE_FORM 0x07
+#define DW_EH_PE_native 0x00
+#define DW_EH_PE_leb128 0x01
+#define DW_EH_PE_data2 0x02
+#define DW_EH_PE_data4 0x03
+#define DW_EH_PE_data8 0x04
+#define DW_EH_PE_signed 0x08
+#define DW_EH_PE_ADJUST 0x70
+#define DW_EH_PE_abs 0x00
+#define DW_EH_PE_pcrel 0x10
+#define DW_EH_PE_textrel 0x20
+#define DW_EH_PE_datarel 0x30
+#define DW_EH_PE_funcrel 0x40
+#define DW_EH_PE_aligned 0x50
+#define DW_EH_PE_indirect 0x80
+#define DW_EH_PE_omit 0xff
+
+typedef unsigned long uleb128_t;
+typedef signed long sleb128_t;
+
+static struct unwind_table {
+ struct {
+ unsigned long pc;
+ unsigned long range;
+ } core, init;
+ const void *address;
+ unsigned long size;
+ struct unwind_table *link;
+ const char *name;
+} root_table, *last_table;
+
+struct unwind_item {
+ enum item_location {
+ Nowhere,
+ Memory,
+ Register,
+ Value
+ } where;
+ uleb128_t value;
+};
+
+struct unwind_state {
+ uleb128_t loc, org;
+ const u8 *cieStart, *cieEnd;
+ uleb128_t codeAlign;
+ sleb128_t dataAlign;
+ struct cfa {
+ uleb128_t reg, offs;
+ } cfa;
+ struct unwind_item regs[ARRAY_SIZE(reg_info)];
+ unsigned stackDepth:8;
+ unsigned version:8;
+ const u8 *label;
+ const u8 *stack[MAX_STACK_DEPTH];
+};
+
+static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
+
+static struct unwind_table *find_table(unsigned long pc)
+{
+ struct unwind_table *table;
+
+ for (table = &root_table; table; table = table->link)
+ if ((pc >= table->core.pc
+ && pc < table->core.pc + table->core.range)
+ || (pc >= table->init.pc
+ && pc < table->init.pc + table->init.range))
+ break;
+
+ return table;
+}
+
+static void init_unwind_table(struct unwind_table *table,
+ const char *name,
+ const void *core_start,
+ unsigned long core_size,
+ const void *init_start,
+ unsigned long init_size,
+ const void *table_start,
+ unsigned long table_size)
+{
+ table->core.pc = (unsigned long)core_start;
+ table->core.range = core_size;
+ table->init.pc = (unsigned long)init_start;
+ table->init.range = init_size;
+ table->address = table_start;
+ table->size = table_size;
+ table->link = NULL;
+ table->name = name;
+}
+
+void __init unwind_init(void)
+{
+ init_unwind_table(&root_table, "kernel",
+ _text, _end - _text,
+ NULL, 0,
+ __start_unwind, __end_unwind - __start_unwind);
+}
+
+#ifdef CONFIG_MODULES
+
+/* Must be called with module_mutex held. */
+void *unwind_add_table(struct module *module,
+ const void *table_start,
+ unsigned long table_size)
+{
+ struct unwind_table *table;
+
+ if (table_size <= 0)
+ return NULL;
+
+ table = kmalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return NULL;
+
+ init_unwind_table(table, module->name,
+ module->module_core, module->core_size,
+ module->module_init, module->init_size,
+ table_start, table_size);
+
+ if (last_table)
+ last_table->link = table;
+ else
+ root_table.link = table;
+ last_table = table;
+
+ return table;
+}
+
+struct unlink_table_info
+{
+ struct unwind_table *table;
+ int init_only;
+};
+
+static int unlink_table(void *arg)
+{
+ struct unlink_table_info *info = arg;
+ struct unwind_table *table = info->table, *prev;
+
+ for (prev = &root_table; prev->link && prev->link != table; prev = prev->link)
+ ;
+
+ if (prev->link) {
+ if (info->init_only) {
+ table->init.pc = 0;
+ table->init.range = 0;
+ info->table = NULL;
+ } else {
+ prev->link = table->link;
+ if (!prev->link)
+ last_table = prev;
+ }
+ } else
+ info->table = NULL;
+
+ return 0;
+}
+
+/* Must be called with module_mutex held. */
+void unwind_remove_table(void *handle, int init_only)
+{
+ struct unwind_table *table = handle;
+ struct unlink_table_info info;
+
+ if (!table || table == &root_table)
+ return;
+
+ if (init_only && table == last_table) {
+ table->init.pc = 0;
+ table->init.range = 0;
+ return;
+ }
+
+ info.table = table;
+ info.init_only = init_only;
+ stop_machine_run(unlink_table, &info, NR_CPUS);
+
+ if (info.table)
+ kfree(table);
+}
+
+#endif /* CONFIG_MODULES */
+
+static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
+{
+ const u8 *cur = *pcur;
+ uleb128_t value;
+ unsigned shift;
+
+ for (shift = 0, value = 0; cur < end; shift += 7) {
+ if (shift + 7 > 8 * sizeof(value)
+ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
+ cur = end + 1;
+ break;
+ }
+ value |= (uleb128_t)(*cur & 0x7f) << shift;
+ if (!(*cur++ & 0x80))
+ break;
+ }
+ *pcur = cur;
+
+ return value;
+}
+
+static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
+{
+ const u8 *cur = *pcur;
+ sleb128_t value;
+ unsigned shift;
+
+ for (shift = 0, value = 0; cur < end; shift += 7) {
+ if (shift + 7 > 8 * sizeof(value)
+ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
+ cur = end + 1;
+ break;
+ }
+ value |= (sleb128_t)(*cur & 0x7f) << shift;
+ if (!(*cur & 0x80)) {
+ value |= -(*cur++ & 0x40) << shift;
+ break;
+ }
+ }
+ *pcur = cur;
+
+ return value;
+}
+
+static unsigned long read_pointer(const u8 **pLoc,
+ const void *end,
+ signed ptrType)
+{
+ unsigned long value = 0;
+ union {
+ const u8 *p8;
+ const u16 *p16u;
+ const s16 *p16s;
+ const u32 *p32u;
+ const s32 *p32s;
+ const unsigned long *pul;
+ } ptr;
+
+ if (ptrType < 0 || ptrType == DW_EH_PE_omit)
+ return 0;
+ ptr.p8 = *pLoc;
+ switch(ptrType & DW_EH_PE_FORM) {
+ case DW_EH_PE_data2:
+ if (end < (const void *)(ptr.p16u + 1))
+ return 0;
+ if(ptrType & DW_EH_PE_signed)
+ value = get_unaligned(ptr.p16s++);
+ else
+ value = get_unaligned(ptr.p16u++);
+ break;
+ case DW_EH_PE_data4:
+#ifdef CONFIG_64BIT
+ if (end < (const void *)(ptr.p32u + 1))
+ return 0;
+ if(ptrType & DW_EH_PE_signed)
+ value = get_unaligned(ptr.p32s++);
+ else
+ value = get_unaligned(ptr.p32u++);
+ break;
+ case DW_EH_PE_data8:
+ BUILD_BUG_ON(sizeof(u64) != sizeof(value));
+#else
+ BUILD_BUG_ON(sizeof(u32) != sizeof(value));
+#endif
+ case DW_EH_PE_native:
+ if (end < (const void *)(ptr.pul + 1))
+ return 0;
+ value = get_unaligned(ptr.pul++);
+ break;
+ case DW_EH_PE_leb128:
+ BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
+ value = ptrType & DW_EH_PE_signed
+ ? get_sleb128(&ptr.p8, end)
+ : get_uleb128(&ptr.p8, end);
+ if ((const void *)ptr.p8 > end)
+ return 0;
+ break;
+ default:
+ return 0;
+ }
+ switch(ptrType & DW_EH_PE_ADJUST) {
+ case DW_EH_PE_abs:
+ break;
+ case DW_EH_PE_pcrel:
+ value += (unsigned long)*pLoc;
+ break;
+ default:
+ return 0;
+ }
+ if ((ptrType & DW_EH_PE_indirect)
+ && __get_user(value, (unsigned long *)value))
+ return 0;
+ *pLoc = ptr.p8;
+
+ return value;
+}
+
+static signed fde_pointer_type(const u32 *cie)
+{
+ const u8 *ptr = (const u8 *)(cie + 2);
+ unsigned version = *ptr;
+
+ if (version != 1)
+ return -1; /* unsupported */
+ if (*++ptr) {
+ const char *aug;
+ const u8 *end = (const u8 *)(cie + 1) + *cie;
+ uleb128_t len;
+
+ /* check if augmentation size is first (and thus present) */
+ if (*ptr != 'z')
+ return -1;
+ /* check if augmentation string is nul-terminated */
+ if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
+ return -1;
+ ++ptr; /* skip terminator */
+ get_uleb128(&ptr, end); /* skip code alignment */
+ get_sleb128(&ptr, end); /* skip data alignment */
+ /* skip return address column */
+ version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end);
+ len = get_uleb128(&ptr, end); /* augmentation length */
+ if (ptr + len < ptr || ptr + len > end)
+ return -1;
+ end = ptr + len;
+ while (*++aug) {
+ if (ptr >= end)
+ return -1;
+ switch(*aug) {
+ case 'L':
+ ++ptr;
+ break;
+ case 'P': {
+ signed ptrType = *ptr++;
+
+ if (!read_pointer(&ptr, end, ptrType) || ptr > end)
+ return -1;
+ }
+ break;
+ case 'R':
+ return *ptr;
+ default:
+ return -1;
+ }
+ }
+ }
+ return DW_EH_PE_native|DW_EH_PE_abs;
+}
+
+static int advance_loc(unsigned long delta, struct unwind_state *state)
+{
+ state->loc += delta * state->codeAlign;
+
+ return delta > 0;
+}
+
+static void set_rule(uleb128_t reg,
+ enum item_location where,
+ uleb128_t value,
+ struct unwind_state *state)
+{
+ if (reg < ARRAY_SIZE(state->regs)) {
+ state->regs[reg].where = where;
+ state->regs[reg].value = value;
+ }
+}
+
+static int processCFI(const u8 *start,
+ const u8 *end,
+ unsigned long targetLoc,
+ signed ptrType,
+ struct unwind_state *state)
+{
+ union {
+ const u8 *p8;
+ const u16 *p16;
+ const u32 *p32;
+ } ptr;
+ int result = 1;
+
+ if (start != state->cieStart) {
+ state->loc = state->org;
+ result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
+ if (targetLoc == 0 && state->label == NULL)
+ return result;
+ }
+ for (ptr.p8 = start; result && ptr.p8 < end; ) {
+ switch(*ptr.p8 >> 6) {
+ uleb128_t value;
+
+ case 0:
+ switch(*ptr.p8++) {
+ case DW_CFA_nop:
+ break;
+ case DW_CFA_set_loc:
+ if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0)
+ result = 0;
+ break;
+ case DW_CFA_advance_loc1:
+ result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
+ break;
+ case DW_CFA_advance_loc2:
+ result = ptr.p8 <= end + 2
+ && advance_loc(*ptr.p16++, state);
+ break;
+ case DW_CFA_advance_loc4:
+ result = ptr.p8 <= end + 4
+ && advance_loc(*ptr.p32++, state);
+ break;
+ case DW_CFA_offset_extended:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_val_offset:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_offset_extended_sf:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_val_offset_sf:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_restore_extended:
+ case DW_CFA_undefined:
+ case DW_CFA_same_value:
+ set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
+ break;
+ case DW_CFA_register:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value,
+ Register,
+ get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_remember_state:
+ if (ptr.p8 == state->label) {
+ state->label = NULL;
+ return 1;
+ }
+ if (state->stackDepth >= MAX_STACK_DEPTH)
+ return 0;
+ state->stack[state->stackDepth++] = ptr.p8;
+ break;
+ case DW_CFA_restore_state:
+ if (state->stackDepth) {
+ const uleb128_t loc = state->loc;
+ const u8 *label = state->label;
+
+ state->label = state->stack[state->stackDepth - 1];
+ memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
+ memset(state->regs, 0, sizeof(state->regs));
+ state->stackDepth = 0;
+ result = processCFI(start, end, 0, ptrType, state);
+ state->loc = loc;
+ state->label = label;
+ } else
+ return 0;
+ break;
+ case DW_CFA_def_cfa:
+ state->cfa.reg = get_uleb128(&ptr.p8, end);
+ /*nobreak*/
+ case DW_CFA_def_cfa_offset:
+ state->cfa.offs = get_uleb128(&ptr.p8, end);
+ break;
+ case DW_CFA_def_cfa_sf:
+ state->cfa.reg = get_uleb128(&ptr.p8, end);
+ /*nobreak*/
+ case DW_CFA_def_cfa_offset_sf:
+ state->cfa.offs = get_sleb128(&ptr.p8, end)
+ * state->dataAlign;
+ break;
+ case DW_CFA_def_cfa_register:
+ state->cfa.reg = get_uleb128(&ptr.p8, end);
+ break;
+ /*todo case DW_CFA_def_cfa_expression: */
+ /*todo case DW_CFA_expression: */
+ /*todo case DW_CFA_val_expression: */
+ case DW_CFA_GNU_args_size:
+ get_uleb128(&ptr.p8, end);
+ break;
+ case DW_CFA_GNU_negative_offset_extended:
+ value = get_uleb128(&ptr.p8, end);
+ set_rule(value,
+ Memory,
+ (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
+ break;
+ case DW_CFA_GNU_window_save:
+ default:
+ result = 0;
+ break;
+ }
+ break;
+ case 1:
+ result = advance_loc(*ptr.p8++ & 0x3f, state);
+ break;
+ case 2:
+ value = *ptr.p8++ & 0x3f;
+ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
+ break;
+ case 3:
+ set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
+ break;
+ }
+ if (ptr.p8 > end)
+ result = 0;
+ if (result && targetLoc != 0 && targetLoc < state->loc)
+ return 1;
+ }
+
+ return result
+ && ptr.p8 == end
+ && (targetLoc == 0
+ || (/*todo While in theory this should apply, gcc in practice omits
+ everything past the function prolog, and hence the location
+ never reaches the end of the function.
+ targetLoc < state->loc &&*/ state->label == NULL));
+}
+
+/* Unwind to previous to frame. Returns 0 if successful, negative
+ * number in case of an error. */
+int unwind(struct unwind_frame_info *frame)
+{
+#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
+ const u32 *fde = NULL, *cie = NULL;
+ const u8 *ptr = NULL, *end = NULL;
+ unsigned long startLoc = 0, endLoc = 0, cfa;
+ unsigned i;
+ signed ptrType = -1;
+ uleb128_t retAddrReg = 0;
+ struct unwind_table *table;
+ struct unwind_state state;
+
+ if (UNW_PC(frame) == 0)
+ return -EINVAL;
+ if ((table = find_table(UNW_PC(frame))) != NULL
+ && !(table->size & (sizeof(*fde) - 1))) {
+ unsigned long tableSize = table->size;
+
+ for (fde = table->address;
+ tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
+ tableSize -= sizeof(*fde) + *fde,
+ fde += 1 + *fde / sizeof(*fde)) {
+ if (!*fde || (*fde & (sizeof(*fde) - 1)))
+ break;
+ if (!fde[1])
+ continue; /* this is a CIE */
+ if ((fde[1] & (sizeof(*fde) - 1))
+ || fde[1] > (unsigned long)(fde + 1)
+ - (unsigned long)table->address)
+ continue; /* this is not a valid FDE */
+ cie = fde + 1 - fde[1] / sizeof(*fde);
+ if (*cie <= sizeof(*cie) + 4
+ || *cie >= fde[1] - sizeof(*fde)
+ || (*cie & (sizeof(*cie) - 1))
+ || cie[1]
+ || (ptrType = fde_pointer_type(cie)) < 0) {
+ cie = NULL; /* this is not a (valid) CIE */
+ continue;
+ }
+ ptr = (const u8 *)(fde + 2);
+ startLoc = read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType);
+ endLoc = startLoc
+ + read_pointer(&ptr,
+ (const u8 *)(fde + 1) + *fde,
+ ptrType & DW_EH_PE_indirect
+ ? ptrType
+ : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed));
+ if (UNW_PC(frame) >= startLoc && UNW_PC(frame) < endLoc)
+ break;
+ cie = NULL;
+ }
+ }
+ if (cie != NULL) {
+ memset(&state, 0, sizeof(state));
+ state.cieEnd = ptr; /* keep here temporarily */
+ ptr = (const u8 *)(cie + 2);
+ end = (const u8 *)(cie + 1) + *cie;
+ if ((state.version = *ptr) != 1)
+ cie = NULL; /* unsupported version */
+ else if (*++ptr) {
+ /* check if augmentation size is first (and thus present) */
+ if (*ptr == 'z') {
+ /* check for ignorable (or already handled)
+ * nul-terminated augmentation string */
+ while (++ptr < end && *ptr)
+ if (strchr("LPR", *ptr) == NULL)
+ break;
+ }
+ if (ptr >= end || *ptr)
+ cie = NULL;
+ }
+ ++ptr;
+ }
+ if (cie != NULL) {
+ /* get code aligment factor */
+ state.codeAlign = get_uleb128(&ptr, end);
+ /* get data aligment factor */
+ state.dataAlign = get_sleb128(&ptr, end);
+ if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
+ cie = NULL;
+ else {
+ retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
+ /* skip augmentation */
+ if (((const char *)(cie + 2))[1] == 'z')
+ ptr += get_uleb128(&ptr, end);
+ if (ptr > end
+ || retAddrReg >= ARRAY_SIZE(reg_info)
+ || REG_INVALID(retAddrReg)
+ || reg_info[retAddrReg].width != sizeof(unsigned long))
+ cie = NULL;
+ }
+ }
+ if (cie != NULL) {
+ state.cieStart = ptr;
+ ptr = state.cieEnd;
+ state.cieEnd = end;
+ end = (const u8 *)(fde + 1) + *fde;
+ /* skip augmentation */
+ if (((const char *)(cie + 2))[1] == 'z') {
+ uleb128_t augSize = get_uleb128(&ptr, end);
+
+ if ((ptr += augSize) > end)
+ fde = NULL;
+ }
+ }
+ if (cie == NULL || fde == NULL) {
+#ifdef CONFIG_FRAME_POINTER
+ unsigned long top, bottom;
+#endif
+
+#ifdef CONFIG_FRAME_POINTER
+ top = STACK_TOP(frame->task);
+ bottom = STACK_BOTTOM(frame->task);
+# if FRAME_RETADDR_OFFSET < 0
+ if (UNW_SP(frame) < top
+ && UNW_FP(frame) <= UNW_SP(frame)
+ && bottom < UNW_FP(frame)
+# else
+ if (UNW_SP(frame) > top
+ && UNW_FP(frame) >= UNW_SP(frame)
+ && bottom > UNW_FP(frame)
+# endif
+ && !((UNW_SP(frame) | UNW_FP(frame))
+ & (sizeof(unsigned long) - 1))) {
+ unsigned long link;
+
+ if (!__get_user(link,
+ (unsigned long *)(UNW_FP(frame)
+ + FRAME_LINK_OFFSET))
+# if FRAME_RETADDR_OFFSET < 0
+ && link > bottom && link < UNW_FP(frame)
+# else
+ && link > UNW_FP(frame) && link < bottom
+# endif
+ && !(link & (sizeof(link) - 1))
+ && !__get_user(UNW_PC(frame),
+ (unsigned long *)(UNW_FP(frame)
+ + FRAME_RETADDR_OFFSET))) {
+ UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
+# if FRAME_RETADDR_OFFSET < 0
+ -
+# else
+ +
+# endif
+ sizeof(UNW_PC(frame));
+ UNW_FP(frame) = link;
+ return 0;
+ }
+ }
+#endif
+ return -ENXIO;
+ }
+ state.org = startLoc;
+ memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
+ /* process instructions */
+ if (!processCFI(ptr, end, UNW_PC(frame), ptrType, &state)
+ || state.loc > endLoc
+ || state.regs[retAddrReg].where == Nowhere
+ || state.cfa.reg >= ARRAY_SIZE(reg_info)
+ || reg_info[state.cfa.reg].width != sizeof(unsigned long)
+ || state.cfa.offs % sizeof(unsigned long))
+ return -EIO;
+ /* update frame */
+ cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
+ startLoc = min((unsigned long)UNW_SP(frame), cfa);
+ endLoc = max((unsigned long)UNW_SP(frame), cfa);
+ if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
+ startLoc = min(STACK_LIMIT(cfa), cfa);
+ endLoc = max(STACK_LIMIT(cfa), cfa);
+ }
+#ifndef CONFIG_64BIT
+# define CASES CASE(8); CASE(16); CASE(32)
+#else
+# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
+#endif
+ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
+ if (REG_INVALID(i)) {
+ if (state.regs[i].where == Nowhere)
+ continue;
+ return -EIO;
+ }
+ switch(state.regs[i].where) {
+ default:
+ break;
+ case Register:
+ if (state.regs[i].value >= ARRAY_SIZE(reg_info)
+ || REG_INVALID(state.regs[i].value)
+ || reg_info[i].width > reg_info[state.regs[i].value].width)
+ return -EIO;
+ switch(reg_info[state.regs[i].value].width) {
+#define CASE(n) \
+ case sizeof(u##n): \
+ state.regs[i].value = FRAME_REG(state.regs[i].value, \
+ const u##n); \
+ break
+ CASES;
+#undef CASE
+ default:
+ return -EIO;
+ }
+ break;
+ }
+ }
+ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
+ if (REG_INVALID(i))
+ continue;
+ switch(state.regs[i].where) {
+ case Nowhere:
+ if (reg_info[i].width != sizeof(UNW_SP(frame))
+ || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
+ != &UNW_SP(frame))
+ continue;
+ UNW_SP(frame) = cfa;
+ break;
+ case Register:
+ switch(reg_info[i].width) {
+#define CASE(n) case sizeof(u##n): \
+ FRAME_REG(i, u##n) = state.regs[i].value; \
+ break
+ CASES;
+#undef CASE
+ default:
+ return -EIO;
+ }
+ break;
+ case Value:
+ if (reg_info[i].width != sizeof(unsigned long))
+ return -EIO;
+ FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
+ * state.dataAlign;
+ break;
+ case Memory: {
+ unsigned long addr = cfa + state.regs[i].value
+ * state.dataAlign;
+
+ if ((state.regs[i].value * state.dataAlign)
+ % sizeof(unsigned long)
+ || addr < startLoc
+ || addr + sizeof(unsigned long) < addr
+ || addr + sizeof(unsigned long) > endLoc)
+ return -EIO;
+ switch(reg_info[i].width) {
+#define CASE(n) case sizeof(u##n): \
+ __get_user(FRAME_REG(i, u##n), (u##n *)addr); \
+ break
+ CASES;
+#undef CASE
+ default:
+ return -EIO;
+ }
+ }
+ break;
+ }
+ }
+
+ return 0;
+#undef CASES
+#undef FRAME_REG
+}
+EXPORT_SYMBOL(unwind);
+
+int unwind_init_frame_info(struct unwind_frame_info *info,
+ struct task_struct *tsk,
+ /*const*/ struct pt_regs *regs)
+{
+ info->task = tsk;
+ arch_unw_init_frame_info(info, regs);
+
+ return 0;
+}
+EXPORT_SYMBOL(unwind_init_frame_info);
+
+/*
+ * Prepare to unwind a blocked task.
+ */
+int unwind_init_blocked(struct unwind_frame_info *info,
+ struct task_struct *tsk)
+{
+ info->task = tsk;
+ arch_unw_init_blocked(info);
+
+ return 0;
+}
+EXPORT_SYMBOL(unwind_init_blocked);
+
+/*
+ * Prepare to unwind the currently running thread.
+ */
+int unwind_init_running(struct unwind_frame_info *info,
+ asmlinkage int (*callback)(struct unwind_frame_info *,
+ void *arg),
+ void *arg)
+{
+ info->task = current;
+
+ return arch_unwind_init_running(info, callback, arg);
+}
+EXPORT_SYMBOL(unwind_init_running);
+
+/*
+ * Unwind until the return pointer is in user-land (or until an error
+ * occurs). Returns 0 if successful, negative number in case of
+ * error.
+ */
+int unwind_to_user(struct unwind_frame_info *info)
+{
+ while (!arch_unw_user_mode(info)) {
+ int err = unwind(info);
+
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(unwind_to_user);