From b842f8faf6c7dc2005c6a70631c1a91bac02f180 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Fri, 1 Oct 2010 17:23:41 -0400
Subject: jump label: Fix module __init section race

Jump label uses is_module_text_address() to ensure that the module
__init sections are valid before updating them. However, between the
check for a valid module __init section and the subsequent jump
label update, the module's __init section could be freed out from under
us.

We fix this potential race by adding a notifier callback to the
MODULE_STATE_LIVE state. This notifier is called *after* the __init
section has been run but before it is going to be freed. In the
callback, the jump label code zeros the key value for any __init jump
code within the module, and we add a check for a non-zero key value when
we update jump labels. In this way we require no additional data
structures.

Thanks to Mathieu Desnoyers for pointing out this race condition.

Reported-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Jason Baron <jbaron@redhat.com>
LKML-Reference: <c6f037b7598777668025ceedd9294212fd95fa34.1285965957.git.jbaron@redhat.com>

[ Renamed remove_module_init() to remove_jump_label_module_init()
  as suggested by Masami Hiramatsu. ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/jump_label.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 7be868bf25c6..be9e105345eb 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -168,7 +168,8 @@ void jump_label_update(unsigned long key, enum jump_label_type type)
 			count = e_module->nr_entries;
 			iter = e_module->table;
 			while (count--) {
-				if (kernel_text_address(iter->code))
+				if (iter->key &&
+						kernel_text_address(iter->code))
 					arch_jump_label_transform(iter, type);
 				iter++;
 			}
@@ -366,6 +367,39 @@ static void remove_jump_label_module(struct module *mod)
 	}
 }
 
+static void remove_jump_label_module_init(struct module *mod)
+{
+	struct hlist_head *head;
+	struct hlist_node *node, *node_next, *module_node, *module_node_next;
+	struct jump_label_entry *e;
+	struct jump_label_module_entry *e_module;
+	struct jump_entry *iter;
+	int i, count;
+
+	/* if the module doesn't have jump label entries, just return */
+	if (!mod->num_jump_entries)
+		return;
+
+	for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
+		head = &jump_label_table[i];
+		hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
+			hlist_for_each_entry_safe(e_module, module_node,
+						  module_node_next,
+						  &(e->modules), hlist) {
+				if (e_module->mod != mod)
+					continue;
+				count = e_module->nr_entries;
+				iter = e_module->table;
+				while (count--) {
+					if (within_module_init(iter->code, mod))
+						iter->key = 0;
+					iter++;
+				}
+			}
+		}
+	}
+}
+
 static int
 jump_label_module_notify(struct notifier_block *self, unsigned long val,
 			 void *data)
@@ -386,6 +420,11 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
 		remove_jump_label_module(mod);
 		mutex_unlock(&jump_label_mutex);
 		break;
+	case MODULE_STATE_LIVE:
+		mutex_lock(&jump_label_mutex);
+		remove_jump_label_module_init(mod);
+		mutex_unlock(&jump_label_mutex);
+		break;
 	}
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 91bad2f8d3057482b9afb599f14421b007136960 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Fri, 1 Oct 2010 17:23:48 -0400
Subject: jump label: Fix deadlock b/w jump_label_mutex vs. text_mutex

register_kprobe() downs the 'text_mutex' and then calls
jump_label_text_reserved(), which downs the 'jump_label_mutex'.
However, the jump label code takes those mutexes in the reverse
order.

Fix by requiring the caller of jump_label_text_reserved() to do
the jump label locking via the newly added: jump_label_lock(),
jump_label_unlock(). Currently, kprobes is the only user
of jump_label_text_reserved().

Reported-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Jason Baron <jbaron@redhat.com>
LKML-Reference: <759032c48d5e30c27f0bba003d09bffa8e9f28bb.1285965957.git.jbaron@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/jump_label.h |  5 +++++
 kernel/jump_label.c        | 33 +++++++++++++++++++++------------
 kernel/kprobes.c           |  6 ++++++
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index b67cb180e6e9..1947a1212678 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -18,6 +18,8 @@ struct module;
 extern struct jump_entry __start___jump_table[];
 extern struct jump_entry __stop___jump_table[];
 
+extern void jump_label_lock(void);
+extern void jump_label_unlock(void);
 extern void arch_jump_label_transform(struct jump_entry *entry,
 				 enum jump_label_type type);
 extern void arch_jump_label_text_poke_early(jump_label_t addr);
@@ -59,6 +61,9 @@ static inline int jump_label_text_reserved(void *start, void *end)
 	return 0;
 }
 
+static inline void jump_label_lock(void) {}
+static inline void jump_label_unlock(void) {}
+
 #endif
 
 #define COND_STMT(key, stmt)					\
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index be9e105345eb..12cce78e9568 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -39,6 +39,16 @@ struct jump_label_module_entry {
 	struct module *mod;
 };
 
+void jump_label_lock(void)
+{
+	mutex_lock(&jump_label_mutex);
+}
+
+void jump_label_unlock(void)
+{
+	mutex_unlock(&jump_label_mutex);
+}
+
 static int jump_label_cmp(const void *a, const void *b)
 {
 	const struct jump_entry *jea = a;
@@ -152,7 +162,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type)
 	struct jump_label_module_entry *e_module;
 	int count;
 
-	mutex_lock(&jump_label_mutex);
+	jump_label_lock();
 	entry = get_jump_label_entry((jump_label_t)key);
 	if (entry) {
 		count = entry->nr_entries;
@@ -175,7 +185,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type)
 			}
 		}
 	}
-	mutex_unlock(&jump_label_mutex);
+	jump_label_unlock();
 }
 
 static int addr_conflict(struct jump_entry *entry, void *start, void *end)
@@ -232,6 +242,7 @@ out:
  * overlaps with any of the jump label patch addresses. Code
  * that wants to modify kernel text should first verify that
  * it does not overlap with any of the jump label addresses.
+ * Caller must hold jump_label_mutex.
  *
  * returns 1 if there is an overlap, 0 otherwise
  */
@@ -242,7 +253,6 @@ int jump_label_text_reserved(void *start, void *end)
 	struct jump_entry *iter_stop = __start___jump_table;
 	int conflict = 0;
 
-	mutex_lock(&jump_label_mutex);
 	iter = iter_start;
 	while (iter < iter_stop) {
 		if (addr_conflict(iter, start, end)) {
@@ -257,7 +267,6 @@ int jump_label_text_reserved(void *start, void *end)
 	conflict = module_conflict(start, end);
 #endif
 out:
-	mutex_unlock(&jump_label_mutex);
 	return conflict;
 }
 
@@ -268,7 +277,7 @@ static __init int init_jump_label(void)
 	struct jump_entry *iter_stop = __stop___jump_table;
 	struct jump_entry *iter;
 
-	mutex_lock(&jump_label_mutex);
+	jump_label_lock();
 	ret = build_jump_label_hashtable(__start___jump_table,
 					 __stop___jump_table);
 	iter = iter_start;
@@ -276,7 +285,7 @@ static __init int init_jump_label(void)
 		arch_jump_label_text_poke_early(iter->code);
 		iter++;
 	}
-	mutex_unlock(&jump_label_mutex);
+	jump_label_unlock();
 	return ret;
 }
 early_initcall(init_jump_label);
@@ -409,21 +418,21 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
 
 	switch (val) {
 	case MODULE_STATE_COMING:
-		mutex_lock(&jump_label_mutex);
+		jump_label_lock();
 		ret = add_jump_label_module(mod);
 		if (ret)
 			remove_jump_label_module(mod);
-		mutex_unlock(&jump_label_mutex);
+		jump_label_unlock();
 		break;
 	case MODULE_STATE_GOING:
-		mutex_lock(&jump_label_mutex);
+		jump_label_lock();
 		remove_jump_label_module(mod);
-		mutex_unlock(&jump_label_mutex);
+		jump_label_unlock();
 		break;
 	case MODULE_STATE_LIVE:
-		mutex_lock(&jump_label_mutex);
+		jump_label_lock();
 		remove_jump_label_module_init(mod);
-		mutex_unlock(&jump_label_mutex);
+		jump_label_unlock();
 		break;
 	}
 	return ret;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 99865c33a60d..9437e14f36bd 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1146,13 +1146,16 @@ int __kprobes register_kprobe(struct kprobe *p)
 		return ret;
 
 	preempt_disable();
+	jump_label_lock();
 	if (!kernel_text_address((unsigned long) p->addr) ||
 	    in_kprobes_functions((unsigned long) p->addr) ||
 	    ftrace_text_reserved(p->addr, p->addr) ||
 	    jump_label_text_reserved(p->addr, p->addr)) {
 		preempt_enable();
+		jump_label_unlock();
 		return -EINVAL;
 	}
+	jump_label_unlock();
 
 	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
 	p->flags &= KPROBE_FLAG_DISABLED;
@@ -1187,6 +1190,8 @@ int __kprobes register_kprobe(struct kprobe *p)
 	INIT_LIST_HEAD(&p->list);
 	mutex_lock(&kprobe_mutex);
 
+	jump_label_lock(); /* needed to call jump_label_text_reserved() */
+
 	get_online_cpus();	/* For avoiding text_mutex deadlock. */
 	mutex_lock(&text_mutex);
 
@@ -1214,6 +1219,7 @@ int __kprobes register_kprobe(struct kprobe *p)
 out:
 	mutex_unlock(&text_mutex);
 	put_online_cpus();
+	jump_label_unlock();
 	mutex_unlock(&kprobe_mutex);
 
 	if (probed_mod)
-- 
cgit v1.2.3-59-g8ed1b


From 4ac3dbec800d93485a5c84e37af676278eea657c Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Wed, 27 Oct 2010 11:17:15 -0400
Subject: oprofile: Fix the hang while taking the cpu offline

The kernel build with CONFIG_OPROFILE and CPU_HOTPLUG enabled.
The oprofile is initialised using system timer in absence of hardware
counters supports. Oprofile isn't started from userland.

In this setup while doing a CPU offline the kernel hangs in infinite
for loop inside lock_hrtimer_base() function

This happens because as part of oprofile_cpu_notify(, it tries to
stop an hrtimer which was never started. These per-cpu hrtimers
are started when the oprfile is started.
	echo 1	> /dev/oprofile/enable

This problem also existwhen the cpu is booted with maxcpus parameter
set. When bringing the remaining cpus online the timers are started
even if oprofile is not yet enabled.

This patch fix this issue by adding a state variable so that
these hrtimer start/stop is only attempted when oprofile is
started

For stable kernels v2.6.35.y and v2.6.36.y.

Reported-by: Jan Sebastien <s-jan@ti.com>
Tested-by: sricharan <r.sricharan@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: stable@kernel.org
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 drivers/oprofile/timer_int.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c
index dc0ae4d14dff..010725117dbb 100644
--- a/drivers/oprofile/timer_int.c
+++ b/drivers/oprofile/timer_int.c
@@ -21,6 +21,7 @@
 #include "oprof.h"
 
 static DEFINE_PER_CPU(struct hrtimer, oprofile_hrtimer);
+static int ctr_running;
 
 static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer *hrtimer)
 {
@@ -33,6 +34,9 @@ static void __oprofile_hrtimer_start(void *unused)
 {
 	struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer);
 
+	if (!ctr_running)
+		return;
+
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = oprofile_hrtimer_notify;
 
@@ -42,7 +46,10 @@ static void __oprofile_hrtimer_start(void *unused)
 
 static int oprofile_hrtimer_start(void)
 {
+	get_online_cpus();
+	ctr_running = 1;
 	on_each_cpu(__oprofile_hrtimer_start, NULL, 1);
+	put_online_cpus();
 	return 0;
 }
 
@@ -50,6 +57,9 @@ static void __oprofile_hrtimer_stop(int cpu)
 {
 	struct hrtimer *hrtimer = &per_cpu(oprofile_hrtimer, cpu);
 
+	if (!ctr_running)
+		return;
+
 	hrtimer_cancel(hrtimer);
 }
 
@@ -57,8 +67,11 @@ static void oprofile_hrtimer_stop(void)
 {
 	int cpu;
 
+	get_online_cpus();
 	for_each_online_cpu(cpu)
 		__oprofile_hrtimer_stop(cpu);
+	ctr_running = 0;
+	put_online_cpus();
 }
 
 static int __cpuinit oprofile_cpu_notify(struct notifier_block *self,
-- 
cgit v1.2.3-59-g8ed1b


From 3d7851b3cdd43a734e5cc4c643fd886ab28ad4d5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 15 Oct 2010 09:51:08 -0400
Subject: oprofile: Remove deprecated use of flush_scheduled_work()

flush_scheduled_work() is deprecated and scheduled to be removed.
sync_stop() currently cancels cpu_buffer works inside buffer_mutex and
flushes the system workqueue outside.  Instead, split end_cpu_work()
into two parts - stopping further work enqueues and flushing works -
and do the former inside buffer_mutex and latter outside.

For stable kernels v2.6.35.y and v2.6.36.y.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@kernel.org
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 drivers/oprofile/buffer_sync.c |  2 +-
 drivers/oprofile/cpu_buffer.c  | 10 +++++++---
 drivers/oprofile/cpu_buffer.h  |  1 +
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index b7e755f4178a..a3984f4ef192 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -190,7 +190,7 @@ void sync_stop(void)
 	profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
 	task_handoff_unregister(&task_free_nb);
 	mutex_unlock(&buffer_mutex);
-	flush_scheduled_work();
+	flush_cpu_work();
 
 	/* make sure we don't leak task structs */
 	process_task_mortuary();
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index f179ac2ea801..59f55441e075 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -111,14 +111,18 @@ void start_cpu_work(void)
 
 void end_cpu_work(void)
 {
-	int i;
-
 	work_enabled = 0;
+}
+
+void flush_cpu_work(void)
+{
+	int i;
 
 	for_each_online_cpu(i) {
 		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
 
-		cancel_delayed_work(&b->work);
+		/* these works are per-cpu, no need for flush_sync */
+		flush_delayed_work(&b->work);
 	}
 }
 
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index 68ea16ab645f..e1d097e250ae 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -25,6 +25,7 @@ void free_cpu_buffers(void);
 
 void start_cpu_work(void);
 void end_cpu_work(void);
+void flush_cpu_work(void);
 
 /* CPU buffer is composed of such entries (which are
  * also used for context switch notes)
-- 
cgit v1.2.3-59-g8ed1b


From de31c3ca8179d7c21def7ecb56e4fec0c8659d36 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 18 Oct 2010 10:38:58 -0400
Subject: jump label: Fix error with preempt disable holding mutex

Kprobes and jump label were having a race between mutexes that
was fixed by reordering the jump label. But this reordering
moved the jump label mutex into a preempt disable location.

This patch does a little fiddling to move the grabbing of
the jump label mutex from inside the preempt disable section
and still keep the order correct between the mutex and the
kprobes lock.

Reported-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Jason Baron <jbaron@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/kprobes.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9437e14f36bd..9737a76e106f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1145,17 +1145,13 @@ int __kprobes register_kprobe(struct kprobe *p)
 	if (ret)
 		return ret;
 
-	preempt_disable();
 	jump_label_lock();
+	preempt_disable();
 	if (!kernel_text_address((unsigned long) p->addr) ||
 	    in_kprobes_functions((unsigned long) p->addr) ||
 	    ftrace_text_reserved(p->addr, p->addr) ||
-	    jump_label_text_reserved(p->addr, p->addr)) {
-		preempt_enable();
-		jump_label_unlock();
-		return -EINVAL;
-	}
-	jump_label_unlock();
+	    jump_label_text_reserved(p->addr, p->addr))
+		goto fail_with_jump_label;
 
 	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
 	p->flags &= KPROBE_FLAG_DISABLED;
@@ -1169,10 +1165,9 @@ int __kprobes register_kprobe(struct kprobe *p)
 		 * We must hold a refcount of the probed module while updating
 		 * its code to prohibit unexpected unloading.
 		 */
-		if (unlikely(!try_module_get(probed_mod))) {
-			preempt_enable();
-			return -EINVAL;
-		}
+		if (unlikely(!try_module_get(probed_mod)))
+			goto fail_with_jump_label;
+
 		/*
 		 * If the module freed .init.text, we couldn't insert
 		 * kprobes in there.
@@ -1180,11 +1175,11 @@ int __kprobes register_kprobe(struct kprobe *p)
 		if (within_module_init((unsigned long)p->addr, probed_mod) &&
 		    probed_mod->state != MODULE_STATE_COMING) {
 			module_put(probed_mod);
-			preempt_enable();
-			return -EINVAL;
+			goto fail_with_jump_label;
 		}
 	}
 	preempt_enable();
+	jump_label_unlock();
 
 	p->nmissed = 0;
 	INIT_LIST_HEAD(&p->list);
@@ -1226,6 +1221,11 @@ out:
 		module_put(probed_mod);
 
 	return ret;
+
+fail_with_jump_label:
+	preempt_enable();
+	jump_label_unlock();
+	return -EINVAL;
 }
 EXPORT_SYMBOL_GPL(register_kprobe);
 
-- 
cgit v1.2.3-59-g8ed1b


From 95bcd683fb694a3e2d0538bf486430a0dfbb4111 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 29 Oct 2010 11:02:43 -0400
Subject: jump label: Make arch_jump_label_text_poke_early() optional

Some archs do not need to do anything special for jump labels on
startup (like MIPS).  This patch adds a weak function stub for
arch_jump_label_text_poke_early();

Cc: Jason Baron <jbaron@redhat.com>
Cc: David Miller <davem@davemloft.net>
Cc: David Daney <ddaney@caviumnetworks.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <1286218615-24011-2-git-send-email-ddaney@caviumnetworks.com>
LKML-Reference: <20101015201037.703989993@goodmis.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/jump_label.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 12cce78e9568..3b79bd938330 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -270,6 +270,13 @@ out:
 	return conflict;
 }
 
+/*
+ * Not all archs need this.
+ */
+void __weak arch_jump_label_text_poke_early(jump_label_t addr)
+{
+}
+
 static __init int init_jump_label(void)
 {
 	int ret;
-- 
cgit v1.2.3-59-g8ed1b


From f0daed0242d514033b9ecca9187108d3c4e281a5 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Sat, 23 Oct 2010 11:06:24 -0700
Subject: jump_label: Fix unaligned traps on sparc.

The vmlinux.lds.h knobs to emit the __jump_table section in the main
kernel image takes care to align the section, but this doesn't help
for the __jump_table section that gets emitted into modules.

Fix the resulting lack of section alignment by explicitly specifying
it in the assembler.

Signed-off-by: David S. Miller <davem@davemloft.net>
LKML-Reference: <20101023.110624.226758370.davem@davemloft.net>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/sparc/include/asm/jump_label.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 62e66d7b2fb6..d95cf44fb40d 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -14,6 +14,7 @@
 			 "nop\n\t"				\
 			 "nop\n\t"				\
 			 ".pushsection __jump_table,  \"a\"\n\t"\
+			 ".align 4\n\t"				\
 			 ".word 1b, %l[" #label "], %c0\n\t"	\
 			 ".popsection \n\t"			\
 			 : :  "i" (key) :  : label);\
-- 
cgit v1.2.3-59-g8ed1b


From 2d1d7126bbde53989f1d7de174816c123bb7ecb0 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Wed, 27 Oct 2010 21:09:15 -0700
Subject: x86, ftrace: Use safe noops, drop trap test

Always use a safe 5-byte noop sequence.  Drop the trap test, since it
is known to return false negatives on some virtualization platforms on
32 bits.  The resulting code is both simpler and safer.

Cc: Daniel Drake <dsd@laptop.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/alternative.c | 69 ++++++++++---------------------------------
 1 file changed, 15 insertions(+), 54 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a36bb90aef53..0b30214282a8 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -644,65 +644,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
-unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+#ifdef CONFIG_X86_64
+unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
+#else
+unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
+#endif
 
 void __init arch_init_ideal_nop5(void)
 {
-	extern const unsigned char ftrace_test_p6nop[];
-	extern const unsigned char ftrace_test_nop5[];
-	extern const unsigned char ftrace_test_jmp[];
-	int faulted = 0;
-
 	/*
-	 * There is no good nop for all x86 archs.
-	 * We will default to using the P6_NOP5, but first we
-	 * will test to make sure that the nop will actually
-	 * work on this CPU. If it faults, we will then
-	 * go to a lesser efficient 5 byte nop. If that fails
-	 * we then just use a jmp as our nop. This isn't the most
-	 * efficient nop, but we can not use a multi part nop
-	 * since we would then risk being preempted in the middle
-	 * of that nop, and if we enabled tracing then, it might
-	 * cause a system crash.
+	 * There is no good nop for all x86 archs.  This selection
+	 * algorithm should be unified with the one in find_nop_table(),
+	 * but this should be good enough for now.
 	 *
-	 * TODO: check the cpuid to determine the best nop.
+	 * For cases other than the ones below, use the safe (as in
+	 * always functional) defaults above.
 	 */
-	asm volatile (
-		"ftrace_test_jmp:"
-		"jmp ftrace_test_p6nop\n"
-		"nop\n"
-		"nop\n"
-		"nop\n"  /* 2 byte jmp + 3 bytes */
-		"ftrace_test_p6nop:"
-		P6_NOP5
-		"jmp 1f\n"
-		"ftrace_test_nop5:"
-		".byte 0x66,0x66,0x66,0x66,0x90\n"
-		"1:"
-		".section .fixup, \"ax\"\n"
-		"2:	movl $1, %0\n"
-		"	jmp ftrace_test_nop5\n"
-		"3:	movl $2, %0\n"
-		"	jmp 1b\n"
-		".previous\n"
-		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
-		_ASM_EXTABLE(ftrace_test_nop5, 3b)
-		: "=r"(faulted) : "0" (faulted));
-
-	switch (faulted) {
-	case 0:
-		pr_info("converting mcount calls to 0f 1f 44 00 00\n");
-		memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
-		break;
-	case 1:
-		pr_info("converting mcount calls to 66 66 66 66 90\n");
-		memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
-		break;
-	case 2:
-		pr_info("converting mcount calls to jmp . + 5\n");
-		memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
-		break;
-	}
-
+#ifdef CONFIG_X86_64
+	/* Don't use these on 32 bits due to broken virtualizers */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		memcpy(ideal_nop5, p6_nops[5], 5);
+#endif
 }
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 45f81b1c96d9793e47ce925d257ea693ce0b193e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 29 Oct 2010 12:33:43 -0400
Subject: jump label: Add work around to i386 gcc asm goto bug

On i386 (not x86_64) early implementations of gcc would have a bug
with asm goto causing it to produce code like the following:

(This was noticed by Peter Zijlstra)

   56 pushl 0
   67 nopl         jmp 0x6f
      popl
      jmp 0x8c

   6f              mov
                   test
                   je 0x8c

   8c mov
      call *(%esp)

The jump added in the asm goto skipped over the popl that matched
the pushl 0, which lead up to a quick crash of the system when
the jump was enabled. The nopl is defined in the asm goto () statement
and when tracepoints are enabled, the nop changes to a jump to the label
that was specified by the asm goto. asm goto is suppose to tell gcc that
the code in the asm might jump to an external label. Here gcc obviously
fails to make that work.

The bug report for gcc is here:

  http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226

The bug only appears on x86 when not compiled with
-maccumulate-outgoing-args. This option is always set on x86_64 and it
is also the work around for a function graph tracer i386 bug.
(See commit: 746357d6a526d6da9d89a2ec645b28406e959c2e)
This explains why the bug only showed up on i386 when function graph
tracer was not enabled.

This patch now adds a CONFIG_JUMP_LABEL option that is default
off instead of using jump labels by default. When jump labels are
enabled, the -maccumulate-outgoing-args will be used (causing a
slightly larger kernel image on i386). This option will exist
until we have a way to detect if the gcc compiler in use is safe
to use on all configurations without the work around.

Note, there exists such a test, but for now we will keep the enabling
of jump label as a manual option.

Archs that know the compiler is safe with asm goto, may choose to
select JUMP_LABEL and enable it by default.

Reported-by: Ingo Molnar <mingo@elte.hu>
Cause-discovered-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Baron <jbaron@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: David Daney <ddaney@caviumnetworks.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: David Miller <davem@davemloft.net>
Cc: Richard Henderson <rth@redhat.com>
LKML-Reference: <1288028746.3673.11.camel@laptop>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/Kconfig               | 14 ++++++++++++++
 arch/x86/Makefile_32.cpu   | 13 ++++++++++++-
 include/linux/jump_label.h |  2 +-
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 53d7f619a1b9..8bf0fa652eb6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -42,6 +42,20 @@ config KPROBES
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
 
+config JUMP_LABEL
+       bool "Optimize trace point call sites"
+       depends on HAVE_ARCH_JUMP_LABEL
+       help
+         If it is detected that the compiler has support for "asm goto",
+	 the kernel will compile trace point locations with just a
+	 nop instruction. When trace points are enabled, the nop will
+	 be converted to a jump to the trace function. This technique
+	 lowers overhead and stress on the branch prediction of the
+	 processor.
+
+	 On i386, options added to the compiler flags may increase
+	 the size of the kernel slightly.
+
 config OPTPROBES
 	def_bool y
 	depends on KPROBES && HAVE_OPTPROBES
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 1255d953c65d..f2ee1abb1df9 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -51,7 +51,18 @@ cflags-$(CONFIG_X86_GENERIC) 	+= $(call tune,generic,$(call tune,i686))
 # prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
 # tracer assumptions. For i686, generic, core2 this is set by the
 # compiler anyway
-cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args)
+ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
+ADD_ACCUMULATE_OUTGOING_ARGS := y
+endif
+
+# Work around to a bug with asm goto with first implementations of it
+# in gcc causing gcc to mess up the push and pop of the stack in some
+# uses of asm goto.
+ifeq ($(CONFIG_JUMP_LABEL), y)
+ADD_ACCUMULATE_OUTGOING_ARGS := y
+endif
+
+cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)
 
 # Bug fix for binutils: this option is required in order to keep
 # binutils from generating NOPL instructions against our will.
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 1947a1212678..7880f18e4b86 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX_JUMP_LABEL_H
 #define _LINUX_JUMP_LABEL_H
 
-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL)
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
 # include <asm/jump_label.h>
 # define HAVE_JUMP_LABEL
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 7b79462a20826a7269322113c68ca78d5f67c0bd Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 30 Oct 2010 01:19:29 -0700
Subject: x86: Check irq_remapped instead of remapping_enabled in destroy_irq()

Russ Anderson reported:
| There is a regression that is causing a NULL pointer dereference
| in free_irte when shutting down xpc. git bisect narrowed it down
| to git commit d585d06(intr_remap: Simplify the code further), which
| changed free_irte(). Reverse applying the patch fixes the problem.

We need to use irq_remapped() for each irq instead of checking only
intr_remapping_enabled as there might be non remapped irqs even when
remapping is enabled.

[ tglx: use cfg instead of retrieving it again. Massaged changelog ]

Reported-bisected-and-tested-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
LKML-Reference: <4CCBD511.40607@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 0929191d83cf..7cc0a721f628 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3109,7 +3109,7 @@ void destroy_irq(unsigned int irq)
 
 	irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
 
-	if (intr_remapping_enabled)
+	if (irq_remapped(cfg))
 		free_irte(irq);
 	raw_spin_lock_irqsave(&vector_lock, flags);
 	__clear_irq_vector(irq, cfg);
-- 
cgit v1.2.3-59-g8ed1b