aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/kernel-parameters.txt8
-rw-r--r--arch/arm/include/asm/ftrace.h5
-rw-r--r--arch/powerpc/include/asm/ftrace.h5
-rw-r--r--arch/sh/include/asm/ftrace.h5
-rw-r--r--arch/sparc/include/asm/ftrace.h5
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/include/asm/ftrace.h15
-rw-r--r--arch/x86/kernel/ftrace.c129
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--include/linux/ftrace.h5
-rw-r--r--include/linux/hardirq.h15
-rw-r--r--include/linux/marker.h2
-rw-r--r--include/linux/tracepoint.h4
-rw-r--r--kernel/marker.c80
-rw-r--r--kernel/sysctl.c10
-rw-r--r--kernel/trace/trace.c117
-rw-r--r--kernel/tracepoint.c261
-rw-r--r--scripts/Makefile.build12
-rw-r--r--scripts/tracing/draw_functrace.py130
20 files changed, 612 insertions, 205 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1bbcaa8982b6..4862284d3119 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -765,6 +765,14 @@ and is between 256 and 4096 characters. It is defined in the file
parameter will force ia64_sal_cache_flush to call
ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
+ ftrace=[tracer]
+ [ftrace] will set and start the specified tracer
+ as early as possible in order to facilitate early
+ boot debugging.
+
+ ftrace_dump_on_oops
+ [ftrace] will dump the trace buffers on oops.
+
gamecon.map[2|3]=
[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
support via parallel port (up to 5 devices per port)
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 39c8bc1a006a..3f3a1d1508ea 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -1,6 +1,11 @@
#ifndef _ASM_ARM_FTRACE
#define _ASM_ARM_FTRACE
+#ifndef __ASSEMBLY__
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
+#endif
+
#ifdef CONFIG_FUNCTION_TRACER
#define MCOUNT_ADDR ((long)(mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index b298f7a631e6..1cd72700fbc0 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -1,6 +1,11 @@
#ifndef _ASM_POWERPC_FTRACE
#define _ASM_POWERPC_FTRACE
+#ifndef __ASSEMBLY__
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
+#endif
+
#ifdef CONFIG_FUNCTION_TRACER
#define MCOUNT_ADDR ((long)(_mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
index 3aed362c9463..31ada0370cb6 100644
--- a/arch/sh/include/asm/ftrace.h
+++ b/arch/sh/include/asm/ftrace.h
@@ -2,6 +2,11 @@
#define __ASM_SH_FTRACE_H
#ifndef __ASSEMBLY__
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
+#endif
+
+#ifndef __ASSEMBLY__
extern void mcount(void);
#endif
diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h
index d27716cd38c1..62055ac0496e 100644
--- a/arch/sparc/include/asm/ftrace.h
+++ b/arch/sparc/include/asm/ftrace.h
@@ -1,6 +1,11 @@
#ifndef _ASM_SPARC64_FTRACE
#define _ASM_SPARC64_FTRACE
+#ifndef __ASSEMBLY__
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
+#endif
+
#ifdef CONFIG_MCOUNT
#define MCOUNT_ADDR ((long)(_mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e677..fa013f529b74 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -186,14 +186,10 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
-config MMIOTRACE_HOOKS
- bool
-
config MMIOTRACE
bool "Memory mapped IO tracing"
depends on DEBUG_KERNEL && PCI
select TRACING
- select MMIOTRACE_HOOKS
help
Mmiotrace traces Memory Mapped I/O access and is meant for
debugging and reverse engineering. It is called from the ioremap
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..a23468194b8c 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,6 +17,21 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
*/
return addr - 1;
}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_nmi_enter(void);
+extern void ftrace_nmi_exit(void);
+#else
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
+#endif
+#endif /* __ASSEMBLY__ */
+
+#else /* CONFIG_FUNCTION_TRACER */
+
+#ifndef __ASSEMBLY__
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
#endif
#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..69149337f2fe 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -56,6 +56,133 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
return calc.code;
}
+/*
+ * Modifying code must take extra care. On an SMP machine, if
+ * the code being modified is also being executed on another CPU
+ * that CPU will have undefined results and possibly take a GPF.
+ * We use kstop_machine to stop other CPUS from exectuing code.
+ * But this does not stop NMIs from happening. We still need
+ * to protect against that. We separate out the modification of
+ * the code to take care of this.
+ *
+ * Two buffers are added: An IP buffer and a "code" buffer.
+ *
+ * 1) Put the instruction pointer into the IP buffer
+ * and the new code into the "code" buffer.
+ * 2) Set a flag that says we are modifying code
+ * 3) Wait for any running NMIs to finish.
+ * 4) Write the code
+ * 5) clear the flag.
+ * 6) Wait for any running NMIs to finish.
+ *
+ * If an NMI is executed, the first thing it does is to call
+ * "ftrace_nmi_enter". This will check if the flag is set to write
+ * and if it is, it will write what is in the IP and "code" buffers.
+ *
+ * The trick is, it does not matter if everyone is writing the same
+ * content to the code location. Also, if a CPU is executing code
+ * it is OK to write to that code location if the contents being written
+ * are the same as what exists.
+ */
+
+static atomic_t in_nmi = ATOMIC_INIT(0);
+static int mod_code_status; /* holds return value of text write */
+static int mod_code_write; /* set when NMI should do the write */
+static void *mod_code_ip; /* holds the IP to write to */
+static void *mod_code_newcode; /* holds the text to write to the IP */
+
+static unsigned nmi_wait_count;
+static atomic_t nmi_update_count = ATOMIC_INIT(0);
+
+int ftrace_arch_read_dyn_info(char *buf, int size)
+{
+ int r;
+
+ r = snprintf(buf, size, "%u %u",
+ nmi_wait_count,
+ atomic_read(&nmi_update_count));
+ return r;
+}
+
+static void ftrace_mod_code(void)
+{
+ /*
+ * Yes, more than one CPU process can be writing to mod_code_status.
+ * (and the code itself)
+ * But if one were to fail, then they all should, and if one were
+ * to succeed, then they all should.
+ */
+ mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
+ MCOUNT_INSN_SIZE);
+
+}
+
+void ftrace_nmi_enter(void)
+{
+ atomic_inc(&in_nmi);
+ /* Must have in_nmi seen before reading write flag */
+ smp_mb();
+ if (mod_code_write) {
+ ftrace_mod_code();
+ atomic_inc(&nmi_update_count);
+ }
+}
+
+void ftrace_nmi_exit(void)
+{
+ /* Finish all executions before clearing in_nmi */
+ smp_wmb();
+ atomic_dec(&in_nmi);
+}
+
+static void wait_for_nmi(void)
+{
+ int waited = 0;
+
+ while (atomic_read(&in_nmi)) {
+ waited = 1;
+ cpu_relax();
+ }
+
+ if (waited)
+ nmi_wait_count++;
+}
+
+static int
+do_ftrace_mod_code(unsigned long ip, void *new_code)
+{
+ mod_code_ip = (void *)ip;
+ mod_code_newcode = new_code;
+
+ /* The buffers need to be visible before we let NMIs write them */
+ smp_wmb();
+
+ mod_code_write = 1;
+
+ /* Make sure write bit is visible before we wait on NMIs */
+ smp_mb();
+
+ wait_for_nmi();
+
+ /* Make sure all running NMIs have finished before we write the code */
+ smp_mb();
+
+ ftrace_mod_code();
+
+ /* Make sure the write happens before clearing the bit */
+ smp_wmb();
+
+ mod_code_write = 0;
+
+ /* make sure NMIs see the cleared bit */
+ smp_mb();
+
+ wait_for_nmi();
+
+ return mod_code_status;
+}
+
+
int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
@@ -81,7 +208,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
return -EINVAL;
/* replace the text with the new text */
- if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+ if (do_ftrace_mod_code(ip, new_code))
return -EPERM;
sync_core();
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 59f89b434b45..0a21b7aab9dc 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
-obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
-mmiotrace-y := pf_in.o mmio-mod.o
+mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa246..4152d3c3b138 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -53,7 +53,7 @@
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
-#ifdef CONFIG_MMIOTRACE_HOOKS
+#ifdef CONFIG_MMIOTRACE
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return -1;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 703eb53cfa2b..e46a7b34037c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -74,6 +74,9 @@ extern void ftrace_caller(void);
extern void ftrace_call(void);
extern void mcount_call(void);
+/* May be defined in arch */
+extern int ftrace_arch_read_dyn_info(char *buf, int size);
+
/**
* ftrace_modify_code - modify code segment
* @ip: the address of the code segment
@@ -181,6 +184,8 @@ static inline void __ftrace_enabled_restore(int enabled)
#endif
#ifdef CONFIG_TRACING
+extern int ftrace_dump_on_oops;
+
extern void
ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006cc94a0..0087cb43becf 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -5,6 +5,7 @@
#include <linux/smp_lock.h>
#include <linux/lockdep.h>
#include <asm/hardirq.h>
+#include <asm/ftrace.h>
#include <asm/system.h>
/*
@@ -161,7 +162,17 @@ extern void irq_enter(void);
*/
extern void irq_exit(void);
-#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0)
-#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0)
+#define nmi_enter() \
+ do { \
+ ftrace_nmi_enter(); \
+ lockdep_off(); \
+ __irq_enter(); \
+ } while (0)
+#define nmi_exit() \
+ do { \
+ __irq_exit(); \
+ lockdep_on(); \
+ ftrace_nmi_exit(); \
+ } while (0)
#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c7fbb1..4cf45472d9f5 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -136,8 +136,6 @@ extern marker_probe_func __mark_empty_function;
extern void marker_probe_cb(const struct marker *mdata,
void *call_private, ...);
-extern void marker_probe_cb_noarg(const struct marker *mdata,
- void *call_private, ...);
/*
* Connect a probe to a marker.
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index c5bb39c7a770..63064e9403f2 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -112,6 +112,10 @@ extern int tracepoint_probe_register(const char *name, void *probe);
*/
extern int tracepoint_probe_unregister(const char *name, void *probe);
+extern int tracepoint_probe_register_noupdate(const char *name, void *probe);
+extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe);
+extern void tracepoint_probe_update_all(void);
+
struct tracepoint_iter {
struct module *module;
struct tracepoint *tracepoint;
diff --git a/kernel/marker.c b/kernel/marker.c
index e9c6b2bc9400..2898b647d415 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(markers_mutex);
*/
#define MARKER_HASH_BITS 6
#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
/*
* Note about RCU :
@@ -64,11 +65,10 @@ struct marker_entry {
void *oldptr;
int rcu_pending;
unsigned char ptype:1;
+ unsigned char format_allocated:1;
char name[0]; /* Contains name'\0'format'\0' */
};
-static struct hlist_head marker_table[MARKER_TABLE_SIZE];
-
/**
* __mark_empty_function - Empty probe callback
* @probe_private: probe private data
@@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
*
* Should be connected to markers "MARK_NOARGS".
*/
-void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
+static void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
{
va_list args; /* not initialized */
char ptype;
@@ -197,7 +197,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
}
rcu_read_unlock_sched();
}
-EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
static void free_old_closure(struct rcu_head *head)
{
@@ -416,6 +415,7 @@ static struct marker_entry *add_marker(const char *name, const char *format)
e->single.probe_private = NULL;
e->multi = NULL;
e->ptype = 0;
+ e->format_allocated = 0;
e->refcount = 0;
e->rcu_pending = 0;
hlist_add_head(&e->hlist, head);
@@ -447,6 +447,8 @@ static int remove_marker(const char *name)
if (e->single.func != __mark_empty_function)
return -EBUSY;
hlist_del(&e->hlist);
+ if (e->format_allocated)
+ kfree(e->format);
/* Make sure the call_rcu has been executed */
if (e->rcu_pending)
rcu_barrier_sched();
@@ -457,57 +459,34 @@ static int remove_marker(const char *name)
/*
* Set the mark_entry format to the format found in the element.
*/
-static int marker_set_format(struct marker_entry **entry, const char *format)
+static int marker_set_format(struct marker_entry *entry, const char *format)
{
- struct marker_entry *e;
- size_t name_len = strlen((*entry)->name) + 1;
- size_t format_len = strlen(format) + 1;
-
-
- e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
- GFP_KERNEL);
- if (!e)
+ entry->format = kstrdup(format, GFP_KERNEL);
+ if (!entry->format)
return -ENOMEM;
- memcpy(&e->name[0], (*entry)->name, name_len);
- e->format = &e->name[name_len];
- memcpy(e->format, format, format_len);
- if (strcmp(e->format, MARK_NOARGS) == 0)
- e->call = marker_probe_cb_noarg;
- else
- e->call = marker_probe_cb;
- e->single = (*entry)->single;
- e->multi = (*entry)->multi;
- e->ptype = (*entry)->ptype;
- e->refcount = (*entry)->refcount;
- e->rcu_pending = 0;
- hlist_add_before(&e->hlist, &(*entry)->hlist);
- hlist_del(&(*entry)->hlist);
- /* Make sure the call_rcu has been executed */
- if ((*entry)->rcu_pending)
- rcu_barrier_sched();
- kfree(*entry);
- *entry = e;
+ entry->format_allocated = 1;
+
trace_mark(core_marker_format, "name %s format %s",
- e->name, e->format);
+ entry->name, entry->format);
return 0;
}
/*
* Sets the probe callback corresponding to one marker.
*/
-static int set_marker(struct marker_entry **entry, struct marker *elem,
+static int set_marker(struct marker_entry *entry, struct marker *elem,
int active)
{
int ret;
- WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+ WARN_ON(strcmp(entry->name, elem->name) != 0);
- if ((*entry)->format) {
- if (strcmp((*entry)->format, elem->format) != 0) {
+ if (entry->format) {
+ if (strcmp(entry->format, elem->format) != 0) {
printk(KERN_NOTICE
"Format mismatch for probe %s "
"(%s), marker (%s)\n",
- (*entry)->name,
- (*entry)->format,
+ entry->name,
+ entry->format,
elem->format);
return -EPERM;
}
@@ -523,34 +502,33 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
* pass from a "safe" callback (with argument) to an "unsafe"
* callback (does not set arguments).
*/
- elem->call = (*entry)->call;
+ elem->call = entry->call;
/*
* Sanity check :
* We only update the single probe private data when the ptr is
* set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
*/
WARN_ON(elem->single.func != __mark_empty_function
- && elem->single.probe_private
- != (*entry)->single.probe_private &&
- !elem->ptype);
- elem->single.probe_private = (*entry)->single.probe_private;
+ && elem->single.probe_private != entry->single.probe_private
+ && !elem->ptype);
+ elem->single.probe_private = entry->single.probe_private;
/*
* Make sure the private data is valid when we update the
* single probe ptr.
*/
smp_wmb();
- elem->single.func = (*entry)->single.func;
+ elem->single.func = entry->single.func;
/*
* We also make sure that the new probe callbacks array is consistent
* before setting a pointer to it.
*/
- rcu_assign_pointer(elem->multi, (*entry)->multi);
+ rcu_assign_pointer(elem->multi, entry->multi);
/*
* Update the function or multi probe array pointer before setting the
* ptype.
*/
smp_wmb();
- elem->ptype = (*entry)->ptype;
+ elem->ptype = entry->ptype;
elem->state = active;
return 0;
@@ -594,8 +572,7 @@ void marker_update_probe_range(struct marker *begin,
for (iter = begin; iter < end; iter++) {
mark_entry = get_marker(iter->name);
if (mark_entry) {
- set_marker(&mark_entry, iter,
- !!mark_entry->refcount);
+ set_marker(mark_entry, iter, !!mark_entry->refcount);
/*
* ignore error, continue
*/
@@ -657,7 +634,7 @@ int marker_probe_register(const char *name, const char *format,
ret = PTR_ERR(entry);
} else if (format) {
if (!entry->format)
- ret = marker_set_format(&entry, format);
+ ret = marker_set_format(entry, format);
else if (strcmp(entry->format, format))
ret = -EPERM;
}
@@ -848,8 +825,6 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
if (!e->ptype) {
if (num == 0 && e->single.func == probe)
return e->single.probe_private;
- else
- break;
} else {
struct marker_probe_closure *closure;
int match = 0;
@@ -861,6 +836,7 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
return closure[i].probe_private;
}
}
+ break;
}
}
return ERR_PTR(-ENOENT);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9d048fa2d902..6b6b727258b5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -484,6 +484,16 @@ static struct ctl_table kern_table[] = {
.proc_handler = &ftrace_enable_sysctl,
},
#endif
+#ifdef CONFIG_TRACING
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "ftrace_dump_on_opps",
+ .data = &ftrace_dump_on_oops,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
#ifdef CONFIG_MODULES
{
.ctl_name = KERN_MODPROBE,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 85bee775a03e..e4c40c868d67 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -64,6 +64,37 @@ static cpumask_t __read_mostly tracing_buffer_mask;
static int tracing_disabled = 1;
+/*
+ * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
+ *
+ * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
+ * is set, then ftrace_dump is called. This will output the contents
+ * of the ftrace buffers to the console. This is very useful for
+ * capturing traces that lead to crashes and outputing it to a
+ * serial console.
+ *
+ * It is default off, but you can enable it with either specifying
+ * "ftrace_dump_on_oops" in the kernel command line, or setting
+ * /proc/sys/kernel/ftrace_dump_on_oops to true.
+ */
+int ftrace_dump_on_oops;
+
+static int tracing_set_tracer(char *buf);
+
+static int __init set_ftrace(char *str)
+{
+ tracing_set_tracer(str);
+ return 1;
+}
+__setup("ftrace", set_ftrace);
+
+static int __init set_ftrace_dump_on_oops(char *str)
+{
+ ftrace_dump_on_oops = 1;
+ return 1;
+}
+__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
+
long
ns2usecs(cycle_t nsec)
{
@@ -2374,29 +2405,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
-static ssize_t
-tracing_set_trace_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
+static int tracing_set_tracer(char *buf)
{
struct trace_array *tr = &global_trace;
struct tracer *t;
- char buf[max_tracer_type_len+1];
- int i;
- size_t ret;
-
- ret = cnt;
-
- if (cnt > max_tracer_type_len)
- cnt = max_tracer_type_len;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- /* strip ending whitespace. */
- for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
- buf[i] = 0;
+ int ret = 0;
mutex_lock(&trace_types_lock);
for (t = trace_types; t; t = t->next) {
@@ -2420,6 +2433,33 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
out:
mutex_unlock(&trace_types_lock);
+ return ret;
+}
+
+static ssize_t
+tracing_set_trace_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[max_tracer_type_len+1];
+ int i;
+ size_t ret;
+
+ if (cnt > max_tracer_type_len)
+ cnt = max_tracer_type_len;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ /* strip ending whitespace. */
+ for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
+ buf[i] = 0;
+
+ ret = tracing_set_tracer(buf);
+ if (!ret)
+ ret = cnt;
+
if (ret > 0)
filp->f_pos += ret;
@@ -2822,22 +2862,38 @@ static struct file_operations tracing_mark_fops = {
#ifdef CONFIG_DYNAMIC_FTRACE
+int __weak ftrace_arch_read_dyn_info(char *buf, int size)
+{
+ return 0;
+}
+
static ssize_t
-tracing_read_long(struct file *filp, char __user *ubuf,
+tracing_read_dyn_info(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ static char ftrace_dyn_info_buffer[1024];
+ static DEFINE_MUTEX(dyn_info_mutex);
unsigned long *p = filp->private_data;
- char buf[64];
+ char *buf = ftrace_dyn_info_buffer;
+ int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
int r;
- r = sprintf(buf, "%ld\n", *p);
+ mutex_lock(&dyn_info_mutex);
+ r = sprintf(buf, "%ld ", *p);
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
+ buf[r++] = '\n';
+
+ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+
+ mutex_unlock(&dyn_info_mutex);
+
+ return r;
}
-static struct file_operations tracing_read_long_fops = {
+static struct file_operations tracing_dyn_info_fops = {
.open = tracing_open_generic,
- .read = tracing_read_long,
+ .read = tracing_read_dyn_info,
};
#endif
@@ -2946,7 +3002,7 @@ static __init int tracer_init_debugfs(void)
#ifdef CONFIG_DYNAMIC_FTRACE
entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt,
- &tracing_read_long_fops);
+ &tracing_dyn_info_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'dyn_ftrace_total_info' entry\n");
@@ -3027,7 +3083,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk);
static int trace_panic_handler(struct notifier_block *this,
unsigned long event, void *unused)
{
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
return NOTIFY_OK;
}
@@ -3043,7 +3100,8 @@ static int trace_die_handler(struct notifier_block *self,
{
switch (val) {
case DIE_OOPS:
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
break;
default:
break;
@@ -3084,7 +3142,6 @@ trace_printk_seq(struct trace_seq *s)
trace_seq_reset(s);
}
-
void ftrace_dump(void)
{
static DEFINE_SPINLOCK(ftrace_dump_lock);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index af8c85664882..e96590f17de1 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(tracepoints_mutex);
*/
#define TRACEPOINT_HASH_BITS 6
#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
+static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
/*
* Note about RCU :
@@ -54,40 +55,43 @@ struct tracepoint_entry {
struct hlist_node hlist;
void **funcs;
int refcount; /* Number of times armed. 0 if disarmed. */
- struct rcu_head rcu;
- void *oldptr;
- unsigned char rcu_pending:1;
char name[0];
};
-static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+struct tp_probes {
+ union {
+ struct rcu_head rcu;
+ struct list_head list;
+ } u;
+ void *probes[0];
+};
-static void free_old_closure(struct rcu_head *head)
+static inline void *allocate_probes(int count)
{
- struct tracepoint_entry *entry = container_of(head,
- struct tracepoint_entry, rcu);
- kfree(entry->oldptr);
- /* Make sure we free the data before setting the pending flag to 0 */
- smp_wmb();
- entry->rcu_pending = 0;
+ struct tp_probes *p = kmalloc(count * sizeof(void *)
+ + sizeof(struct tp_probes), GFP_KERNEL);
+ return p == NULL ? NULL : p->probes;
}
-static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
+static void rcu_free_old_probes(struct rcu_head *head)
{
- if (!old)
- return;
- entry->oldptr = old;
- entry->rcu_pending = 1;
- /* write rcu_pending before calling the RCU callback */
- smp_wmb();
- call_rcu_sched(&entry->rcu, free_old_closure);
+ kfree(container_of(head, struct tp_probes, u.rcu));
+}
+
+static inline void release_probes(void *old)
+{
+ if (old) {
+ struct tp_probes *tp_probes = container_of(old,
+ struct tp_probes, probes[0]);
+ call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
+ }
}
static void debug_print_probes(struct tracepoint_entry *entry)
{
int i;
- if (!tracepoint_debug)
+ if (!tracepoint_debug || !entry->funcs)
return;
for (i = 0; entry->funcs[i]; i++)
@@ -111,12 +115,13 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
return ERR_PTR(-EEXIST);
}
/* + 2 : one for new probe, one for NULL func */
- new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
+ new = allocate_probes(nr_probes + 2);
if (new == NULL)
return ERR_PTR(-ENOMEM);
if (old)
memcpy(new, old, nr_probes * sizeof(void *));
new[nr_probes] = probe;
+ new[nr_probes + 1] = NULL;
entry->refcount = nr_probes + 1;
entry->funcs = new;
debug_print_probes(entry);
@@ -132,7 +137,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
old = entry->funcs;
if (!old)
- return NULL;
+ return ERR_PTR(-ENOENT);
debug_print_probes(entry);
/* (N -> M), (N > 1, M >= 0) probes */
@@ -151,13 +156,13 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
int j = 0;
/* N -> M, (N > 1, M > 0) */
/* + 1 for NULL */
- new = kzalloc((nr_probes - nr_del + 1)
- * sizeof(void *), GFP_KERNEL);
+ new = allocate_probes(nr_probes - nr_del + 1);
if (new == NULL)
return ERR_PTR(-ENOMEM);
for (i = 0; old[i]; i++)
if ((probe && old[i] != probe))
new[j++] = old[i];
+ new[nr_probes - nr_del] = NULL;
entry->refcount = nr_probes - nr_del;
entry->funcs = new;
}
@@ -215,7 +220,6 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
memcpy(&e->name[0], name, name_len);
e->funcs = NULL;
e->refcount = 0;
- e->rcu_pending = 0;
hlist_add_head(&e->hlist, head);
return e;
}
@@ -224,32 +228,10 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
* Remove the tracepoint from the tracepoint hash table. Must be called with
* mutex_lock held.
*/
-static int remove_tracepoint(const char *name)
+static inline void remove_tracepoint(struct tracepoint_entry *e)
{
- struct hlist_head *head;
- struct hlist_node *node;
- struct tracepoint_entry *e;
- int found = 0;
- size_t len = strlen(name) + 1;
- u32 hash = jhash(name, len-1, 0);
-
- head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
- hlist_for_each_entry(e, node, head, hlist) {
- if (!strcmp(name, e->name)) {
- found = 1;
- break;
- }
- }
- if (!found)
- return -ENOENT;
- if (e->refcount)
- return -EBUSY;
hlist_del(&e->hlist);
- /* Make sure the call_rcu_sched has been executed */
- if (e->rcu_pending)
- rcu_barrier_sched();
kfree(e);
- return 0;
}
/*
@@ -320,6 +302,23 @@ static void tracepoint_update_probes(void)
module_update_tracepoints();
}
+static void *tracepoint_add_probe(const char *name, void *probe)
+{
+ struct tracepoint_entry *entry;
+ void *old;
+
+ entry = get_tracepoint(name);
+ if (!entry) {
+ entry = add_tracepoint(name);
+ if (IS_ERR(entry))
+ return entry;
+ }
+ old = tracepoint_entry_add_probe(entry, probe);
+ if (IS_ERR(old) && !entry->refcount)
+ remove_tracepoint(entry);
+ return old;
+}
+
/**
* tracepoint_probe_register - Connect a probe to a tracepoint
* @name: tracepoint name
@@ -330,44 +329,36 @@ static void tracepoint_update_probes(void)
*/
int tracepoint_probe_register(const char *name, void *probe)
{
- struct tracepoint_entry *entry;
- int ret = 0;
void *old;
mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- if (!entry) {
- entry = add_tracepoint(name);
- if (IS_ERR(entry)) {
- ret = PTR_ERR(entry);
- goto end;
- }
- }
- /*
- * If we detect that a call_rcu_sched is pending for this tracepoint,
- * make sure it's executed now.
- */
- if (entry->rcu_pending)
- rcu_barrier_sched();
- old = tracepoint_entry_add_probe(entry, probe);
- if (IS_ERR(old)) {
- ret = PTR_ERR(old);
- goto end;
- }
+ old = tracepoint_add_probe(name, probe);
mutex_unlock(&tracepoints_mutex);
+ if (IS_ERR(old))
+ return PTR_ERR(old);
+
tracepoint_update_probes(); /* may update entry */
- mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- WARN_ON(!entry);
- if (entry->rcu_pending)
- rcu_barrier_sched();
- tracepoint_entry_free_old(entry, old);
-end:
- mutex_unlock(&tracepoints_mutex);
- return ret;
+ release_probes(old);
+ return 0;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);
+static void *tracepoint_remove_probe(const char *name, void *probe)
+{
+ struct tracepoint_entry *entry;
+ void *old;
+
+ entry = get_tracepoint(name);
+ if (!entry)
+ return ERR_PTR(-ENOENT);
+ old = tracepoint_entry_remove_probe(entry, probe);
+ if (IS_ERR(old))
+ return old;
+ if (!entry->refcount)
+ remove_tracepoint(entry);
+ return old;
+}
+
/**
* tracepoint_probe_unregister - Disconnect a probe from a tracepoint
* @name: tracepoint name
@@ -380,38 +371,104 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register);
*/
int tracepoint_probe_unregister(const char *name, void *probe)
{
- struct tracepoint_entry *entry;
void *old;
- int ret = -ENOENT;
mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- if (!entry)
- goto end;
- if (entry->rcu_pending)
- rcu_barrier_sched();
- old = tracepoint_entry_remove_probe(entry, probe);
- if (!old) {
- printk(KERN_WARNING "Warning: Trying to unregister a probe"
- "that doesn't exist\n");
- goto end;
- }
+ old = tracepoint_remove_probe(name, probe);
mutex_unlock(&tracepoints_mutex);
+ if (IS_ERR(old))
+ return PTR_ERR(old);
+
tracepoint_update_probes(); /* may update entry */
+ release_probes(old);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+
+static LIST_HEAD(old_probes);
+static int need_update;
+
+static void tracepoint_add_old_probes(void *old)
+{
+ need_update = 1;
+ if (old) {
+ struct tp_probes *tp_probes = container_of(old,
+ struct tp_probes, probes[0]);
+ list_add(&tp_probes->u.list, &old_probes);
+ }
+}
+
+/**
+ * tracepoint_probe_register_noupdate - register a probe but not connect
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * caller must call tracepoint_probe_update_all()
+ */
+int tracepoint_probe_register_noupdate(const char *name, void *probe)
+{
+ void *old;
+
mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- if (!entry)
- goto end;
- if (entry->rcu_pending)
- rcu_barrier_sched();
- tracepoint_entry_free_old(entry, old);
- remove_tracepoint(name); /* Ignore busy error message */
- ret = 0;
-end:
+ old = tracepoint_add_probe(name, probe);
+ if (IS_ERR(old)) {
+ mutex_unlock(&tracepoints_mutex);
+ return PTR_ERR(old);
+ }
+ tracepoint_add_old_probes(old);
mutex_unlock(&tracepoints_mutex);
- return ret;
+ return 0;
}
-EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
+
+/**
+ * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * caller must call tracepoint_probe_update_all()
+ */
+int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
+{
+ void *old;
+
+ mutex_lock(&tracepoints_mutex);
+ old = tracepoint_remove_probe(name, probe);
+ if (IS_ERR(old)) {
+ mutex_unlock(&tracepoints_mutex);
+ return PTR_ERR(old);
+ }
+ tracepoint_add_old_probes(old);
+ mutex_unlock(&tracepoints_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
+
+/**
+ * tracepoint_probe_update_all - update tracepoints
+ */
+void tracepoint_probe_update_all(void)
+{
+ LIST_HEAD(release_probes);
+ struct tp_probes *pos, *next;
+
+ mutex_lock(&tracepoints_mutex);
+ if (!need_update) {
+ mutex_unlock(&tracepoints_mutex);
+ return;
+ }
+ if (!list_empty(&old_probes))
+ list_replace_init(&old_probes, &release_probes);
+ need_update = 0;
+ mutex_unlock(&tracepoints_mutex);
+
+ tracepoint_update_probes();
+ list_for_each_entry_safe(pos, next, &release_probes, u.list) {
+ list_del(&pos->u.list);
+ call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
+ }
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
/**
* tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 468fbc9016c7..7a176773af85 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,16 +198,10 @@ cmd_modversions = \
fi;
endif
-ifdef CONFIG_64BIT
-arch_bits = 64
-else
-arch_bits = 32
-endif
-
ifdef CONFIG_FTRACE_MCOUNT_RECORD
-cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \
- "$(ARCH)" "$(arch_bits)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" \
- "$(NM)" "$(RM)" "$(MV)" "$(@)";
+cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
+ "$(if $(CONFIG_64BIT),64,32)" \
+ "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
endif
define rule_cc_o_c
diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py
new file mode 100644
index 000000000000..902f9a992620
--- /dev/null
+++ b/scripts/tracing/draw_functrace.py
@@ -0,0 +1,130 @@
+#!/usr/bin/python
+
+"""
+Copyright 2008 (c) Frederic Weisbecker <fweisbec@gmail.com>
+Licensed under the terms of the GNU GPL License version 2
+
+This script parses a trace provided by the function tracer in
+kernel/trace/trace_functions.c
+The resulted trace is processed into a tree to produce a more human
+view of the call stack by drawing textual but hierarchical tree of
+calls. Only the functions's names and the the call time are provided.
+
+Usage:
+ Be sure that you have CONFIG_FUNCTION_TRACER
+ # mkdir /debugfs
+ # mount -t debug debug /debug
+ # echo function > /debug/tracing/current_tracer
+ $ cat /debug/tracing/trace_pipe > ~/raw_trace_func
+ Wait some times but not too much, the script is a bit slow.
+ Break the pipe (Ctrl + Z)
+ $ scripts/draw_functrace.py < raw_trace_func > draw_functrace
+ Then you have your drawn trace in draw_functrace
+"""
+
+
+import sys, re
+
+class CallTree:
+ """ This class provides a tree representation of the functions
+ call stack. If a function has no parent in the kernel (interrupt,
+ syscall, kernel thread...) then it is attached to a virtual parent
+ called ROOT.
+ """
+ ROOT = None
+
+ def __init__(self, func, time = None, parent = None):
+ self._func = func
+ self._time = time
+ if parent is None:
+ self._parent = CallTree.ROOT
+ else:
+ self._parent = parent
+ self._children = []
+
+ def calls(self, func, calltime):
+ """ If a function calls another one, call this method to insert it
+ into the tree at the appropriate place.
+ @return: A reference to the newly created child node.
+ """
+ child = CallTree(func, calltime, self)
+ self._children.append(child)
+ return child
+
+ def getParent(self, func):
+ """ Retrieve the last parent of the current node that
+ has the name given by func. If this function is not
+ on a parent, then create it as new child of root
+ @return: A reference to the parent.
+ """
+ tree = self
+ while tree != CallTree.ROOT and tree._func != func:
+ tree = tree._parent
+ if tree == CallTree.ROOT:
+ child = CallTree.ROOT.calls(func, None)
+ return child
+ return tree
+
+ def __repr__(self):
+ return self.__toString("", True)
+
+ def __toString(self, branch, lastChild):
+ if self._time is not None:
+ s = "%s----%s (%s)\n" % (branch, self._func, self._time)
+ else:
+ s = "%s----%s\n" % (branch, self._func)
+
+ i = 0
+ if lastChild:
+ branch = branch[:-1] + " "
+ while i < len(self._children):
+ if i != len(self._children) - 1:
+ s += "%s" % self._children[i].__toString(branch +\
+ " |", False)
+ else:
+ s += "%s" % self._children[i].__toString(branch +\
+ " |", True)
+ i += 1
+ return s
+
+class BrokenLineException(Exception):
+ """If the last line is not complete because of the pipe breakage,
+ we want to stop the processing and ignore this line.
+ """
+ pass
+
+class CommentLineException(Exception):
+ """ If the line is a comment (as in the beginning of the trace file),
+ just ignore it.
+ """
+ pass
+
+
+def parseLine(line):
+ line = line.strip()
+ if line.startswith("#"):
+ raise CommentLineException
+ m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line)
+ if m is None:
+ raise BrokenLineException
+ return (m.group(1), m.group(2), m.group(3))
+
+
+def main():
+ CallTree.ROOT = CallTree("Root (Nowhere)", None, None)
+ tree = CallTree.ROOT
+
+ for line in sys.stdin:
+ try:
+ calltime, callee, caller = parseLine(line)
+ except BrokenLineException:
+ break
+ except CommentLineException:
+ continue
+ tree = tree.getParent(caller)
+ tree = tree.calls(callee, calltime)
+
+ print CallTree.ROOT
+
+if __name__ == "__main__":
+ main()