From e5e25cf47b0bdd1f7e9b8bb6368ee48e16de0c87 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 18 Sep 2009 00:54:43 +0200 Subject: tracing: Factorize the events profile accounting Factorize the events enabling accounting in a common tracing core helper. This reduces the size of the profile_enable() and profile_disable() callbacks for each trace events. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Acked-by: Li Zefan Cc: Peter Zijlstra Cc: Jason Baron Cc: Masami Hiramatsu Cc: Ingo Molnar --- include/linux/ftrace_event.h | 4 ++-- include/linux/syscalls.h | 24 ++++++++---------------- include/trace/ftrace.h | 28 ++++++++-------------------- 3 files changed, 18 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bd099ba82ccc..bc103d7b1ca8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -130,8 +130,8 @@ struct ftrace_event_call { void *data; atomic_t profile_count; - int (*profile_enable)(struct ftrace_event_call *); - void (*profile_disable)(struct ftrace_event_call *); + int (*profile_enable)(void); + void (*profile_disable)(void); }; #define MAX_FILTER_PRED 32 diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a8e37821cc60..7d9803cbb20f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -100,33 +100,25 @@ struct perf_counter_attr; #ifdef CONFIG_EVENT_PROFILE #define TRACE_SYS_ENTER_PROFILE(sname) \ -static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call) \ +static int prof_sysenter_enable_##sname(void) \ { \ - int ret = 0; \ - if (!atomic_inc_return(&event_enter_##sname.profile_count)) \ - ret = reg_prof_syscall_enter("sys"#sname); \ - return ret; \ + return reg_prof_syscall_enter("sys"#sname); \ } \ \ -static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\ +static void prof_sysenter_disable_##sname(void) \ { \ - if (atomic_add_negative(-1, &event_enter_##sname.profile_count)) \ - unreg_prof_syscall_enter("sys"#sname); \ + unreg_prof_syscall_enter("sys"#sname); \ } #define TRACE_SYS_EXIT_PROFILE(sname) \ -static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call) \ +static int prof_sysexit_enable_##sname(void) \ { \ - int ret = 0; \ - if (!atomic_inc_return(&event_exit_##sname.profile_count)) \ - ret = reg_prof_syscall_exit("sys"#sname); \ - return ret; \ + return reg_prof_syscall_exit("sys"#sname); \ } \ \ -static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ +static void prof_sysexit_disable_##sname(void) \ { \ - if (atomic_add_negative(-1, &event_exit_##sname.profile_count)) \ - unreg_prof_syscall_exit("sys"#sname); \ + unreg_prof_syscall_exit("sys"#sname); \ } #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 72a3b437b829..a822087857e9 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -382,20 +382,14 @@ static inline int ftrace_get_offsets_##call( \ * * NOTE: The insertion profile callback (ftrace_profile_) is defined later * - * static int ftrace_profile_enable_(struct ftrace_event_call *event_call) + * static int ftrace_profile_enable_(void) * { - * int ret = 0; - * - * if (!atomic_inc_return(&event_call->profile_count)) - * ret = register_trace_(ftrace_profile_); - * - * return ret; + * return register_trace_(ftrace_profile_); * } * - * static void ftrace_profile_disable_(struct ftrace_event_call *event_call) + * static void ftrace_profile_disable_(void) * { - * if (atomic_add_negative(-1, &event->call->profile_count)) - * unregister_trace_(ftrace_profile_); + * unregister_trace_(ftrace_profile_); * } * */ @@ -405,20 +399,14 @@ static inline int ftrace_get_offsets_##call( \ \ static void ftrace_profile_##call(proto); \ \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ +static int ftrace_profile_enable_##call(void) \ { \ - int ret = 0; \ - \ - if (!atomic_inc_return(&event_call->profile_count)) \ - ret = register_trace_##call(ftrace_profile_##call); \ - \ - return ret; \ + return register_trace_##call(ftrace_profile_##call); \ } \ \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ +static void ftrace_profile_disable_##call(void) \ { \ - if (atomic_add_negative(-1, &event_call->profile_count)) \ - unregister_trace_##call(ftrace_profile_##call); \ + unregister_trace_##call(ftrace_profile_##call); \ } #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -- cgit v1.2.3-59-g8ed1b From 20ab4425a77a1f34028cc6ce57053c22c184ba5f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 18 Sep 2009 06:10:28 +0200 Subject: tracing: Allocate the ftrace event profile buffer dynamically Currently the trace event profile buffer is allocated in the stack. But this may be too much for the stack, as the events can have large statically defined field size and can also grow with dynamic arrays. Allocate two per cpu buffer for all profiled events. The first cpu buffer is used to host every non-nmi context traces. It is protected by disabling the interrupts while writing and committing the trace. The second buffer is reserved for nmi. So that there is no race between them and the first buffer. The whole write/commit section is rcu protected because we release these buffers while deactivating the last profiling trace event. v2: Move the buffers from trace_event to be global, as pointed by Steven Rostedt. v3: Fix the syscall events to handle the profiling buffer races by disabling interrupts, now that the buffers are globals. Suggested-by: Steven Rostedt Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Li Zefan Cc: Jason Baron Cc: Masami Hiramatsu Cc: Ingo Molnar --- include/linux/ftrace_event.h | 6 +++ include/trace/ftrace.h | 83 +++++++++++++++++++++----------- kernel/trace/trace_event_profile.c | 61 +++++++++++++++++++++++- kernel/trace/trace_syscalls.c | 97 ++++++++++++++++++++++++++++++-------- 4 files changed, 199 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bc103d7b1ca8..4ec5e67e18cf 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -4,6 +4,7 @@ #include #include #include +#include struct trace_array; struct tracer; @@ -134,6 +135,11 @@ struct ftrace_event_call { void (*profile_disable)(void); }; +#define FTRACE_MAX_PROFILE_SIZE 2048 + +extern char *trace_profile_buf; +extern char *trace_profile_buf_nmi; + #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a822087857e9..a0361cb69769 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -648,11 +648,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * struct ftrace_raw_##call *entry; * u64 __addr = 0, __count = 1; * unsigned long irq_flags; + * struct trace_entry *ent; * int __entry_size; * int __data_size; + * int __cpu * int pc; * - * local_save_flags(irq_flags); * pc = preempt_count(); * * __data_size = ftrace_get_offsets_(&__data_offsets, args); @@ -663,25 +664,34 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * sizeof(u64)); * __entry_size -= sizeof(u32); * - * do { - * char raw_data[__entry_size]; <- allocate our sample in the stack - * struct trace_entry *ent; + * // Protect the non nmi buffer + * // This also protects the rcu read side + * local_irq_save(irq_flags); + * __cpu = smp_processor_id(); + * + * if (in_nmi()) + * raw_data = rcu_dereference(trace_profile_buf_nmi); + * else + * raw_data = rcu_dereference(trace_profile_buf); + * + * if (!raw_data) + * goto end; * - * zero dead bytes from alignment to avoid stack leak to userspace: + * raw_data = per_cpu_ptr(raw_data, __cpu); * - * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; - * entry = (struct ftrace_raw_ *)raw_data; - * ent = &entry->ent; - * tracing_generic_entry_update(ent, irq_flags, pc); - * ent->type = event_call->id; + * //zero dead bytes from alignment to avoid stack leak to userspace: + * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; + * entry = (struct ftrace_raw_ *)raw_data; + * ent = &entry->ent; + * tracing_generic_entry_update(ent, irq_flags, pc); + * ent->type = event_call->id; * - * <- do some jobs with dynamic arrays + * <- do some jobs with dynamic arrays * - * <- affect our values + * <- affect our values * - * perf_tpcounter_event(event_call->id, __addr, __count, entry, - * __entry_size); <- submit them to perf counter - * } while (0); + * perf_tpcounter_event(event_call->id, __addr, __count, entry, + * __entry_size); <- submit them to perf counter * * } */ @@ -704,11 +714,13 @@ static void ftrace_profile_##call(proto) \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ + struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ + char *raw_data; \ + int __cpu; \ int pc; \ \ - local_save_flags(irq_flags); \ pc = preempt_count(); \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ @@ -716,23 +728,38 @@ static void ftrace_profile_##call(proto) \ sizeof(u64)); \ __entry_size -= sizeof(u32); \ \ - do { \ - char raw_data[__entry_size]; \ - struct trace_entry *ent; \ + if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \ + "profile buffer not large enough")) \ + return; \ + \ + local_irq_save(irq_flags); \ + __cpu = smp_processor_id(); \ \ - *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ - entry = (struct ftrace_raw_##call *)raw_data; \ - ent = &entry->ent; \ - tracing_generic_entry_update(ent, irq_flags, pc); \ - ent->type = event_call->id; \ + if (in_nmi()) \ + raw_data = rcu_dereference(trace_profile_buf_nmi); \ + else \ + raw_data = rcu_dereference(trace_profile_buf); \ \ - tstruct \ + if (!raw_data) \ + goto end; \ \ - { assign; } \ + raw_data = per_cpu_ptr(raw_data, __cpu); \ \ - perf_tpcounter_event(event_call->id, __addr, __count, entry,\ + *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ + entry = (struct ftrace_raw_##call *)raw_data; \ + ent = &entry->ent; \ + tracing_generic_entry_update(ent, irq_flags, pc); \ + ent->type = event_call->id; \ + \ + tstruct \ + \ + { assign; } \ + \ + perf_tpcounter_event(event_call->id, __addr, __count, entry, \ __entry_size); \ - } while (0); \ + \ +end: \ + local_irq_restore(irq_flags); \ \ } diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index df4a74efd50c..3aaa77c3309b 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -8,12 +8,52 @@ #include #include "trace.h" +/* + * We can't use a size but a type in alloc_percpu() + * So let's create a dummy type that matches the desired size + */ +typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t; + +char *trace_profile_buf; +char *trace_profile_buf_nmi; + +/* Count the events in use (per event id, not per instance) */ +static int total_profile_count; + static int ftrace_profile_enable_event(struct ftrace_event_call *event) { + char *buf; + int ret = -ENOMEM; + if (atomic_inc_return(&event->profile_count)) return 0; - return event->profile_enable(); + if (!total_profile_count++) { + buf = (char *)alloc_percpu(profile_buf_t); + if (!buf) + goto fail_buf; + + rcu_assign_pointer(trace_profile_buf, buf); + + buf = (char *)alloc_percpu(profile_buf_t); + if (!buf) + goto fail_buf_nmi; + + rcu_assign_pointer(trace_profile_buf_nmi, buf); + } + + ret = event->profile_enable(); + if (!ret) + return 0; + + kfree(trace_profile_buf_nmi); +fail_buf_nmi: + kfree(trace_profile_buf); +fail_buf: + total_profile_count--; + atomic_dec(&event->profile_count); + + return ret; } int ftrace_profile_enable(int event_id) @@ -36,10 +76,29 @@ int ftrace_profile_enable(int event_id) static void ftrace_profile_disable_event(struct ftrace_event_call *event) { + char *buf, *nmi_buf; + if (!atomic_add_negative(-1, &event->profile_count)) return; event->profile_disable(); + + if (!--total_profile_count) { + buf = trace_profile_buf; + rcu_assign_pointer(trace_profile_buf, NULL); + + nmi_buf = trace_profile_buf_nmi; + rcu_assign_pointer(trace_profile_buf_nmi, NULL); + + /* + * Ensure every events in profiling have finished before + * releasing the buffers + */ + synchronize_sched(); + + free_percpu(buf); + free_percpu(nmi_buf); + } } void ftrace_profile_disable(int event_id) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8712ce3c6a0e..7a3550cf2597 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -384,10 +384,13 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { - struct syscall_trace_enter *rec; struct syscall_metadata *sys_data; + struct syscall_trace_enter *rec; + unsigned long flags; + char *raw_data; int syscall_nr; int size; + int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) @@ -402,20 +405,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - do { - char raw_data[size]; + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "profile buffer not large enough")) + return; + + /* Protect the per cpu buffer, begin the rcu read side */ + local_irq_save(flags); - /* zero the dead bytes from align to not leak stack to user */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + cpu = smp_processor_id(); + + if (in_nmi()) + raw_data = rcu_dereference(trace_profile_buf_nmi); + else + raw_data = rcu_dereference(trace_profile_buf); + + if (!raw_data) + goto end; - rec = (struct syscall_trace_enter *) raw_data; - tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->enter_id; - rec->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, sys_data->nb_args, - (unsigned long *)&rec->args); - perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); - } while(0); + raw_data = per_cpu_ptr(raw_data, cpu); + + /* zero the dead bytes from align to not leak stack to user */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + + rec = (struct syscall_trace_enter *) raw_data; + tracing_generic_entry_update(&rec->ent, 0, 0); + rec->ent.type = sys_data->enter_id; + rec->nr = syscall_nr; + syscall_get_arguments(current, regs, 0, sys_data->nb_args, + (unsigned long *)&rec->args); + perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); + +end: + local_irq_restore(flags); } int reg_prof_syscall_enter(char *name) @@ -460,8 +481,12 @@ void unreg_prof_syscall_enter(char *name) static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; - struct syscall_trace_exit rec; + struct syscall_trace_exit *rec; + unsigned long flags; int syscall_nr; + char *raw_data; + int size; + int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) @@ -471,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - tracing_generic_entry_update(&rec.ent, 0, 0); - rec.ent.type = sys_data->exit_id; - rec.nr = syscall_nr; - rec.ret = syscall_get_return_value(current, regs); + /* We can probably do that at build time */ + size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); + size -= sizeof(u32); - perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); + /* + * Impossible, but be paranoid with the future + * How to put this check outside runtime? + */ + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "exit event has grown above profile buffer size")) + return; + + /* Protect the per cpu buffer, begin the rcu read side */ + local_irq_save(flags); + cpu = smp_processor_id(); + + if (in_nmi()) + raw_data = rcu_dereference(trace_profile_buf_nmi); + else + raw_data = rcu_dereference(trace_profile_buf); + + if (!raw_data) + goto end; + + raw_data = per_cpu_ptr(raw_data, cpu); + + /* zero the dead bytes from align to not leak stack to user */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + + rec = (struct syscall_trace_exit *)raw_data; + + tracing_generic_entry_update(&rec->ent, 0, 0); + rec->ent.type = sys_data->exit_id; + rec->nr = syscall_nr; + rec->ret = syscall_get_return_value(current, regs); + + perf_tpcounter_event(sys_data->exit_id, 0, 1, rec, size); + +end: + local_irq_restore(flags); } int reg_prof_syscall_exit(char *name) -- cgit v1.2.3-59-g8ed1b From fc5377668c3d808e1d53c4aee152c836f55c3490 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Sep 2009 19:35:28 +0200 Subject: tracing: Remove markers Now that the last users of markers have migrated to the event tracer we can kill off the (now orphan) support code. Signed-off-by: Christoph Hellwig Acked-by: Mathieu Desnoyers Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20090917173527.GA1699@lst.de> Signed-off-by: Ingo Molnar --- Documentation/markers.txt | 104 ---- arch/powerpc/platforms/cell/spufs/file.c | 1 - arch/powerpc/platforms/cell/spufs/sched.c | 1 - include/linux/kvm_host.h | 1 - include/linux/marker.h | 221 ------- include/linux/module.h | 11 - init/Kconfig | 7 - kernel/Makefile | 1 - kernel/marker.c | 930 ------------------------------ kernel/module.c | 18 - kernel/trace/trace_printk.c | 1 - samples/Kconfig | 6 - samples/Makefile | 2 +- samples/markers/Makefile | 4 - samples/markers/marker-example.c | 53 -- samples/markers/probe-example.c | 92 --- scripts/Makefile.modpost | 12 - 17 files changed, 1 insertion(+), 1464 deletions(-) delete mode 100644 Documentation/markers.txt delete mode 100644 include/linux/marker.h delete mode 100644 kernel/marker.c delete mode 100644 samples/markers/Makefile delete mode 100644 samples/markers/marker-example.c delete mode 100644 samples/markers/probe-example.c (limited to 'include') diff --git a/Documentation/markers.txt b/Documentation/markers.txt deleted file mode 100644 index d2b3d0e91b26..000000000000 --- a/Documentation/markers.txt +++ /dev/null @@ -1,104 +0,0 @@ - Using the Linux Kernel Markers - - Mathieu Desnoyers - - -This document introduces Linux Kernel Markers and their use. It provides -examples of how to insert markers in the kernel and connect probe functions to -them and provides some examples of probe functions. - - -* Purpose of markers - -A marker placed in code provides a hook to call a function (probe) that you can -provide at runtime. A marker can be "on" (a probe is connected to it) or "off" -(no probe is attached). When a marker is "off" it has no effect, except for -adding a tiny time penalty (checking a condition for a branch) and space -penalty (adding a few bytes for the function call at the end of the -instrumented function and adds a data structure in a separate section). When a -marker is "on", the function you provide is called each time the marker is -executed, in the execution context of the caller. When the function provided -ends its execution, it returns to the caller (continuing from the marker site). - -You can put markers at important locations in the code. Markers are -lightweight hooks that can pass an arbitrary number of parameters, -described in a printk-like format string, to the attached probe function. - -They can be used for tracing and performance accounting. - - -* Usage - -In order to use the macro trace_mark, you should include linux/marker.h. - -#include - -And, - -trace_mark(subsystem_event, "myint %d mystring %s", someint, somestring); -Where : -- subsystem_event is an identifier unique to your event - - subsystem is the name of your subsystem. - - event is the name of the event to mark. -- "myint %d mystring %s" is the formatted string for the serializer. "myint" and - "mystring" are repectively the field names associated with the first and - second parameter. -- someint is an integer. -- somestring is a char pointer. - -Connecting a function (probe) to a marker is done by providing a probe (function -to call) for the specific marker through marker_probe_register() and can be -activated by calling marker_arm(). Marker deactivation can be done by calling -marker_disarm() as many times as marker_arm() has been called. Removing a probe -is done through marker_probe_unregister(); it will disarm the probe. - -marker_synchronize_unregister() must be called between probe unregistration and -the first occurrence of -- the end of module exit function, - to make sure there is no caller left using the probe; -- the free of any resource used by the probes, - to make sure the probes wont be accessing invalid data. -This, and the fact that preemption is disabled around the probe call, make sure -that probe removal and module unload are safe. See the "Probe example" section -below for a sample probe module. - -The marker mechanism supports inserting multiple instances of the same marker. -Markers can be put in inline functions, inlined static functions, and -unrolled loops as well as regular functions. - -The naming scheme "subsystem_event" is suggested here as a convention intended -to limit collisions. Marker names are global to the kernel: they are considered -as being the same whether they are in the core kernel image or in modules. -Conflicting format strings for markers with the same name will cause the markers -to be detected to have a different format string not to be armed and will output -a printk warning which identifies the inconsistency: - -"Format mismatch for probe probe_name (format), marker (format)" - -Another way to use markers is to simply define the marker without generating any -function call to actually call into the marker. This is useful in combination -with tracepoint probes in a scheme like this : - -void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk); - -DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name, - "arg1 %u pid %d"); - -notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk) -{ - struct marker *marker = &GET_MARKER(kernel_irq_entry); - /* write data to trace buffers ... */ -} - -* Probe / marker example - -See the example provided in samples/markers/src - -Compile them with your kernel. - -Run, as root : -modprobe marker-example (insmod order is not important) -modprobe probe-example -cat /proc/marker-example (returns an expected error) -rmmod marker-example probe-example -dmesg diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index ab8aef9bb8ea..8f079b865ad0 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index bb5b77c66d05..4678078fede8 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4af56036a6bf..b7bbb5ddd7ae 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/marker.h b/include/linux/marker.h deleted file mode 100644 index b85e74ca782f..000000000000 --- a/include/linux/marker.h +++ /dev/null @@ -1,221 +0,0 @@ -#ifndef _LINUX_MARKER_H -#define _LINUX_MARKER_H - -/* - * Code markup for dynamic and static tracing. - * - * See Documentation/marker.txt. - * - * (C) Copyright 2006 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include - -struct module; -struct marker; - -/** - * marker_probe_func - Type of a marker probe function - * @probe_private: probe private data - * @call_private: call site private data - * @fmt: format string - * @args: variable argument list pointer. Use a pointer to overcome C's - * inability to pass this around as a pointer in a portable manner in - * the callee otherwise. - * - * Type of marker probe functions. They receive the mdata and need to parse the - * format string to recover the variable argument list. - */ -typedef void marker_probe_func(void *probe_private, void *call_private, - const char *fmt, va_list *args); - -struct marker_probe_closure { - marker_probe_func *func; /* Callback */ - void *probe_private; /* Private probe data */ -}; - -struct marker { - const char *name; /* Marker name */ - const char *format; /* Marker format string, describing the - * variable argument list. - */ - char state; /* Marker state. */ - char ptype; /* probe type : 0 : single, 1 : multi */ - /* Probe wrapper */ - void (*call)(const struct marker *mdata, void *call_private, ...); - struct marker_probe_closure single; - struct marker_probe_closure *multi; - const char *tp_name; /* Optional tracepoint name */ - void *tp_cb; /* Optional tracepoint callback */ -} __attribute__((aligned(8))); - -#ifdef CONFIG_MARKERS - -#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ - NULL, tp_name_str, tp_cb } - -#define DEFINE_MARKER(name, format) \ - _DEFINE_MARKER(name, NULL, NULL, format) - -#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ - _DEFINE_MARKER(name, #tp_name, tp_cb, format) - -/* - * Note : the empty asm volatile with read constraint is used here instead of a - * "used" attribute to fix a gcc 4.1.x bug. - * Make sure the alignment of the structure in the __markers section will - * not add unwanted padding between the beginning of the section and the - * structure. Force alignment to the same alignment as the section start. - * - * The "generic" argument controls which marker enabling mechanism must be used. - * If generic is true, a variable read is used. - * If generic is false, immediate values are used. - */ -#define __trace_mark(generic, name, call_private, format, args...) \ - do { \ - DEFINE_MARKER(name, format); \ - __mark_check_format(format, ## args); \ - if (unlikely(__mark_##name.state)) { \ - (*__mark_##name.call) \ - (&__mark_##name, call_private, ## args);\ - } \ - } while (0) - -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ - __mark_check_format(format, ## args); \ - (*__mark_##name.call)(&__mark_##name, call_private, \ - ## args); \ - } while (0) - -extern void marker_update_probe_range(struct marker *begin, - struct marker *end); - -#define GET_MARKER(name) (__mark_##name) - -#else /* !CONFIG_MARKERS */ -#define DEFINE_MARKER(name, tp_name, tp_cb, format) -#define __trace_mark(generic, name, call_private, format, args...) \ - __mark_check_format(format, ## args) -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - __mark_check_format(format, ## args); \ - } while (0) -static inline void marker_update_probe_range(struct marker *begin, - struct marker *end) -{ } -#define GET_MARKER(name) -#endif /* CONFIG_MARKERS */ - -/** - * trace_mark - Marker using code patching - * @name: marker name, not quoted. - * @format: format string - * @args...: variable argument list - * - * Places a marker using optimized code patching technique (imv_read()) - * to be enabled when immediate values are present. - */ -#define trace_mark(name, format, args...) \ - __trace_mark(0, name, NULL, format, ## args) - -/** - * _trace_mark - Marker using variable read - * @name: marker name, not quoted. - * @format: format string - * @args...: variable argument list - * - * Places a marker using a standard memory read (_imv_read()) to be - * enabled. Should be used for markers in code paths where instruction - * modification based enabling is not welcome. (__init and __exit functions, - * lockdep, some traps, printk). - */ -#define _trace_mark(name, format, args...) \ - __trace_mark(1, name, NULL, format, ## args) - -/** - * trace_mark_tp - Marker in a tracepoint callback - * @name: marker name, not quoted. - * @tp_name: tracepoint name, not quoted. - * @tp_cb: tracepoint callback. Should have an associated global symbol so it - * is not optimized away by the compiler (should not be static). - * @format: format string - * @args...: variable argument list - * - * Places a marker in a tracepoint callback. - */ -#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \ - __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args) - -/** - * MARK_NOARGS - Format string for a marker with no argument. - */ -#define MARK_NOARGS " " - -/* To be used for string format validity checking with gcc */ -static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) -{ -} - -#define __mark_check_format(format, args...) \ - do { \ - if (0) \ - ___mark_check_format(format, ## args); \ - } while (0) - -extern marker_probe_func __mark_empty_function; - -extern void marker_probe_cb(const struct marker *mdata, - void *call_private, ...); - -/* - * Connect a probe to a marker. - * private data pointer must be a valid allocated memory address, or NULL. - */ -extern int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *probe_private); - -/* - * Returns the private data given to marker_probe_register. - */ -extern int marker_probe_unregister(const char *name, - marker_probe_func *probe, void *probe_private); -/* - * Unregister a marker by providing the registered private data. - */ -extern int marker_probe_unregister_private_data(marker_probe_func *probe, - void *probe_private); - -extern void *marker_get_private_data(const char *name, marker_probe_func *probe, - int num); - -/* - * marker_synchronize_unregister must be called between the last marker probe - * unregistration and the first one of - * - the end of module exit function - * - the free of any resource used by the probes - * to ensure the code and data are valid for any possibly running probes. - */ -#define marker_synchronize_unregister() synchronize_sched() - -#endif diff --git a/include/linux/module.h b/include/linux/module.h index f8f92d015efe..1c755b2f937d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -327,10 +326,6 @@ struct module /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; -#ifdef CONFIG_MARKERS - struct marker *markers; - unsigned int num_markers; -#endif #ifdef CONFIG_TRACEPOINTS struct tracepoint *tracepoints; unsigned int num_tracepoints; @@ -535,8 +530,6 @@ int unregister_module_notifier(struct notifier_block * nb); extern void print_modules(void); -extern void module_update_markers(void); - extern void module_update_tracepoints(void); extern int module_get_iter_tracepoints(struct tracepoint_iter *iter); @@ -651,10 +644,6 @@ static inline void print_modules(void) { } -static inline void module_update_markers(void) -{ -} - static inline void module_update_tracepoints(void) { } diff --git a/init/Kconfig b/init/Kconfig index 8e8b76d8a272..4cc0fa13d5eb 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1054,13 +1054,6 @@ config PROFILING config TRACEPOINTS bool -config MARKERS - bool "Activate markers" - select TRACEPOINTS - help - Place an empty function call at each marker site. Can be - dynamically changed for a probe function. - source "arch/Kconfig" config SLOW_WORK diff --git a/kernel/Makefile b/kernel/Makefile index 3d9c7e27e3f9..7c9b0a585502 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -87,7 +87,6 @@ obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o -obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ diff --git a/kernel/marker.c b/kernel/marker.c deleted file mode 100644 index ea54f2647868..000000000000 --- a/kernel/marker.c +++ /dev/null @@ -1,930 +0,0 @@ -/* - * Copyright (C) 2007 Mathieu Desnoyers - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct marker __start___markers[]; -extern struct marker __stop___markers[]; - -/* Set to 1 to enable marker debug output */ -static const int marker_debug; - -/* - * markers_mutex nests inside module_mutex. Markers mutex protects the builtin - * and module markers and the hash table. - */ -static DEFINE_MUTEX(markers_mutex); - -/* - * Marker hash table, containing the active markers. - * Protected by module_mutex. - */ -#define MARKER_HASH_BITS 6 -#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) -static struct hlist_head marker_table[MARKER_TABLE_SIZE]; - -/* - * Note about RCU : - * It is used to make sure every handler has finished using its private data - * between two consecutive operation (add or remove) on a given marker. It is - * also used to delay the free of multiple probes array until a quiescent state - * is reached. - * marker entries modifications are protected by the markers_mutex. - */ -struct marker_entry { - struct hlist_node hlist; - char *format; - /* Probe wrapper */ - void (*call)(const struct marker *mdata, void *call_private, ...); - struct marker_probe_closure single; - struct marker_probe_closure *multi; - int refcount; /* Number of times armed. 0 if disarmed. */ - struct rcu_head rcu; - void *oldptr; - int rcu_pending; - unsigned char ptype:1; - unsigned char format_allocated:1; - char name[0]; /* Contains name'\0'format'\0' */ -}; - -/** - * __mark_empty_function - Empty probe callback - * @probe_private: probe private data - * @call_private: call site private data - * @fmt: format string - * @...: variable argument list - * - * Empty callback provided as a probe to the markers. By providing this to a - * disabled marker, we make sure the execution flow is always valid even - * though the function pointer change and the marker enabling are two distinct - * operations that modifies the execution flow of preemptible code. - */ -notrace void __mark_empty_function(void *probe_private, void *call_private, - const char *fmt, va_list *args) -{ -} -EXPORT_SYMBOL_GPL(__mark_empty_function); - -/* - * marker_probe_cb Callback that prepares the variable argument list for probes. - * @mdata: pointer of type struct marker - * @call_private: caller site private data - * @...: Variable argument list. - * - * Since we do not use "typical" pointer based RCU in the 1 argument case, we - * need to put a full smp_rmb() in this branch. This is why we do not use - * rcu_dereference() for the pointer read. - */ -notrace void marker_probe_cb(const struct marker *mdata, - void *call_private, ...) -{ - va_list args; - char ptype; - - /* - * rcu_read_lock_sched does two things : disabling preemption to make - * sure the teardown of the callbacks can be done correctly when they - * are in modules and they insure RCU read coherency. - */ - rcu_read_lock_sched_notrace(); - ptype = mdata->ptype; - if (likely(!ptype)) { - marker_probe_func *func; - /* Must read the ptype before ptr. They are not data dependant, - * so we put an explicit smp_rmb() here. */ - smp_rmb(); - func = mdata->single.func; - /* Must read the ptr before private data. They are not data - * dependant, so we put an explicit smp_rmb() here. */ - smp_rmb(); - va_start(args, call_private); - func(mdata->single.probe_private, call_private, mdata->format, - &args); - va_end(args); - } else { - struct marker_probe_closure *multi; - int i; - /* - * Read mdata->ptype before mdata->multi. - */ - smp_rmb(); - multi = mdata->multi; - /* - * multi points to an array, therefore accessing the array - * depends on reading multi. However, even in this case, - * we must insure that the pointer is read _before_ the array - * data. Same as rcu_dereference, but we need a full smp_rmb() - * in the fast path, so put the explicit barrier here. - */ - smp_read_barrier_depends(); - for (i = 0; multi[i].func; i++) { - va_start(args, call_private); - multi[i].func(multi[i].probe_private, call_private, - mdata->format, &args); - va_end(args); - } - } - rcu_read_unlock_sched_notrace(); -} -EXPORT_SYMBOL_GPL(marker_probe_cb); - -/* - * marker_probe_cb Callback that does not prepare the variable argument list. - * @mdata: pointer of type struct marker - * @call_private: caller site private data - * @...: Variable argument list. - * - * Should be connected to markers "MARK_NOARGS". - */ -static notrace void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, ...) -{ - va_list args; /* not initialized */ - char ptype; - - rcu_read_lock_sched_notrace(); - ptype = mdata->ptype; - if (likely(!ptype)) { - marker_probe_func *func; - /* Must read the ptype before ptr. They are not data dependant, - * so we put an explicit smp_rmb() here. */ - smp_rmb(); - func = mdata->single.func; - /* Must read the ptr before private data. They are not data - * dependant, so we put an explicit smp_rmb() here. */ - smp_rmb(); - func(mdata->single.probe_private, call_private, mdata->format, - &args); - } else { - struct marker_probe_closure *multi; - int i; - /* - * Read mdata->ptype before mdata->multi. - */ - smp_rmb(); - multi = mdata->multi; - /* - * multi points to an array, therefore accessing the array - * depends on reading multi. However, even in this case, - * we must insure that the pointer is read _before_ the array - * data. Same as rcu_dereference, but we need a full smp_rmb() - * in the fast path, so put the explicit barrier here. - */ - smp_read_barrier_depends(); - for (i = 0; multi[i].func; i++) - multi[i].func(multi[i].probe_private, call_private, - mdata->format, &args); - } - rcu_read_unlock_sched_notrace(); -} - -static void free_old_closure(struct rcu_head *head) -{ - struct marker_entry *entry = container_of(head, - struct marker_entry, rcu); - kfree(entry->oldptr); - /* Make sure we free the data before setting the pending flag to 0 */ - smp_wmb(); - entry->rcu_pending = 0; -} - -static void debug_print_probes(struct marker_entry *entry) -{ - int i; - - if (!marker_debug) - return; - - if (!entry->ptype) { - printk(KERN_DEBUG "Single probe : %p %p\n", - entry->single.func, - entry->single.probe_private); - } else { - for (i = 0; entry->multi[i].func; i++) - printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, - entry->multi[i].func, - entry->multi[i].probe_private); - } -} - -static struct marker_probe_closure * -marker_entry_add_probe(struct marker_entry *entry, - marker_probe_func *probe, void *probe_private) -{ - int nr_probes = 0; - struct marker_probe_closure *old, *new; - - WARN_ON(!probe); - - debug_print_probes(entry); - old = entry->multi; - if (!entry->ptype) { - if (entry->single.func == probe && - entry->single.probe_private == probe_private) - return ERR_PTR(-EBUSY); - if (entry->single.func == __mark_empty_function) { - /* 0 -> 1 probes */ - entry->single.func = probe; - entry->single.probe_private = probe_private; - entry->refcount = 1; - entry->ptype = 0; - debug_print_probes(entry); - return NULL; - } else { - /* 1 -> 2 probes */ - nr_probes = 1; - old = NULL; - } - } else { - /* (N -> N+1), (N != 0, 1) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) - if (old[nr_probes].func == probe - && old[nr_probes].probe_private - == probe_private) - return ERR_PTR(-EBUSY); - } - /* + 2 : one for new probe, one for NULL func */ - new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), - GFP_KERNEL); - if (new == NULL) - return ERR_PTR(-ENOMEM); - if (!old) - new[0] = entry->single; - else - memcpy(new, old, - nr_probes * sizeof(struct marker_probe_closure)); - new[nr_probes].func = probe; - new[nr_probes].probe_private = probe_private; - entry->refcount = nr_probes + 1; - entry->multi = new; - entry->ptype = 1; - debug_print_probes(entry); - return old; -} - -static struct marker_probe_closure * -marker_entry_remove_probe(struct marker_entry *entry, - marker_probe_func *probe, void *probe_private) -{ - int nr_probes = 0, nr_del = 0, i; - struct marker_probe_closure *old, *new; - - old = entry->multi; - - debug_print_probes(entry); - if (!entry->ptype) { - /* 0 -> N is an error */ - WARN_ON(entry->single.func == __mark_empty_function); - /* 1 -> 0 probes */ - WARN_ON(probe && entry->single.func != probe); - WARN_ON(entry->single.probe_private != probe_private); - entry->single.func = __mark_empty_function; - entry->refcount = 0; - entry->ptype = 0; - debug_print_probes(entry); - return NULL; - } else { - /* (N -> M), (N > 1, M >= 0) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if ((!probe || old[nr_probes].func == probe) - && old[nr_probes].probe_private - == probe_private) - nr_del++; - } - } - - if (nr_probes - nr_del == 0) { - /* N -> 0, (N > 1) */ - entry->single.func = __mark_empty_function; - entry->refcount = 0; - entry->ptype = 0; - } else if (nr_probes - nr_del == 1) { - /* N -> 1, (N > 1) */ - for (i = 0; old[i].func; i++) - if ((probe && old[i].func != probe) || - old[i].probe_private != probe_private) - entry->single = old[i]; - entry->refcount = 1; - entry->ptype = 0; - } else { - int j = 0; - /* N -> M, (N > 1, M > 1) */ - /* + 1 for NULL */ - new = kzalloc((nr_probes - nr_del + 1) - * sizeof(struct marker_probe_closure), GFP_KERNEL); - if (new == NULL) - return ERR_PTR(-ENOMEM); - for (i = 0; old[i].func; i++) - if ((probe && old[i].func != probe) || - old[i].probe_private != probe_private) - new[j++] = old[i]; - entry->refcount = nr_probes - nr_del; - entry->ptype = 1; - entry->multi = new; - } - debug_print_probes(entry); - return old; -} - -/* - * Get marker if the marker is present in the marker hash table. - * Must be called with markers_mutex held. - * Returns NULL if not present. - */ -static struct marker_entry *get_marker(const char *name) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - u32 hash = jhash(name, strlen(name), 0); - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) - return e; - } - return NULL; -} - -/* - * Add the marker to the marker hash table. Must be called with markers_mutex - * held. - */ -static struct marker_entry *add_marker(const char *name, const char *format) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - size_t name_len = strlen(name) + 1; - size_t format_len = 0; - u32 hash = jhash(name, name_len-1, 0); - - if (format) - format_len = strlen(format) + 1; - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - printk(KERN_NOTICE - "Marker %s busy\n", name); - return ERR_PTR(-EBUSY); /* Already there */ - } - } - /* - * Using kmalloc here to allocate a variable length element. Could - * cause some memory fragmentation if overused. - */ - e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, - GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - memcpy(&e->name[0], name, name_len); - if (format) { - e->format = &e->name[name_len]; - memcpy(e->format, format, format_len); - if (strcmp(e->format, MARK_NOARGS) == 0) - e->call = marker_probe_cb_noarg; - else - e->call = marker_probe_cb; - trace_mark(core_marker_format, "name %s format %s", - e->name, e->format); - } else { - e->format = NULL; - e->call = marker_probe_cb; - } - e->single.func = __mark_empty_function; - e->single.probe_private = NULL; - e->multi = NULL; - e->ptype = 0; - e->format_allocated = 0; - e->refcount = 0; - e->rcu_pending = 0; - hlist_add_head(&e->hlist, head); - return e; -} - -/* - * Remove the marker from the marker hash table. Must be called with mutex_lock - * held. - */ -static int remove_marker(const char *name) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - int found = 0; - size_t len = strlen(name) + 1; - u32 hash = jhash(name, len-1, 0); - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - found = 1; - break; - } - } - if (!found) - return -ENOENT; - if (e->single.func != __mark_empty_function) - return -EBUSY; - hlist_del(&e->hlist); - if (e->format_allocated) - kfree(e->format); - /* Make sure the call_rcu has been executed */ - if (e->rcu_pending) - rcu_barrier_sched(); - kfree(e); - return 0; -} - -/* - * Set the mark_entry format to the format found in the element. - */ -static int marker_set_format(struct marker_entry *entry, const char *format) -{ - entry->format = kstrdup(format, GFP_KERNEL); - if (!entry->format) - return -ENOMEM; - entry->format_allocated = 1; - - trace_mark(core_marker_format, "name %s format %s", - entry->name, entry->format); - return 0; -} - -/* - * Sets the probe callback corresponding to one marker. - */ -static int set_marker(struct marker_entry *entry, struct marker *elem, - int active) -{ - int ret = 0; - WARN_ON(strcmp(entry->name, elem->name) != 0); - - if (entry->format) { - if (strcmp(entry->format, elem->format) != 0) { - printk(KERN_NOTICE - "Format mismatch for probe %s " - "(%s), marker (%s)\n", - entry->name, - entry->format, - elem->format); - return -EPERM; - } - } else { - ret = marker_set_format(entry, elem->format); - if (ret) - return ret; - } - - /* - * probe_cb setup (statically known) is done here. It is - * asynchronous with the rest of execution, therefore we only - * pass from a "safe" callback (with argument) to an "unsafe" - * callback (does not set arguments). - */ - elem->call = entry->call; - /* - * Sanity check : - * We only update the single probe private data when the ptr is - * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) - */ - WARN_ON(elem->single.func != __mark_empty_function - && elem->single.probe_private != entry->single.probe_private - && !elem->ptype); - elem->single.probe_private = entry->single.probe_private; - /* - * Make sure the private data is valid when we update the - * single probe ptr. - */ - smp_wmb(); - elem->single.func = entry->single.func; - /* - * We also make sure that the new probe callbacks array is consistent - * before setting a pointer to it. - */ - rcu_assign_pointer(elem->multi, entry->multi); - /* - * Update the function or multi probe array pointer before setting the - * ptype. - */ - smp_wmb(); - elem->ptype = entry->ptype; - - if (elem->tp_name && (active ^ elem->state)) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - - if (active) { - /* - * try_module_get should always succeed because we hold - * lock_module() to get the tp_cb address. - */ - ret = try_module_get(__module_text_address( - (unsigned long)elem->tp_cb)); - BUG_ON(!ret); - ret = tracepoint_probe_register_noupdate( - elem->tp_name, - elem->tp_cb); - } else { - ret = tracepoint_probe_unregister_noupdate( - elem->tp_name, - elem->tp_cb); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address( - (unsigned long)elem->tp_cb)); - } - } - elem->state = active; - - return ret; -} - -/* - * Disable a marker and its probe callback. - * Note: only waiting an RCU period after setting elem->call to the empty - * function insures that the original callback is not used anymore. This insured - * by rcu_read_lock_sched around the call site. - */ -static void disable_marker(struct marker *elem) -{ - int ret; - - /* leave "call" as is. It is known statically. */ - if (elem->tp_name && elem->state) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - ret = tracepoint_probe_unregister_noupdate(elem->tp_name, - elem->tp_cb); - WARN_ON(ret); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address((unsigned long)elem->tp_cb)); - } - elem->state = 0; - elem->single.func = __mark_empty_function; - /* Update the function before setting the ptype */ - smp_wmb(); - elem->ptype = 0; /* single probe */ - /* - * Leave the private data and id there, because removal is racy and - * should be done only after an RCU period. These are never used until - * the next initialization anyway. - */ -} - -/** - * marker_update_probe_range - Update a probe range - * @begin: beginning of the range - * @end: end of the range - * - * Updates the probe callback corresponding to a range of markers. - */ -void marker_update_probe_range(struct marker *begin, - struct marker *end) -{ - struct marker *iter; - struct marker_entry *mark_entry; - - mutex_lock(&markers_mutex); - for (iter = begin; iter < end; iter++) { - mark_entry = get_marker(iter->name); - if (mark_entry) { - set_marker(mark_entry, iter, !!mark_entry->refcount); - /* - * ignore error, continue - */ - } else { - disable_marker(iter); - } - } - mutex_unlock(&markers_mutex); -} - -/* - * Update probes, removing the faulty probes. - * - * Internal callback only changed before the first probe is connected to it. - * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 - * transitions. All other transitions will leave the old private data valid. - * This makes the non-atomicity of the callback/private data updates valid. - * - * "special case" updates : - * 0 -> 1 callback - * 1 -> 0 callback - * 1 -> 2 callbacks - * 2 -> 1 callbacks - * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. - * Site effect : marker_set_format may delete the marker entry (creating a - * replacement). - */ -static void marker_update_probes(void) -{ - /* Core kernel markers */ - marker_update_probe_range(__start___markers, __stop___markers); - /* Markers in modules. */ - module_update_markers(); - tracepoint_probe_update_all(); -} - -/** - * marker_probe_register - Connect a probe to a marker - * @name: marker name - * @format: format string - * @probe: probe handler - * @probe_private: probe private data - * - * private data must be a valid allocated memory address, or NULL. - * Returns 0 if ok, error value on error. - * The probe address must at least be aligned on the architecture pointer size. - */ -int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - int ret = 0; - struct marker_probe_closure *old; - - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) { - entry = add_marker(name, format); - if (IS_ERR(entry)) - ret = PTR_ERR(entry); - } else if (format) { - if (!entry->format) - ret = marker_set_format(entry, format); - else if (strcmp(entry->format, format)) - ret = -EPERM; - } - if (ret) - goto end; - - /* - * If we detect that a call_rcu is pending for this marker, - * make sure it's executed now. - */ - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_add_probe(entry, probe, probe_private); - if (IS_ERR(old)) { - ret = PTR_ERR(old); - goto end; - } - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_register); - -/** - * marker_probe_unregister - Disconnect a probe from a marker - * @name: marker name - * @probe: probe function pointer - * @probe_private: probe private data - * - * Returns the private data given to marker_probe_register, or an ERR_PTR(). - * We do not need to call a synchronize_sched to make sure the probes have - * finished running before doing a module unload, because the module unload - * itself uses stop_machine(), which insures that every preempt disabled section - * have finished. - */ -int marker_probe_unregister(const char *name, - marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - struct marker_probe_closure *old; - int ret = -ENOENT; - - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_remove_probe(entry, probe, probe_private); - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); - remove_marker(name); /* Ignore busy error message */ - ret = 0; -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_unregister); - -static struct marker_entry * -get_marker_from_private_data(marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - unsigned int i; - struct hlist_head *head; - struct hlist_node *node; - - for (i = 0; i < MARKER_TABLE_SIZE; i++) { - head = &marker_table[i]; - hlist_for_each_entry(entry, node, head, hlist) { - if (!entry->ptype) { - if (entry->single.func == probe - && entry->single.probe_private - == probe_private) - return entry; - } else { - struct marker_probe_closure *closure; - closure = entry->multi; - for (i = 0; closure[i].func; i++) { - if (closure[i].func == probe && - closure[i].probe_private - == probe_private) - return entry; - } - } - } - } - return NULL; -} - -/** - * marker_probe_unregister_private_data - Disconnect a probe from a marker - * @probe: probe function - * @probe_private: probe private data - * - * Unregister a probe by providing the registered private data. - * Only removes the first marker found in hash table. - * Return 0 on success or error value. - * We do not need to call a synchronize_sched to make sure the probes have - * finished running before doing a module unload, because the module unload - * itself uses stop_machine(), which insures that every preempt disabled section - * have finished. - */ -int marker_probe_unregister_private_data(marker_probe_func *probe, - void *probe_private) -{ - struct marker_entry *entry; - int ret = 0; - struct marker_probe_closure *old; - - mutex_lock(&markers_mutex); - entry = get_marker_from_private_data(probe, probe_private); - if (!entry) { - ret = -ENOENT; - goto end; - } - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_remove_probe(entry, NULL, probe_private); - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker_from_private_data(probe, probe_private); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); - remove_marker(entry->name); /* Ignore busy error message */ -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); - -/** - * marker_get_private_data - Get a marker's probe private data - * @name: marker name - * @probe: probe to match - * @num: get the nth matching probe's private data - * - * Returns the nth private data pointer (starting from 0) matching, or an - * ERR_PTR. - * Returns the private data pointer, or an ERR_PTR. - * The private data pointer should _only_ be dereferenced if the caller is the - * owner of the data, or its content could vanish. This is mostly used to - * confirm that a caller is the owner of a registered probe. - */ -void *marker_get_private_data(const char *name, marker_probe_func *probe, - int num) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - size_t name_len = strlen(name) + 1; - u32 hash = jhash(name, name_len-1, 0); - int i; - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - if (!e->ptype) { - if (num == 0 && e->single.func == probe) - return e->single.probe_private; - } else { - struct marker_probe_closure *closure; - int match = 0; - closure = e->multi; - for (i = 0; closure[i].func; i++) { - if (closure[i].func != probe) - continue; - if (match++ == num) - return closure[i].probe_private; - } - } - break; - } - } - return ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL_GPL(marker_get_private_data); - -#ifdef CONFIG_MODULES - -int marker_module_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct module *mod = data; - - switch (val) { - case MODULE_STATE_COMING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - case MODULE_STATE_GOING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - } - return 0; -} - -struct notifier_block marker_module_nb = { - .notifier_call = marker_module_notify, - .priority = 0, -}; - -static int init_markers(void) -{ - return register_module_notifier(&marker_module_nb); -} -__initcall(init_markers); - -#endif /* CONFIG_MODULES */ diff --git a/kernel/module.c b/kernel/module.c index 05ce49ced8f6..b6ee424245dd 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2237,10 +2237,6 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->ctors), &mod->num_ctors); #endif -#ifdef CONFIG_MARKERS - mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", - sizeof(*mod->markers), &mod->num_markers); -#endif #ifdef CONFIG_TRACEPOINTS mod->tracepoints = section_objs(hdr, sechdrs, secstrings, "__tracepoints", @@ -2958,20 +2954,6 @@ void module_layout(struct module *mod, EXPORT_SYMBOL(module_layout); #endif -#ifdef CONFIG_MARKERS -void module_update_markers(void) -{ - struct module *mod; - - mutex_lock(&module_mutex); - list_for_each_entry(mod, &modules, list) - if (!mod->taints) - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - mutex_unlock(&module_mutex); -} -#endif - #ifdef CONFIG_TRACEPOINTS void module_update_tracepoints(void) { diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 687699d365ae..2547d8813cf0 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/samples/Kconfig b/samples/Kconfig index 428b065ba695..b92bde3c6a89 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -7,12 +7,6 @@ menuconfig SAMPLES if SAMPLES -config SAMPLE_MARKERS - tristate "Build markers examples -- loadable modules only" - depends on MARKERS && m - help - This build markers example modules. - config SAMPLE_TRACEPOINTS tristate "Build tracepoints examples -- loadable modules only" depends on TRACEPOINTS && m diff --git a/samples/Makefile b/samples/Makefile index 13e4b470b539..43343a03b1f4 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,3 +1,3 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/ +obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ diff --git a/samples/markers/Makefile b/samples/markers/Makefile deleted file mode 100644 index 6d7231265f0f..000000000000 --- a/samples/markers/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# builds the kprobes example kernel modules; -# then to use one (as root): insmod - -obj-$(CONFIG_SAMPLE_MARKERS) += probe-example.o marker-example.o diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c deleted file mode 100644 index e9cd9c0bc84f..000000000000 --- a/samples/markers/marker-example.c +++ /dev/null @@ -1,53 +0,0 @@ -/* marker-example.c - * - * Executes a marker when /proc/marker-example is opened. - * - * (C) Copyright 2007 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include -#include -#include - -struct proc_dir_entry *pentry_example; - -static int my_open(struct inode *inode, struct file *file) -{ - int i; - - trace_mark(subsystem_event, "integer %d string %s", 123, - "example string"); - for (i = 0; i < 10; i++) - trace_mark(subsystem_eventb, MARK_NOARGS); - return -EPERM; -} - -static struct file_operations mark_ops = { - .open = my_open, -}; - -static int __init example_init(void) -{ - printk(KERN_ALERT "example init\n"); - pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops); - if (!pentry_example) - return -EPERM; - return 0; -} - -static void __exit example_exit(void) -{ - printk(KERN_ALERT "example exit\n"); - remove_proc_entry("marker-example", NULL); -} - -module_init(example_init) -module_exit(example_exit) - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("Marker example"); diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c deleted file mode 100644 index 2dfb3b32937e..000000000000 --- a/samples/markers/probe-example.c +++ /dev/null @@ -1,92 +0,0 @@ -/* probe-example.c - * - * Connects two functions to marker call sites. - * - * (C) Copyright 2007 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include -#include -#include -#include - -struct probe_data { - const char *name; - const char *format; - marker_probe_func *probe_func; -}; - -void probe_subsystem_event(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - /* Declare args */ - unsigned int value; - const char *mystr; - - /* Assign args */ - value = va_arg(*args, typeof(value)); - mystr = va_arg(*args, typeof(mystr)); - - /* Call printk */ - printk(KERN_INFO "Value %u, string %s\n", value, mystr); - - /* or count, check rights, serialize data in a buffer */ -} - -atomic_t eventb_count = ATOMIC_INIT(0); - -void probe_subsystem_eventb(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - /* Increment counter */ - atomic_inc(&eventb_count); -} - -static struct probe_data probe_array[] = -{ - { .name = "subsystem_event", - .format = "integer %d string %s", - .probe_func = probe_subsystem_event }, - { .name = "subsystem_eventb", - .format = MARK_NOARGS, - .probe_func = probe_subsystem_eventb }, -}; - -static int __init probe_init(void) -{ - int result; - int i; - - for (i = 0; i < ARRAY_SIZE(probe_array); i++) { - result = marker_probe_register(probe_array[i].name, - probe_array[i].format, - probe_array[i].probe_func, &probe_array[i]); - if (result) - printk(KERN_INFO "Unable to register probe %s\n", - probe_array[i].name); - } - return 0; -} - -static void __exit probe_fini(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(probe_array); i++) - marker_probe_unregister(probe_array[i].name, - probe_array[i].probe_func, &probe_array[i]); - printk(KERN_INFO "Number of event b : %u\n", - atomic_read(&eventb_count)); - marker_synchronize_unregister(); -} - -module_init(probe_init); -module_exit(probe_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("SUBSYSTEM Probe"); diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index f4053dc7b5d6..8f14c81abbc7 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -13,7 +13,6 @@ # 2) modpost is then used to # 3) create one .mod.c file pr. module # 4) create one Module.symvers file with CRC for all exported symbols -# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers # 5) compile all .mod.c files # 6) final link of the module to a file @@ -59,10 +58,6 @@ include scripts/Makefile.lib kernelsymfile := $(objtree)/Module.symvers modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers -kernelmarkersfile := $(objtree)/Module.markers -modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers - -markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) # Step 1), find all modules listed in $(MODVERDIR)/ __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) @@ -85,8 +80,6 @@ modpost = scripts/mod/modpost \ $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ - $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ - $(if $(CONFIG_MARKERS),-M $(markersfile)) \ $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \ $(if $(cross_build),-c) @@ -101,17 +94,12 @@ quiet_cmd_kernel-mod = MODPOST $@ cmd_kernel-mod = $(modpost) $@ vmlinux.o: FORCE - @rm -fr $(kernelmarkersfile) $(call cmd,kernel-mod) # Declare generated files as targets for modpost $(symverfile): __modpost ; $(modules:.ko=.mod.c): __modpost ; -ifdef CONFIG_MARKERS -$(markersfile): __modpost ; -endif - # Step 5), compile all *.mod.c files -- cgit v1.2.3-59-g8ed1b