aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace/trace_events.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/trace_events.c')
-rw-r--r--kernel/trace/trace_events.c925
1 files changed, 710 insertions, 215 deletions
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f38234ecea18..0356cae0cf74 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -38,6 +38,15 @@ DEFINE_MUTEX(event_mutex);
LIST_HEAD(ftrace_events);
static LIST_HEAD(ftrace_generic_fields);
static LIST_HEAD(ftrace_common_fields);
+static bool eventdir_initialized;
+
+static LIST_HEAD(module_strings);
+
+struct module_string {
+ struct list_head next;
+ struct module *module;
+ char *str;
+};
#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
@@ -167,6 +176,7 @@ static int trace_define_generic_fields(void)
__generic_field(int, CPU, FILTER_CPU);
__generic_field(int, cpu, FILTER_CPU);
+ __generic_field(int, common_cpu, FILTER_CPU);
__generic_field(char *, COMM, FILTER_COMM);
__generic_field(char *, comm, FILTER_COMM);
@@ -180,6 +190,7 @@ static int trace_define_common_fields(void)
__common_field(unsigned short, type);
__common_field(unsigned char, flags);
+ /* Holds both preempt_count and migrate_disable */
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
@@ -216,6 +227,221 @@ int trace_event_get_offsets(struct trace_event_call *call)
return tail->offset + tail->size;
}
+/*
+ * Check if the referenced field is an array and return true,
+ * as arrays are OK to dereference.
+ */
+static bool test_field(const char *fmt, struct trace_event_call *call)
+{
+ struct trace_event_fields *field = call->class->fields_array;
+ const char *array_descriptor;
+ const char *p = fmt;
+ int len;
+
+ if (!(len = str_has_prefix(fmt, "REC->")))
+ return false;
+ fmt += len;
+ for (p = fmt; *p; p++) {
+ if (!isalnum(*p) && *p != '_')
+ break;
+ }
+ len = p - fmt;
+
+ for (; field->type; field++) {
+ if (strncmp(field->name, fmt, len) ||
+ field->name[len])
+ continue;
+ array_descriptor = strchr(field->type, '[');
+ /* This is an array and is OK to dereference. */
+ return array_descriptor != NULL;
+ }
+ return false;
+}
+
+/*
+ * Examine the print fmt of the event looking for unsafe dereference
+ * pointers using %p* that could be recorded in the trace event and
+ * much later referenced after the pointer was freed. Dereferencing
+ * pointers are OK, if it is dereferenced into the event itself.
+ */
+static void test_event_printk(struct trace_event_call *call)
+{
+ u64 dereference_flags = 0;
+ bool first = true;
+ const char *fmt, *c, *r, *a;
+ int parens = 0;
+ char in_quote = 0;
+ int start_arg = 0;
+ int arg = 0;
+ int i;
+
+ fmt = call->print_fmt;
+
+ if (!fmt)
+ return;
+
+ for (i = 0; fmt[i]; i++) {
+ switch (fmt[i]) {
+ case '\\':
+ i++;
+ if (!fmt[i])
+ return;
+ continue;
+ case '"':
+ case '\'':
+ /*
+ * The print fmt starts with a string that
+ * is processed first to find %p* usage,
+ * then after the first string, the print fmt
+ * contains arguments that are used to check
+ * if the dereferenced %p* usage is safe.
+ */
+ if (first) {
+ if (fmt[i] == '\'')
+ continue;
+ if (in_quote) {
+ arg = 0;
+ first = false;
+ /*
+ * If there was no %p* uses
+ * the fmt is OK.
+ */
+ if (!dereference_flags)
+ return;
+ }
+ }
+ if (in_quote) {
+ if (in_quote == fmt[i])
+ in_quote = 0;
+ } else {
+ in_quote = fmt[i];
+ }
+ continue;
+ case '%':
+ if (!first || !in_quote)
+ continue;
+ i++;
+ if (!fmt[i])
+ return;
+ switch (fmt[i]) {
+ case '%':
+ continue;
+ case 'p':
+ /* Find dereferencing fields */
+ switch (fmt[i + 1]) {
+ case 'B': case 'R': case 'r':
+ case 'b': case 'M': case 'm':
+ case 'I': case 'i': case 'E':
+ case 'U': case 'V': case 'N':
+ case 'a': case 'd': case 'D':
+ case 'g': case 't': case 'C':
+ case 'O': case 'f':
+ if (WARN_ONCE(arg == 63,
+ "Too many args for event: %s",
+ trace_event_name(call)))
+ return;
+ dereference_flags |= 1ULL << arg;
+ }
+ break;
+ default:
+ {
+ bool star = false;
+ int j;
+
+ /* Increment arg if %*s exists. */
+ for (j = 0; fmt[i + j]; j++) {
+ if (isdigit(fmt[i + j]) ||
+ fmt[i + j] == '.')
+ continue;
+ if (fmt[i + j] == '*') {
+ star = true;
+ continue;
+ }
+ if ((fmt[i + j] == 's') && star)
+ arg++;
+ break;
+ }
+ break;
+ } /* default */
+
+ } /* switch */
+ arg++;
+ continue;
+ case '(':
+ if (in_quote)
+ continue;
+ parens++;
+ continue;
+ case ')':
+ if (in_quote)
+ continue;
+ parens--;
+ if (WARN_ONCE(parens < 0,
+ "Paren mismatch for event: %s\narg='%s'\n%*s",
+ trace_event_name(call),
+ fmt + start_arg,
+ (i - start_arg) + 5, "^"))
+ return;
+ continue;
+ case ',':
+ if (in_quote || parens)
+ continue;
+ i++;
+ while (isspace(fmt[i]))
+ i++;
+ start_arg = i;
+ if (!(dereference_flags & (1ULL << arg)))
+ goto next_arg;
+
+ /* Find the REC-> in the argument */
+ c = strchr(fmt + i, ',');
+ r = strstr(fmt + i, "REC->");
+ if (r && (!c || r < c)) {
+ /*
+ * Addresses of events on the buffer,
+ * or an array on the buffer is
+ * OK to dereference.
+ * There's ways to fool this, but
+ * this is to catch common mistakes,
+ * not malicious code.
+ */
+ a = strchr(fmt + i, '&');
+ if ((a && (a < r)) || test_field(r, call))
+ dereference_flags &= ~(1ULL << arg);
+ } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) &&
+ (!c || r < c)) {
+ dereference_flags &= ~(1ULL << arg);
+ } else if ((r = strstr(fmt + i, "__get_sockaddr(")) &&
+ (!c || r < c)) {
+ dereference_flags &= ~(1ULL << arg);
+ }
+
+ next_arg:
+ i--;
+ arg++;
+ }
+ }
+
+ /*
+ * If you triggered the below warning, the trace event reported
+ * uses an unsafe dereference pointer %p*. As the data stored
+ * at the trace event time may no longer exist when the trace
+ * event is printed, dereferencing to the original source is
+ * unsafe. The source of the dereference must be copied into the
+ * event itself, and the dereference must access the copy instead.
+ */
+ if (WARN_ON_ONCE(dereference_flags)) {
+ arg = 1;
+ while (!(dereference_flags & 1)) {
+ dereference_flags >>= 1;
+ arg++;
+ }
+ pr_warn("event %s has unsafe dereference of argument %d\n",
+ trace_event_name(call), arg);
+ pr_warn("print_fmt: %s\n", fmt);
+ }
+}
+
int trace_event_raw_init(struct trace_event_call *call)
{
int id;
@@ -224,6 +450,8 @@ int trace_event_raw_init(struct trace_event_call *call)
if (!id)
return -ENODEV;
+ test_event_printk(call);
+
return 0;
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);
@@ -232,10 +460,13 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
{
struct trace_array *tr = trace_file->tr;
struct trace_array_cpu *data;
+ struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
pid_list = rcu_dereference_raw(tr->filtered_pids);
- if (!pid_list)
+ no_pid_list = rcu_dereference_raw(tr->filtered_no_pids);
+
+ if (!pid_list && !no_pid_list)
return false;
data = this_cpu_ptr(tr->array_buffer.data);
@@ -254,22 +485,19 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
trace_event_ignore_this_pid(trace_file))
return NULL;
- local_save_flags(fbuffer->flags);
- fbuffer->pc = preempt_count();
/*
* If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
* preemption (adding one to the preempt_count). Since we are
* interested in the preempt_count at the time the tracepoint was
* hit, we need to subtract one to offset the increment.
*/
- if (IS_ENABLED(CONFIG_PREEMPTION))
- fbuffer->pc--;
+ fbuffer->trace_ctx = tracing_gen_ctx_dec();
fbuffer->trace_file = trace_file;
fbuffer->event =
trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
event_call->event.type, len,
- fbuffer->flags, fbuffer->pc);
+ fbuffer->trace_ctx);
if (!fbuffer->event)
return NULL;
@@ -510,6 +738,9 @@ event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
pid_list = rcu_dereference_raw(tr->filtered_pids);
trace_filter_add_remove_task(pid_list, NULL, task);
+
+ pid_list = rcu_dereference_raw(tr->filtered_no_pids);
+ trace_filter_add_remove_task(pid_list, NULL, task);
}
static void
@@ -522,6 +753,9 @@ event_filter_pid_sched_process_fork(void *data,
pid_list = rcu_dereference_sched(tr->filtered_pids);
trace_filter_add_remove_task(pid_list, self, task);
+
+ pid_list = rcu_dereference_sched(tr->filtered_no_pids);
+ trace_filter_add_remove_task(pid_list, self, task);
}
void trace_event_follow_fork(struct trace_array *tr, bool enable)
@@ -529,47 +763,64 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
if (enable) {
register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
tr, INT_MIN);
- register_trace_prio_sched_process_exit(event_filter_pid_sched_process_exit,
+ register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit,
tr, INT_MAX);
} else {
unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
tr);
- unregister_trace_sched_process_exit(event_filter_pid_sched_process_exit,
+ unregister_trace_sched_process_free(event_filter_pid_sched_process_exit,
tr);
}
}
static void
event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned int prev_state)
{
struct trace_array *tr = data;
+ struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
+ bool ret;
pid_list = rcu_dereference_sched(tr->filtered_pids);
+ no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
- this_cpu_write(tr->array_buffer.data->ignore_pid,
- trace_ignore_this_task(pid_list, prev) &&
- trace_ignore_this_task(pid_list, next));
+ /*
+ * Sched switch is funny, as we only want to ignore it
+ * in the notrace case if both prev and next should be ignored.
+ */
+ ret = trace_ignore_this_task(NULL, no_pid_list, prev) &&
+ trace_ignore_this_task(NULL, no_pid_list, next);
+
+ this_cpu_write(tr->array_buffer.data->ignore_pid, ret ||
+ (trace_ignore_this_task(pid_list, NULL, prev) &&
+ trace_ignore_this_task(pid_list, NULL, next)));
}
static void
event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned int prev_state)
{
struct trace_array *tr = data;
+ struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
pid_list = rcu_dereference_sched(tr->filtered_pids);
+ no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
this_cpu_write(tr->array_buffer.data->ignore_pid,
- trace_ignore_this_task(pid_list, next));
+ trace_ignore_this_task(pid_list, no_pid_list, next));
}
static void
event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
{
struct trace_array *tr = data;
+ struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
/* Nothing to do if we are already tracing */
@@ -577,15 +828,17 @@ event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
return;
pid_list = rcu_dereference_sched(tr->filtered_pids);
+ no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
this_cpu_write(tr->array_buffer.data->ignore_pid,
- trace_ignore_this_task(pid_list, task));
+ trace_ignore_this_task(pid_list, no_pid_list, task));
}
static void
event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
{
struct trace_array *tr = data;
+ struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
/* Nothing to do if we are not tracing */
@@ -593,23 +846,15 @@ event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
return;
pid_list = rcu_dereference_sched(tr->filtered_pids);
+ no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
/* Set tracing if current is enabled */
this_cpu_write(tr->array_buffer.data->ignore_pid,
- trace_ignore_this_task(pid_list, current));
+ trace_ignore_this_task(pid_list, no_pid_list, current));
}
-static void __ftrace_clear_event_pids(struct trace_array *tr)
+static void unregister_pid_events(struct trace_array *tr)
{
- struct trace_pid_list *pid_list;
- struct trace_event_file *file;
- int cpu;
-
- pid_list = rcu_dereference_protected(tr->filtered_pids,
- lockdep_is_held(&event_mutex));
- if (!pid_list)
- return;
-
unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
@@ -621,26 +866,55 @@ static void __ftrace_clear_event_pids(struct trace_array *tr)
unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
+}
- list_for_each_entry(file, &tr->events, list) {
- clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
+{
+ struct trace_pid_list *pid_list;
+ struct trace_pid_list *no_pid_list;
+ struct trace_event_file *file;
+ int cpu;
+
+ pid_list = rcu_dereference_protected(tr->filtered_pids,
+ lockdep_is_held(&event_mutex));
+ no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+ lockdep_is_held(&event_mutex));
+
+ /* Make sure there's something to do */
+ if (!pid_type_enabled(type, pid_list, no_pid_list))
+ return;
+
+ if (!still_need_pid_events(type, pid_list, no_pid_list)) {
+ unregister_pid_events(tr);
+
+ list_for_each_entry(file, &tr->events, list) {
+ clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+ }
+
+ for_each_possible_cpu(cpu)
+ per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
}
- for_each_possible_cpu(cpu)
- per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
+ if (type & TRACE_PIDS)
+ rcu_assign_pointer(tr->filtered_pids, NULL);
- rcu_assign_pointer(tr->filtered_pids, NULL);
+ if (type & TRACE_NO_PIDS)
+ rcu_assign_pointer(tr->filtered_no_pids, NULL);
/* Wait till all users are no longer using pid filtering */
tracepoint_synchronize_unregister();
- trace_free_pid_list(pid_list);
+ if ((type & TRACE_PIDS) && pid_list)
+ trace_pid_list_free(pid_list);
+
+ if ((type & TRACE_NO_PIDS) && no_pid_list)
+ trace_pid_list_free(no_pid_list);
}
-static void ftrace_clear_event_pids(struct trace_array *tr)
+static void ftrace_clear_event_pids(struct trace_array *tr, int type)
{
mutex_lock(&event_mutex);
- __ftrace_clear_event_pids(tr);
+ __ftrace_clear_event_pids(tr, type);
mutex_unlock(&event_mutex);
}
@@ -1013,15 +1287,32 @@ static void t_stop(struct seq_file *m, void *p)
}
static void *
-p_next(struct seq_file *m, void *v, loff_t *pos)
+__next(struct seq_file *m, void *v, loff_t *pos, int type)
{
struct trace_array *tr = m->private;
- struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
+ struct trace_pid_list *pid_list;
+
+ if (type == TRACE_PIDS)
+ pid_list = rcu_dereference_sched(tr->filtered_pids);
+ else
+ pid_list = rcu_dereference_sched(tr->filtered_no_pids);
return trace_pid_next(pid_list, v, pos);
}
-static void *p_start(struct seq_file *m, loff_t *pos)
+static void *
+p_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ return __next(m, v, pos, TRACE_PIDS);
+}
+
+static void *
+np_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ return __next(m, v, pos, TRACE_NO_PIDS);
+}
+
+static void *__start(struct seq_file *m, loff_t *pos, int type)
__acquires(RCU)
{
struct trace_pid_list *pid_list;
@@ -1036,7 +1327,10 @@ static void *p_start(struct seq_file *m, loff_t *pos)
mutex_lock(&event_mutex);
rcu_read_lock_sched();
- pid_list = rcu_dereference_sched(tr->filtered_pids);
+ if (type == TRACE_PIDS)
+ pid_list = rcu_dereference_sched(tr->filtered_pids);
+ else
+ pid_list = rcu_dereference_sched(tr->filtered_no_pids);
if (!pid_list)
return NULL;
@@ -1044,6 +1338,18 @@ static void *p_start(struct seq_file *m, loff_t *pos)
return trace_pid_start(pid_list, pos);
}
+static void *p_start(struct seq_file *m, loff_t *pos)
+ __acquires(RCU)
+{
+ return __start(m, pos, TRACE_PIDS);
+}
+
+static void *np_start(struct seq_file *m, loff_t *pos)
+ __acquires(RCU)
+{
+ return __start(m, pos, TRACE_NO_PIDS);
+}
+
static void p_stop(struct seq_file *m, void *p)
__releases(RCU)
{
@@ -1134,7 +1440,8 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
- if (!trace_event_name(call) || !call->class || !call->class->reg)
+ if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
+ !trace_event_name(call) || !call->class || !call->class->reg)
continue;
if (system && strcmp(call->class->system, system->name) != 0)
@@ -1418,9 +1725,9 @@ static LIST_HEAD(event_subsystems);
static int subsystem_open(struct inode *inode, struct file *filp)
{
+ struct trace_subsystem_dir *dir = NULL, *iter_dir;
+ struct trace_array *tr = NULL, *iter_tr;
struct event_subsystem *system = NULL;
- struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */
- struct trace_array *tr;
int ret;
if (tracing_is_disabled())
@@ -1429,10 +1736,12 @@ static int subsystem_open(struct inode *inode, struct file *filp)
/* Make sure the system still exists */
mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
- list_for_each_entry(tr, &ftrace_trace_arrays, list) {
- list_for_each_entry(dir, &tr->systems, list) {
- if (dir == inode->i_private) {
+ list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
+ list_for_each_entry(iter_dir, &iter_tr->systems, list) {
+ if (iter_dir == inode->i_private) {
/* Don't open systems with no events */
+ tr = iter_tr;
+ dir = iter_dir;
if (dir->nr_events) {
__get_system_dir(dir);
system = dir->subsystem;
@@ -1448,9 +1757,6 @@ static int subsystem_open(struct inode *inode, struct file *filp)
if (!system)
return -ENODEV;
- /* Some versions of gcc think dir can be uninitialized here */
- WARN_ON(!dir);
-
/* Still need to increment the ref count of the system */
if (trace_array_get(tr) < 0) {
put_system(dir);
@@ -1588,6 +1894,7 @@ static void ignore_task_cpu(void *data)
{
struct trace_array *tr = data;
struct trace_pid_list *pid_list;
+ struct trace_pid_list *no_pid_list;
/*
* This function is called by on_each_cpu() while the
@@ -1595,18 +1902,50 @@ static void ignore_task_cpu(void *data)
*/
pid_list = rcu_dereference_protected(tr->filtered_pids,
mutex_is_locked(&event_mutex));
+ no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+ mutex_is_locked(&event_mutex));
this_cpu_write(tr->array_buffer.data->ignore_pid,
- trace_ignore_this_task(pid_list, current));
+ trace_ignore_this_task(pid_list, no_pid_list, current));
+}
+
+static void register_pid_events(struct trace_array *tr)
+{
+ /*
+ * Register a probe that is called before all other probes
+ * to set ignore_pid if next or prev do not match.
+ * Register a probe this is called after all other probes
+ * to only keep ignore_pid set if next pid matches.
+ */
+ register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
+ tr, INT_MAX);
+ register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
+ tr, 0);
+
+ register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
+ tr, INT_MAX);
+ register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
+ tr, 0);
+
+ register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
+ tr, INT_MAX);
+ register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
+ tr, 0);
+
+ register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
+ tr, INT_MAX);
+ register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
+ tr, 0);
}
static ssize_t
-ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
+event_pid_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos, int type)
{
struct seq_file *m = filp->private_data;
struct trace_array *tr = m->private;
struct trace_pid_list *filtered_pids = NULL;
+ struct trace_pid_list *other_pids = NULL;
struct trace_pid_list *pid_list;
struct trace_event_file *file;
ssize_t ret;
@@ -1620,14 +1959,26 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
mutex_lock(&event_mutex);
- filtered_pids = rcu_dereference_protected(tr->filtered_pids,
- lockdep_is_held(&event_mutex));
+ if (type == TRACE_PIDS) {
+ filtered_pids = rcu_dereference_protected(tr->filtered_pids,
+ lockdep_is_held(&event_mutex));
+ other_pids = rcu_dereference_protected(tr->filtered_no_pids,
+ lockdep_is_held(&event_mutex));
+ } else {
+ filtered_pids = rcu_dereference_protected(tr->filtered_no_pids,
+ lockdep_is_held(&event_mutex));
+ other_pids = rcu_dereference_protected(tr->filtered_pids,
+ lockdep_is_held(&event_mutex));
+ }
ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
if (ret < 0)
goto out;
- rcu_assign_pointer(tr->filtered_pids, pid_list);
+ if (type == TRACE_PIDS)
+ rcu_assign_pointer(tr->filtered_pids, pid_list);
+ else
+ rcu_assign_pointer(tr->filtered_no_pids, pid_list);
list_for_each_entry(file, &tr->events, list) {
set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
@@ -1635,33 +1986,9 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
if (filtered_pids) {
tracepoint_synchronize_unregister();
- trace_free_pid_list(filtered_pids);
- } else if (pid_list) {
- /*
- * Register a probe that is called before all other probes
- * to set ignore_pid if next or prev do not match.
- * Register a probe this is called after all other probes
- * to only keep ignore_pid set if next pid matches.
- */
- register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
- tr, INT_MAX);
- register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
- tr, 0);
-
- register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
- tr, INT_MAX);
- register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
- tr, 0);
-
- register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
- tr, INT_MAX);
- register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
- tr, 0);
-
- register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
- tr, INT_MAX);
- register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
- tr, 0);
+ trace_pid_list_free(filtered_pids);
+ } else if (pid_list && !other_pids) {
+ register_pid_events(tr);
}
/*
@@ -1680,9 +2007,24 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
return ret;
}
+static ssize_t
+ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
+}
+
+static ssize_t
+ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
+}
+
static int ftrace_event_avail_open(struct inode *inode, struct file *file);
static int ftrace_event_set_open(struct inode *inode, struct file *file);
static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
+static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
static int ftrace_event_release(struct inode *inode, struct file *file);
static const struct seq_operations show_event_seq_ops = {
@@ -1706,6 +2048,13 @@ static const struct seq_operations show_set_pid_seq_ops = {
.stop = p_stop,
};
+static const struct seq_operations show_set_no_pid_seq_ops = {
+ .start = np_start,
+ .next = np_next,
+ .show = trace_pid_show,
+ .stop = p_stop,
+};
+
static const struct file_operations ftrace_avail_fops = {
.open = ftrace_event_avail_open,
.read = seq_read,
@@ -1729,6 +2078,14 @@ static const struct file_operations ftrace_set_event_pid_fops = {
.release = ftrace_event_release,
};
+static const struct file_operations ftrace_set_event_notrace_pid_fops = {
+ .open = ftrace_event_set_npid_open,
+ .read = seq_read,
+ .write = ftrace_event_npid_write,
+ .llseek = seq_lseek,
+ .release = ftrace_event_release,
+};
+
static const struct file_operations ftrace_enable_fops = {
.open = tracing_open_generic,
.read = event_enable_read,
@@ -1858,7 +2215,28 @@ ftrace_event_set_pid_open(struct inode *inode, struct file *file)
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
- ftrace_clear_event_pids(tr);
+ ftrace_clear_event_pids(tr, TRACE_PIDS);
+
+ ret = ftrace_event_open(inode, file, seq_ops);
+ if (ret < 0)
+ trace_array_put(tr);
+ return ret;
+}
+
+static int
+ftrace_event_set_npid_open(struct inode *inode, struct file *file)
+{
+ const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops;
+ struct trace_array *tr = inode->i_private;
+ int ret;
+
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
+
+ if ((file->f_mode & FMODE_WRITE) &&
+ (file->f_flags & O_TRUNC))
+ ftrace_clear_event_pids(tr, TRACE_NO_PIDS);
ret = ftrace_event_open(inode, file, seq_ops);
if (ret < 0)
@@ -1903,8 +2281,8 @@ static struct dentry *
event_subsystem_dir(struct trace_array *tr, const char *name,
struct trace_event_file *file, struct dentry *parent)
{
+ struct event_subsystem *system, *iter;
struct trace_subsystem_dir *dir;
- struct event_subsystem *system;
struct dentry *entry;
/* First see if we did not already create this dir */
@@ -1918,13 +2296,13 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
}
/* Now see if the system itself exists. */
- list_for_each_entry(system, &event_subsystems, list) {
- if (strcmp(system->name, name) == 0)
+ system = NULL;
+ list_for_each_entry(iter, &event_subsystems, list) {
+ if (strcmp(iter->name, name) == 0) {
+ system = iter;
break;
+ }
}
- /* Reset system variable when not found */
- if (&system->list == &event_subsystems)
- system = NULL;
dir = kmalloc(sizeof(*dir), GFP_KERNEL);
if (!dir)
@@ -1950,16 +2328,21 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
dir->subsystem = system;
file->system = dir;
- entry = tracefs_create_file("filter", 0644, dir->entry, dir,
- &ftrace_subsystem_filter_fops);
- if (!entry) {
- kfree(system->filter);
- system->filter = NULL;
- pr_warn("Could not create tracefs '%s/filter' entry\n", name);
- }
+ /* the ftrace system is special, do not create enable or filter files */
+ if (strcmp(name, "ftrace") != 0) {
- trace_create_file("enable", 0644, dir->entry, dir,
- &ftrace_system_enable_fops);
+ entry = tracefs_create_file("filter", TRACE_MODE_WRITE,
+ dir->entry, dir,
+ &ftrace_subsystem_filter_fops);
+ if (!entry) {
+ kfree(system->filter);
+ system->filter = NULL;
+ pr_warn("Could not create tracefs '%s/filter' entry\n", name);
+ }
+
+ trace_create_file("enable", TRACE_MODE_WRITE, dir->entry, dir,
+ &ftrace_system_enable_fops);
+ }
list_add(&dir->list, &tr->systems);
@@ -1975,11 +2358,47 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
}
static int
+event_define_fields(struct trace_event_call *call)
+{
+ struct list_head *head;
+ int ret = 0;
+
+ /*
+ * Other events may have the same class. Only update
+ * the fields if they are not already defined.
+ */
+ head = trace_get_fields(call);
+ if (list_empty(head)) {
+ struct trace_event_fields *field = call->class->fields_array;
+ unsigned int offset = sizeof(struct trace_entry);
+
+ for (; field->type; field++) {
+ if (field->type == TRACE_FUNCTION_TYPE) {
+ field->define_fields(call);
+ break;
+ }
+
+ offset = ALIGN(offset, field->align);
+ ret = trace_define_field(call, field->type, field->name,
+ offset, field->size,
+ field->is_signed, field->filter_type);
+ if (WARN_ON_ONCE(ret)) {
+ pr_err("error code is %d\n", ret);
+ break;
+ }
+
+ offset += field->size;
+ }
+ }
+
+ return ret;
+}
+
+static int
event_create_dir(struct dentry *parent, struct trace_event_file *file)
{
struct trace_event_call *call = file->event_call;
struct trace_array *tr = file->tr;
- struct list_head *head;
struct dentry *d_events;
const char *name;
int ret;
@@ -2003,45 +2422,20 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
}
if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
- trace_create_file("enable", 0644, file->dir, file,
+ trace_create_file("enable", TRACE_MODE_WRITE, file->dir, file,
&ftrace_enable_fops);
#ifdef CONFIG_PERF_EVENTS
if (call->event.type && call->class->reg)
- trace_create_file("id", 0444, file->dir,
+ trace_create_file("id", TRACE_MODE_READ, file->dir,
(void *)(long)call->event.type,
&ftrace_event_id_fops);
#endif
- /*
- * Other events may have the same class. Only update
- * the fields if they are not already defined.
- */
- head = trace_get_fields(call);
- if (list_empty(head)) {
- struct trace_event_fields *field = call->class->fields_array;
- unsigned int offset = sizeof(struct trace_entry);
-
- for (; field->type; field++) {
- if (field->type == TRACE_FUNCTION_TYPE) {
- ret = field->define_fields(call);
- break;
- }
-
- offset = ALIGN(offset, field->align);
- ret = trace_define_field(call, field->type, field->name,
- offset, field->size,
- field->is_signed, field->filter_type);
- if (ret)
- break;
-
- offset += field->size;
- }
- if (ret < 0) {
- pr_warn("Could not initialize trace point events/%s\n",
- name);
- return -1;
- }
+ ret = event_define_fields(call);
+ if (ret < 0) {
+ pr_warn("Could not initialize trace point events/%s\n", name);
+ return ret;
}
/*
@@ -2049,18 +2443,22 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
* triggers or filters.
*/
if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
- trace_create_file("filter", 0644, file->dir, file,
- &ftrace_event_filter_fops);
+ trace_create_file("filter", TRACE_MODE_WRITE, file->dir,
+ file, &ftrace_event_filter_fops);
- trace_create_file("trigger", 0644, file->dir, file,
- &event_trigger_fops);
+ trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
+ file, &event_trigger_fops);
}
#ifdef CONFIG_HIST_TRIGGERS
- trace_create_file("hist", 0444, file->dir, file,
+ trace_create_file("hist", TRACE_MODE_READ, file->dir, file,
&event_hist_fops);
#endif
- trace_create_file("format", 0444, file->dir, call,
+#ifdef CONFIG_HIST_TRIGGERS_DEBUG
+ trace_create_file("hist_debug", TRACE_MODE_READ, file->dir, file,
+ &event_hist_debug_fops);
+#endif
+ trace_create_file("format", TRACE_MODE_READ, file->dir, call,
&ftrace_event_format_fops);
#ifdef CONFIG_TRACE_EVENT_INJECT
@@ -2148,7 +2546,10 @@ __register_event(struct trace_event_call *call, struct module *mod)
return ret;
list_add(&call->list, &ftrace_events);
- call->mod = mod;
+ if (call->flags & TRACE_EVENT_FL_DYNAMIC)
+ atomic_set(&call->refcnt, 0);
+ else
+ call->module = mod;
return 0;
}
@@ -2251,6 +2652,76 @@ static void update_event_printk(struct trace_event_call *call,
}
}
+static void add_str_to_module(struct module *module, char *str)
+{
+ struct module_string *modstr;
+
+ modstr = kmalloc(sizeof(*modstr), GFP_KERNEL);
+
+ /*
+ * If we failed to allocate memory here, then we'll just
+ * let the str memory leak when the module is removed.
+ * If this fails to allocate, there's worse problems than
+ * a leaked string on module removal.
+ */
+ if (WARN_ON_ONCE(!modstr))
+ return;
+
+ modstr->module = module;
+ modstr->str = str;
+
+ list_add(&modstr->next, &module_strings);
+}
+
+static void update_event_fields(struct trace_event_call *call,
+ struct trace_eval_map *map)
+{
+ struct ftrace_event_field *field;
+ struct list_head *head;
+ char *ptr;
+ char *str;
+ int len = strlen(map->eval_string);
+
+ /* Dynamic events should never have field maps */
+ if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC))
+ return;
+
+ head = trace_get_fields(call);
+ list_for_each_entry(field, head, link) {
+ ptr = strchr(field->type, '[');
+ if (!ptr)
+ continue;
+ ptr++;
+
+ if (!isalpha(*ptr) && *ptr != '_')
+ continue;
+
+ if (strncmp(map->eval_string, ptr, len) != 0)
+ continue;
+
+ str = kstrdup(field->type, GFP_KERNEL);
+ if (WARN_ON_ONCE(!str))
+ return;
+ ptr = str + (ptr - field->type);
+ ptr = eval_replace(ptr, map, len);
+ /* enum/sizeof string smaller than value */
+ if (WARN_ON_ONCE(!ptr)) {
+ kfree(str);
+ continue;
+ }
+
+ /*
+ * If the event is part of a module, then we need to free the string
+ * when the module is removed. Otherwise, it will stay allocated
+ * until a reboot.
+ */
+ if (call->module)
+ add_str_to_module(call->module, str);
+
+ field->type = str;
+ }
+}
+
void trace_event_eval_update(struct trace_eval_map **map, int len)
{
struct trace_event_call *call, *p;
@@ -2269,9 +2740,9 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
}
/*
- * Since calls are grouped by systems, the likelyhood that the
+ * Since calls are grouped by systems, the likelihood that the
* next call in the iteration belongs to the same system as the
- * previous call is high. As an optimization, we skip seaching
+ * previous call is high. As an optimization, we skip searching
* for a map[] that matches the call's system if the last call
* was from the same system. That's what last_i is for. If the
* call has the same system as the previous call, then last_i
@@ -2286,6 +2757,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
first = false;
}
update_event_printk(call, map[i]);
+ update_event_fields(call, map[i]);
}
}
}
@@ -2296,12 +2768,24 @@ static struct trace_event_file *
trace_create_new_event(struct trace_event_call *call,
struct trace_array *tr)
{
+ struct trace_pid_list *no_pid_list;
+ struct trace_pid_list *pid_list;
struct trace_event_file *file;
+ unsigned int first;
file = kmem_cache_alloc(file_cachep, GFP_TRACE);
if (!file)
return NULL;
+ pid_list = rcu_dereference_protected(tr->filtered_pids,
+ lockdep_is_held(&event_mutex));
+ no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+ lockdep_is_held(&event_mutex));
+
+ if (!trace_pid_list_first(pid_list, &first) ||
+ !trace_pid_list_first(no_pid_list, &first))
+ file->flags |= EVENT_FILE_FL_PID_FILTER;
+
file->event_call = call;
file->tr = tr;
atomic_set(&file->sm_ref, 0);
@@ -2322,15 +2806,18 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
if (!file)
return -ENOMEM;
- return event_create_dir(tr->event_dir, file);
+ if (eventdir_initialized)
+ return event_create_dir(tr->event_dir, file);
+ else
+ return event_define_fields(call);
}
/*
- * Just create a decriptor for early init. A descriptor is required
+ * Just create a descriptor for early init. A descriptor is required
* for enabling events at boot. We want to enable events before
* the filesystem is initialized.
*/
-static __init int
+static int
__trace_early_add_new_event(struct trace_event_call *call,
struct trace_array *tr)
{
@@ -2340,7 +2827,7 @@ __trace_early_add_new_event(struct trace_event_call *call,
if (!file)
return -ENOMEM;
- return 0;
+ return event_define_fields(call);
}
struct ftrace_module_file_ops;
@@ -2361,6 +2848,7 @@ int trace_add_event_call(struct trace_event_call *call)
mutex_unlock(&trace_types_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(trace_add_event_call);
/*
* Must be called under locking of trace_types_lock, event_mutex and
@@ -2422,6 +2910,7 @@ int trace_remove_event_call(struct trace_event_call *call)
return ret;
}
+EXPORT_SYMBOL_GPL(trace_remove_event_call);
#define for_each_event(event, start, end) \
for (event = start; \
@@ -2456,12 +2945,23 @@ static void trace_module_add_events(struct module *mod)
static void trace_module_remove_events(struct module *mod)
{
struct trace_event_call *call, *p;
+ struct module_string *modstr, *m;
down_write(&trace_event_sem);
list_for_each_entry_safe(call, p, &ftrace_events, list) {
- if (call->mod == mod)
+ if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module)
+ continue;
+ if (call->module == mod)
__trace_remove_event_call(call);
}
+ /* Check for any strings allocade for this module */
+ list_for_each_entry_safe(modstr, m, &module_strings, next) {
+ if (modstr->module != mod)
+ continue;
+ list_del(&modstr->next);
+ kfree(modstr->str);
+ kfree(modstr);
+ }
up_write(&trace_event_sem);
/*
@@ -2493,7 +2993,7 @@ static int trace_module_notify(struct notifier_block *self,
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);
- return 0;
+ return NOTIFY_OK;
}
static struct notifier_block trace_module_nb = {
@@ -2602,7 +3102,7 @@ struct trace_event_file *trace_get_event_file(const char *instance,
}
/* Don't let event modules unload while in use */
- ret = try_module_get(file->event_call->mod);
+ ret = trace_event_try_get_ref(file->event_call);
if (!ret) {
trace_array_put(tr);
ret = -EBUSY;
@@ -2632,7 +3132,7 @@ EXPORT_SYMBOL_GPL(trace_get_event_file);
void trace_put_event_file(struct trace_event_file *file)
{
mutex_lock(&event_mutex);
- module_put(file->event_call->mod);
+ trace_event_put_ref(file->event_call);
mutex_unlock(&event_mutex);
trace_array_put(file->tr);
@@ -2767,7 +3267,7 @@ static int free_probe_data(void *data)
if (!edata->ref) {
/* Remove the SOFT_MODE flag */
__ftrace_event_enable_disable(edata->file, 0, 1);
- module_put(edata->file->event_call->mod);
+ trace_event_put_ref(edata->file->event_call);
kfree(edata);
}
return 0;
@@ -2900,7 +3400,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
out_reg:
/* Don't let event modules unload while probe registered */
- ret = try_module_get(file->event_call->mod);
+ ret = trace_event_try_get_ref(file->event_call);
if (!ret) {
ret = -EBUSY;
goto out_free;
@@ -2930,7 +3430,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
out_disable:
__ftrace_event_enable_disable(file, 0, 1);
out_put:
- module_put(file->event_call->mod);
+ trace_event_put_ref(file->event_call);
out_free:
kfree(data);
goto out;
@@ -2963,14 +3463,13 @@ static inline int register_event_cmds(void) { return 0; }
#endif /* CONFIG_DYNAMIC_FTRACE */
/*
- * The top level array has already had its trace_event_file
- * descriptors created in order to allow for early events to
- * be recorded. This function is called after the tracefs has been
- * initialized, and we now have to create the files associated
- * to the events.
+ * The top level array and trace arrays created by boot-time tracing
+ * have already had its trace_event_file descriptors created in order
+ * to allow for early events to be recorded.
+ * This function is called after the tracefs has been initialized,
+ * and we now have to create the files associated to the events.
*/
-static __init void
-__trace_early_add_event_dirs(struct trace_array *tr)
+static void __trace_early_add_event_dirs(struct trace_array *tr)
{
struct trace_event_file *file;
int ret;
@@ -2985,20 +3484,20 @@ __trace_early_add_event_dirs(struct trace_array *tr)
}
/*
- * For early boot up, the top trace array requires to have
- * a list of events that can be enabled. This must be done before
- * the filesystem is set up in order to allow events to be traced
- * early.
+ * For early boot up, the top trace array and the trace arrays created
+ * by boot-time tracing require to have a list of events that can be
+ * enabled. This must be done before the filesystem is set up in order
+ * to allow events to be traced early.
*/
-static __init void
-__trace_early_add_events(struct trace_array *tr)
+void __trace_early_add_events(struct trace_array *tr)
{
struct trace_event_call *call;
int ret;
list_for_each_entry(call, &ftrace_events, list) {
/* Early boot up should not have any modules loaded */
- if (WARN_ON_ONCE(call->mod))
+ if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) &&
+ WARN_ON_ONCE(call->module))
continue;
ret = __trace_early_add_new_event(call, tr);
@@ -3035,7 +3534,7 @@ static __init int setup_trace_event(char *str)
{
strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
ring_buffer_expanded = true;
- tracing_selftest_disabled = true;
+ disable_tracing_selftest("running event tracing");
return 1;
}
@@ -3048,12 +3547,10 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
struct dentry *d_events;
struct dentry *entry;
- entry = tracefs_create_file("set_event", 0644, parent,
- tr, &ftrace_set_event_fops);
- if (!entry) {
- pr_warn("Could not create tracefs 'set_event' entry\n");
+ entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
+ tr, &ftrace_set_event_fops);
+ if (!entry)
return -ENOMEM;
- }
d_events = tracefs_create_dir("events", parent);
if (!d_events) {
@@ -3061,32 +3558,28 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
return -ENOMEM;
}
- entry = trace_create_file("enable", 0644, d_events,
+ entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events,
tr, &ftrace_tr_enable_fops);
- if (!entry) {
- pr_warn("Could not create tracefs 'enable' entry\n");
+ if (!entry)
return -ENOMEM;
- }
/* There are not as crucial, just warn if they are not created */
- entry = tracefs_create_file("set_event_pid", 0644, parent,
- tr, &ftrace_set_event_pid_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'set_event_pid' entry\n");
+ trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
+ tr, &ftrace_set_event_pid_fops);
+
+ trace_create_file("set_event_notrace_pid",
+ TRACE_MODE_WRITE, parent, tr,
+ &ftrace_set_event_notrace_pid_fops);
/* ring buffer internal formats */
- entry = trace_create_file("header_page", 0444, d_events,
+ trace_create_file("header_page", TRACE_MODE_READ, d_events,
ring_buffer_print_page_header,
&ftrace_show_header_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'header_page' entry\n");
- entry = trace_create_file("header_event", 0444, d_events,
+ trace_create_file("header_event", TRACE_MODE_READ, d_events,
ring_buffer_print_entry_header,
&ftrace_show_header_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'header_event' entry\n");
tr->event_dir = d_events;
@@ -3100,7 +3593,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
*
* When a new instance is created, it needs to set up its events
* directory, as well as other files associated with events. It also
- * creates the event hierachry in the @parent/events directory.
+ * creates the event hierarchy in the @parent/events directory.
*
* Returns 0 on success.
*
@@ -3117,7 +3610,11 @@ int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
goto out;
down_write(&trace_event_sem);
- __trace_add_event_dirs(tr);
+ /* If tr already has the event list, it is initialized in early boot. */
+ if (unlikely(!list_empty(&tr->events)))
+ __trace_early_add_event_dirs(tr);
+ else
+ __trace_add_event_dirs(tr);
up_write(&trace_event_sem);
out:
@@ -3158,7 +3655,7 @@ int event_trace_del_tracer(struct trace_array *tr)
clear_event_triggers(tr);
/* Clear the pid list */
- __ftrace_clear_event_pids(tr);
+ __ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
/* Disable any running events */
__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
@@ -3253,10 +3750,10 @@ static __init int event_trace_enable(void)
* initialize events and perhaps start any events that are on the
* command line. Unfortunately, there are some events that will not
* start this early, like the system call tracepoints that need
- * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
- * is called before pid 1 starts, and this flag is never set, making
- * the syscall tracepoint never get reached, but the event is enabled
- * regardless (and not doing anything).
+ * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But
+ * event_trace_enable() is called before pid 1 starts, and this flag
+ * is never set, making the syscall tracepoint never get reached, but
+ * the event is enabled regardless (and not doing anything).
*/
static __init int event_trace_enable_again(void)
{
@@ -3273,33 +3770,31 @@ static __init int event_trace_enable_again(void)
early_initcall(event_trace_enable_again);
+/* Init fields which doesn't related to the tracefs */
+static __init int event_trace_init_fields(void)
+{
+ if (trace_define_generic_fields())
+ pr_warn("tracing: Failed to allocated generic fields");
+
+ if (trace_define_common_fields())
+ pr_warn("tracing: Failed to allocate common fields");
+
+ return 0;
+}
+
__init int event_trace_init(void)
{
struct trace_array *tr;
- struct dentry *d_tracer;
- struct dentry *entry;
int ret;
tr = top_trace_array();
if (!tr)
return -ENODEV;
- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
- return 0;
-
- entry = tracefs_create_file("available_events", 0444, d_tracer,
- tr, &ftrace_avail_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'available_events' entry\n");
-
- if (trace_define_generic_fields())
- pr_warn("tracing: Failed to allocated generic fields");
-
- if (trace_define_common_fields())
- pr_warn("tracing: Failed to allocate common fields");
+ trace_create_file("available_events", TRACE_MODE_READ,
+ NULL, tr, &ftrace_avail_fops);
- ret = early_event_add_tracer(d_tracer, tr);
+ ret = early_event_add_tracer(NULL, tr);
if (ret)
return ret;
@@ -3308,6 +3803,9 @@ __init int event_trace_init(void)
if (ret)
pr_warn("Failed to register trace events module notifier\n");
#endif
+
+ eventdir_initialized = true;
+
return 0;
}
@@ -3316,6 +3814,7 @@ void __init trace_event_init(void)
event_trace_memsetup();
init_ftrace_syscalls();
event_trace_enable();
+ event_trace_init_fields();
}
#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
@@ -3493,17 +3992,16 @@ static struct trace_event_file event_trace_file __initdata;
static void __init
function_test_events_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
+ struct ftrace_ops *op, struct ftrace_regs *regs)
{
struct trace_buffer *buffer;
struct ring_buffer_event *event;
struct ftrace_entry *entry;
- unsigned long flags;
+ unsigned int trace_ctx;
long disabled;
int cpu;
- int pc;
- pc = preempt_count();
+ trace_ctx = tracing_gen_ctx();
preempt_disable_notrace();
cpu = raw_smp_processor_id();
disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
@@ -3511,11 +4009,9 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip,
if (disabled != 1)
goto out;
- local_save_flags(flags);
-
event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
TRACE_FN, sizeof(*entry),
- flags, pc);
+ trace_ctx);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
@@ -3523,7 +4019,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip,
entry->parent_ip = parent_ip;
event_trigger_unlock_commit(&event_trace_file, buffer, event,
- entry, flags, pc);
+ entry, trace_ctx);
out:
atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
preempt_enable_notrace();
@@ -3532,7 +4028,6 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip,
static struct ftrace_ops trace_ops __initdata =
{
.func = function_test_events_call,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE,
};
static __init void event_trace_self_test_with_function(void)