diff options
| author | 2019-11-25 13:26:56 -0800 | |
|---|---|---|
| committer | 2019-11-25 13:26:56 -0800 | |
| commit | 976e3645923bdd2fe7893aae33fd7a21098bfb28 (patch) | |
| tree | d1cb24e4c9743beef15a4796070aca7e2c08228a /kernel/trace | |
| parent | Revert "Input: synaptics - enable RMI mode for X1 Extreme 2nd Generation" (diff) | |
| parent | Input: synaptics-rmi4 - fix various V4L2 compliance problems in F54 (diff) | |
Merge branch 'next' into for-linus
Prepare input updates for 5.5 merge window.
Diffstat (limited to 'kernel/trace')
| -rw-r--r-- | kernel/trace/Kconfig | 9 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 36 | ||||
| -rw-r--r-- | kernel/trace/fgraph.c | 4 | ||||
| -rw-r--r-- | kernel/trace/ftrace.c | 80 | ||||
| -rw-r--r-- | kernel/trace/ftrace_internal.h | 8 | ||||
| -rw-r--r-- | kernel/trace/ring_buffer_benchmark.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 183 | ||||
| -rw-r--r-- | kernel/trace/trace.h | 13 | ||||
| -rw-r--r-- | kernel/trace/trace_dynevent.c | 14 | ||||
| -rw-r--r-- | kernel/trace/trace_dynevent.h | 7 | ||||
| -rw-r--r-- | kernel/trace/trace_event_perf.c | 4 | ||||
| -rw-r--r-- | kernel/trace/trace_events.c | 41 | ||||
| -rw-r--r-- | kernel/trace/trace_events_filter.c | 6 | ||||
| -rw-r--r-- | kernel/trace/trace_events_hist.c | 40 | ||||
| -rw-r--r-- | kernel/trace/trace_events_trigger.c | 8 | ||||
| -rw-r--r-- | kernel/trace/trace_hwlat.c | 4 | ||||
| -rw-r--r-- | kernel/trace/trace_kprobe.c | 284 | ||||
| -rw-r--r-- | kernel/trace/trace_output.c | 6 | ||||
| -rw-r--r-- | kernel/trace/trace_printk.c | 7 | ||||
| -rw-r--r-- | kernel/trace/trace_probe.c | 193 | ||||
| -rw-r--r-- | kernel/trace/trace_probe.h | 68 | ||||
| -rw-r--r-- | kernel/trace/trace_sched_wakeup.c | 3 | ||||
| -rw-r--r-- | kernel/trace/trace_stack.c | 120 | ||||
| -rw-r--r-- | kernel/trace/trace_stat.c | 6 | ||||
| -rw-r--r-- | kernel/trace/trace_uprobe.c | 311 |
25 files changed, 1153 insertions, 304 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 98da8998c25c..e08527f50d2a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -146,7 +146,7 @@ config FUNCTION_TRACER select GENERIC_TRACER select CONTEXT_SWITCH_TRACER select GLOB - select TASKS_RCU if PREEMPT + select TASKS_RCU if PREEMPTION help Enable the kernel to trace every kernel function. This is done by using a compiler feature to insert a small, 5-byte No-Operation @@ -179,7 +179,7 @@ config TRACE_PREEMPT_TOGGLE config PREEMPTIRQ_EVENTS bool "Enable trace events for preempt and irq disable/enable" select TRACE_IRQFLAGS - select TRACE_PREEMPT_TOGGLE if PREEMPT + select TRACE_PREEMPT_TOGGLE if PREEMPTION select GENERIC_TRACER default n help @@ -214,7 +214,7 @@ config PREEMPT_TRACER bool "Preemption-off Latency Tracer" default n depends on !ARCH_USES_GETTIMEOFFSET - depends on PREEMPT + depends on PREEMPTION select GENERIC_TRACER select TRACER_MAX_TRACE select RING_BUFFER_ALLOW_SWAP @@ -520,7 +520,8 @@ config BPF_EVENTS bool default y help - This allows the user to attach BPF programs to kprobe events. + This allows the user to attach BPF programs to kprobe, uprobe, and + tracepoint events. 
config DYNAMIC_EVENTS def_bool n diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ca1255d14576..44bd08f2443b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -142,8 +142,13 @@ BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { int ret; + ret = security_locked_down(LOCKDOWN_BPF_READ); + if (ret < 0) + goto out; + ret = probe_kernel_read(dst, unsafe_ptr, size); if (unlikely(ret < 0)) +out: memset(dst, 0, size); return ret; @@ -500,14 +505,17 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); -static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd); +static DEFINE_PER_CPU(int, bpf_event_output_nest_level); +struct bpf_nested_pt_regs { + struct pt_regs regs[3]; +}; +static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs); +static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) { - struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd); - struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); + int nest_level = this_cpu_inc_return(bpf_event_output_nest_level); struct perf_raw_frag frag = { .copy = ctx_copy, .size = ctx_size, @@ -522,12 +530,25 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, .data = meta, }, }; + struct perf_sample_data *sd; + struct pt_regs *regs; + u64 ret; + + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) { + ret = -EBUSY; + goto out; + } + sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]); + regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]); perf_fetch_caller_regs(regs); perf_sample_data_init(sd, 0, 0); sd->raw = &raw; - return __bpf_perf_event_output(regs, map, flags, sd); + ret = __bpf_perf_event_output(regs, map, flags, sd); +out: + 
this_cpu_dec(bpf_event_output_nest_level); + return ret; } BPF_CALL_0(bpf_get_current_task) @@ -569,6 +590,10 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size, { int ret; + ret = security_locked_down(LOCKDOWN_BPF_READ); + if (ret < 0) + goto out; + /* * The strncpy_from_unsafe() call will likely not fill the entire * buffer, but that's okay in this circumstance as we're probing @@ -580,6 +605,7 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size, */ ret = strncpy_from_unsafe(dst, unsafe_ptr, size); if (unlikely(ret < 0)) +out: memset(dst, 0, size); return ret; diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 8dfd5021b933..7950a0356042 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -276,7 +276,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, int index = task->curr_ret_stack; int i; - if (ret != (unsigned long)return_to_handler) + if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) return ret; if (index < 0) @@ -294,7 +294,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, { int task_idx; - if (ret != (unsigned long)return_to_handler) + if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) return ret; task_idx = task->curr_ret_stack; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eca34503f178..f296d89be757 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -18,6 +18,7 @@ #include <linux/clocksource.h> #include <linux/sched/task.h> #include <linux/kallsyms.h> +#include <linux/security.h> #include <linux/seq_file.h> #include <linux/tracefs.h> #include <linux/hardirq.h> @@ -2814,7 +2815,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) * synchornize_rcu_tasks() will wait for those tasks to * execute and either schedule voluntarily or enter user space. 
*/ - if (IS_ENABLED(CONFIG_PREEMPT)) + if (IS_ENABLED(CONFIG_PREEMPTION)) synchronize_rcu_tasks(); free_ops: @@ -3095,6 +3096,14 @@ t_probe_next(struct seq_file *m, loff_t *pos) hnd = &iter->probe_entry->hlist; hash = iter->probe->ops.func_hash->filter_hash; + + /* + * A probe being registered may temporarily have an empty hash + * and it's at the end of the func_probes list. + */ + if (!hash || hash == EMPTY_HASH) + return NULL; + size = 1 << hash->size_bits; retry: @@ -3478,6 +3487,11 @@ static int ftrace_avail_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; if (unlikely(ftrace_disabled)) return -ENODEV; @@ -3497,6 +3511,15 @@ ftrace_enabled_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; + /* + * This shows us what functions are currently being + * traced and by what. Not sure if we want lockdown + * to hide such critical information for an admin. + * Although, perhaps it can show information we don't + * want people to see, but if something is tracing + * something, we probably want to know about it. 
+ */ + iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); if (!iter) return -ENOMEM; @@ -3532,21 +3555,22 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, struct ftrace_hash *hash; struct list_head *mod_head; struct trace_array *tr = ops->private; - int ret = 0; + int ret = -ENOMEM; ftrace_ops_init(ops); if (unlikely(ftrace_disabled)) return -ENODEV; + if (tracing_check_open_get_tr(tr)) + return -ENODEV; + iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) - return -ENOMEM; + goto out; - if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) { - kfree(iter); - return -ENOMEM; - } + if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) + goto out; iter->ops = ops; iter->flags = flag; @@ -3576,13 +3600,13 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, if (!iter->hash) { trace_parser_put(&iter->parser); - kfree(iter); - ret = -ENOMEM; goto out_unlock; } } else iter->hash = hash; + ret = 0; + if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; @@ -3594,7 +3618,6 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, /* Failed */ free_ftrace_hash(iter->hash); trace_parser_put(&iter->parser); - kfree(iter); } } else file->private_data = iter; @@ -3602,6 +3625,13 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, out_unlock: mutex_unlock(&ops->func_hash->regex_lock); + out: + if (ret) { + kfree(iter); + if (tr) + trace_array_put(tr); + } + return ret; } @@ -3610,6 +3640,7 @@ ftrace_filter_open(struct inode *inode, struct file *file) { struct ftrace_ops *ops = inode->i_private; + /* Checks for tracefs lockdown */ return ftrace_regex_open(ops, FTRACE_ITER_FILTER | FTRACE_ITER_DO_PROBES, inode, file); @@ -3620,6 +3651,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file) { struct ftrace_ops *ops = inode->i_private; + /* Checks for tracefs lockdown */ return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE, inode, file); } @@ -4320,12 +4352,21 @@ register_ftrace_function_probe(char *glob, struct trace_array 
*tr, mutex_unlock(&ftrace_lock); + /* + * Note, there's a small window here that the func_hash->filter_hash + * may be NULL or empty. Need to be carefule when reading the loop. + */ mutex_lock(&probe->ops.func_hash->regex_lock); orig_hash = &probe->ops.func_hash->filter_hash; old_hash = *orig_hash; hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); + if (!hash) { + ret = -ENOMEM; + goto out; + } + ret = ftrace_match_records(hash, glob, strlen(glob)); /* Nothing found? */ @@ -5020,6 +5061,8 @@ int ftrace_regex_release(struct inode *inode, struct file *file) mutex_unlock(&iter->ops->func_hash->regex_lock); free_ftrace_hash(iter->hash); + if (iter->tr) + trace_array_put(iter->tr); kfree(iter); return 0; @@ -5177,9 +5220,13 @@ static int __ftrace_graph_open(struct inode *inode, struct file *file, struct ftrace_graph_data *fgd) { - int ret = 0; + int ret; struct ftrace_hash *new_hash = NULL; + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + if (file->f_mode & FMODE_WRITE) { const int size_bits = FTRACE_HASH_DEFAULT_BITS; @@ -6019,11 +6066,7 @@ clear_func_from_hash(struct ftrace_init_func *func, struct ftrace_hash *hash) { struct ftrace_func_entry *entry; - if (ftrace_hash_empty(hash)) - return; - - entry = __ftrace_lookup_ip(hash, func->ip); - + entry = ftrace_lookup_ip(hash, func->ip); /* * Do not allow this rec to match again. 
* Yeah, it may waste some memory, but will be removed @@ -6524,8 +6567,9 @@ ftrace_pid_open(struct inode *inode, struct file *file) struct seq_file *m; int ret = 0; - if (trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h index 0515a2096f90..0456e0a3dab1 100644 --- a/kernel/trace/ftrace_internal.h +++ b/kernel/trace/ftrace_internal.h @@ -6,22 +6,22 @@ /* * Traverse the ftrace_global_list, invoking all entries. The reason that we - * can use rcu_dereference_raw_notrace() is that elements removed from this list + * can use rcu_dereference_raw_check() is that elements removed from this list * are simply leaked, so there is no need to interact with a grace-period - * mechanism. The rcu_dereference_raw_notrace() calls are needed to handle + * mechanism. The rcu_dereference_raw_check() calls are needed to handle * concurrent insertions into the ftrace_global_list. * * Silly Alpha and silly pointer-speculation compiler optimizations! */ #define do_for_each_ftrace_op(op, list) \ - op = rcu_dereference_raw_notrace(list); \ + op = rcu_dereference_raw_check(list); \ do /* * Optimized for just a single item in the list (as that is the normal case). 
*/ #define while_for_each_ftrace_op(op) \ - while (likely(op = rcu_dereference_raw_notrace((op)->next)) && \ + while (likely(op = rcu_dereference_raw_check((op)->next)) && \ unlikely((op) != &ftrace_list_end)) extern struct ftrace_ops __rcu *ftrace_ops_list; diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 0564f6db0561..09b0b49f346e 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -267,7 +267,7 @@ static void ring_buffer_producer(void) if (consumer && !(cnt % wakeup_interval)) wake_up_process(consumer); -#ifndef CONFIG_PREEMPT +#ifndef CONFIG_PREEMPTION /* * If we are a non preempt kernel, the 10 second run will * stop everything while it runs. Instead, we will call diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 525a97fbbc60..6a0ee9178365 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -17,6 +17,7 @@ #include <linux/stacktrace.h> #include <linux/writeback.h> #include <linux/kallsyms.h> +#include <linux/security.h> #include <linux/seq_file.h> #include <linux/notifier.h> #include <linux/irqflags.h> @@ -304,6 +305,23 @@ void trace_array_put(struct trace_array *this_tr) mutex_unlock(&trace_types_lock); } +int tracing_check_open_get_tr(struct trace_array *tr) +{ + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + + if (tracing_disabled) + return -ENODEV; + + if (tr && trace_array_get(tr) < 0) + return -ENODEV; + + return 0; +} + int call_filter_check_discard(struct trace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) @@ -1567,9 +1585,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, /** * update_max_tr_single - only copy one trace over, and reset the rest - * @tr - tracer - * @tsk - task with the latency - * @cpu - the cpu of the buffer to copy. + * @tr: tracer + * @tsk: task with the latency + * @cpu: the cpu of the buffer to copy. 
* * Flip the trace of a single CPU buffer between the @tr and the max_tr. */ @@ -1767,7 +1785,7 @@ static void __init apply_trace_boot_options(void); /** * register_tracer - register a tracer with the ftrace system. - * @type - the plugin for the tracer + * @type: the plugin for the tracer * * Register a new plugin tracer. */ @@ -1854,7 +1872,7 @@ int __init register_tracer(struct tracer *type) return ret; } -void tracing_reset(struct trace_buffer *buf, int cpu) +static void tracing_reset_cpu(struct trace_buffer *buf, int cpu) { struct ring_buffer *buffer = buf->buffer; @@ -2230,9 +2248,9 @@ static bool tracing_record_taskinfo_skip(int flags) /** * tracing_record_taskinfo - record the task info of a task * - * @task - task to record - * @flags - TRACE_RECORD_CMDLINE for recording comm - * - TRACE_RECORD_TGID for recording tgid + * @task: task to record + * @flags: TRACE_RECORD_CMDLINE for recording comm + * TRACE_RECORD_TGID for recording tgid */ void tracing_record_taskinfo(struct task_struct *task, int flags) { @@ -2258,10 +2276,10 @@ void tracing_record_taskinfo(struct task_struct *task, int flags) /** * tracing_record_taskinfo_sched_switch - record task info for sched_switch * - * @prev - previous task during sched_switch - * @next - next task during sched_switch - * @flags - TRACE_RECORD_CMDLINE for recording comm - * TRACE_RECORD_TGID for recording tgid + * @prev: previous task during sched_switch + * @next: next task during sched_switch + * @flags: TRACE_RECORD_CMDLINE for recording comm + * TRACE_RECORD_TGID for recording tgid */ void tracing_record_taskinfo_sched_switch(struct task_struct *prev, struct task_struct *next, int flags) @@ -2642,10 +2660,10 @@ static void ftrace_exports(struct ring_buffer_event *event) preempt_disable_notrace(); - export = rcu_dereference_raw_notrace(ftrace_exports_list); + export = rcu_dereference_raw_check(ftrace_exports_list); while (export) { trace_process_export(export, event); - export = 
rcu_dereference_raw_notrace(export->next); + export = rcu_dereference_raw_check(export->next); } preempt_enable_notrace(); @@ -3072,7 +3090,9 @@ static void trace_printk_start_stop_comm(int enabled) /** * trace_vbprintk - write binary msg to tracing buffer - * + * @ip: The address of the caller + * @fmt: The string format to write to the buffer + * @args: Arguments for @fmt */ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { @@ -4138,8 +4158,11 @@ release: int tracing_open_generic(struct inode *inode, struct file *filp) { - if (tracing_disabled) - return -ENODEV; + int ret; + + ret = tracing_check_open_get_tr(NULL); + if (ret) + return ret; filp->private_data = inode->i_private; return 0; @@ -4154,15 +4177,14 @@ bool tracing_is_disabled(void) * Open and update trace_array ref count. * Must have the current trace_array passed to it. */ -static int tracing_open_generic_tr(struct inode *inode, struct file *filp) +int tracing_open_generic_tr(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; + int ret; - if (tracing_disabled) - return -ENODEV; - - if (trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; filp->private_data = inode->i_private; @@ -4231,10 +4253,11 @@ static int tracing_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; struct trace_iterator *iter; - int ret = 0; + int ret; - if (trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { @@ -4249,7 +4272,7 @@ static int tracing_open(struct inode *inode, struct file *file) if (cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(trace_buf); else - tracing_reset(trace_buf, cpu); + tracing_reset_cpu(trace_buf, cpu); } if (file->f_mode & FMODE_READ) { @@ -4350,12 +4373,15 @@ static int 
show_traces_open(struct inode *inode, struct file *file) struct seq_file *m; int ret; - if (tracing_disabled) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; ret = seq_open(file, &show_traces_seq_ops); - if (ret) + if (ret) { + trace_array_put(tr); return ret; + } m = file->private_data; m->private = tr; @@ -4363,6 +4389,14 @@ static int show_traces_open(struct inode *inode, struct file *file) return 0; } +static int show_traces_release(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + return seq_release(inode, file); +} + static ssize_t tracing_write_stub(struct file *filp, const char __user *ubuf, size_t count, loff_t *ppos) @@ -4393,8 +4427,8 @@ static const struct file_operations tracing_fops = { static const struct file_operations show_traces_fops = { .open = show_traces_open, .read = seq_read, - .release = seq_release, .llseek = seq_lseek, + .release = show_traces_release, }; static ssize_t @@ -4695,11 +4729,9 @@ static int tracing_trace_options_open(struct inode *inode, struct file *file) struct trace_array *tr = inode->i_private; int ret; - if (tracing_disabled) - return -ENODEV; - - if (trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; ret = single_open(file, tracing_trace_options_show, inode->i_private); if (ret < 0) @@ -4813,15 +4845,15 @@ static const char readme_msg[] = #endif #endif /* CONFIG_STACK_TRACER */ #ifdef CONFIG_DYNAMIC_EVENTS - " dynamic_events\t\t- Add/remove/show the generic dynamic events\n" + " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #ifdef CONFIG_KPROBE_EVENTS - " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n" + " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" 
#endif #ifdef CONFIG_UPROBE_EVENTS - " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n" + " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) @@ -4846,7 +4878,7 @@ static const char readme_msg[] = #else "\t $stack<index>, $stack, $retval, $comm,\n" #endif - "\t +|-[u]<offset>(<fetcharg>)\n" + "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" "\t <type>\\[<array-size>\\]\n" @@ -5036,8 +5068,11 @@ static const struct seq_operations tracing_saved_tgids_seq_ops = { static int tracing_saved_tgids_open(struct inode *inode, struct file *filp) { - if (tracing_disabled) - return -ENODEV; + int ret; + + ret = tracing_check_open_get_tr(NULL); + if (ret) + return ret; return seq_open(filp, &tracing_saved_tgids_seq_ops); } @@ -5113,8 +5148,11 @@ static const struct seq_operations tracing_saved_cmdlines_seq_ops = { static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp) { - if (tracing_disabled) - return -ENODEV; + int ret; + + ret = tracing_check_open_get_tr(NULL); + if (ret) + return ret; return seq_open(filp, &tracing_saved_cmdlines_seq_ops); } @@ -5278,8 +5316,11 @@ static const struct seq_operations tracing_eval_map_seq_ops = { static int tracing_eval_map_open(struct inode *inode, struct file *filp) { - if (tracing_disabled) - return -ENODEV; + int ret; + + ret = tracing_check_open_get_tr(NULL); + if (ret) + return ret; return seq_open(filp, &tracing_eval_map_seq_ops); } @@ -5802,13 +5843,11 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; struct trace_iterator *iter; - int ret = 0; - - if (tracing_disabled) - return -ENODEV; + int ret; - if 
(trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; mutex_lock(&trace_types_lock); @@ -5997,6 +6036,7 @@ waitagain: sizeof(struct trace_iterator) - offsetof(struct trace_iterator, seq)); cpumask_clear(iter->started); + trace_seq_init(&iter->seq); iter->pos = -1; trace_event_read_lock(); @@ -6545,11 +6585,9 @@ static int tracing_clock_open(struct inode *inode, struct file *file) struct trace_array *tr = inode->i_private; int ret; - if (tracing_disabled) - return -ENODEV; - - if (trace_array_get(tr)) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; ret = single_open(file, tracing_clock_show, inode->i_private); if (ret < 0) @@ -6579,11 +6617,9 @@ static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) struct trace_array *tr = inode->i_private; int ret; - if (tracing_disabled) - return -ENODEV; - - if (trace_array_get(tr)) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); if (ret < 0) @@ -6636,10 +6672,11 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) struct trace_array *tr = inode->i_private; struct trace_iterator *iter; struct seq_file *m; - int ret = 0; + int ret; - if (trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; if (file->f_mode & FMODE_READ) { iter = __tracing_open(inode, file, true); @@ -6740,7 +6777,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, if (iter->cpu_file == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->max_buffer); else - tracing_reset(&tr->max_buffer, iter->cpu_file); + tracing_reset_cpu(&tr->max_buffer, iter->cpu_file); } break; } @@ -6784,6 +6821,7 @@ static int snapshot_raw_open(struct inode *inode, struct file *filp) struct ftrace_buffer_info *info; int ret; + /* The following checks for tracefs 
lockdown */ ret = tracing_buffers_open(inode, filp); if (ret < 0) return ret; @@ -7103,8 +7141,9 @@ static int tracing_err_log_open(struct inode *inode, struct file *file) struct trace_array *tr = inode->i_private; int ret = 0; - if (trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; /* If this file was opened for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) @@ -7155,11 +7194,9 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) struct ftrace_buffer_info *info; int ret; - if (tracing_disabled) - return -ENODEV; - - if (trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 005f08629b8b..d685c61085c0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -338,6 +338,7 @@ extern struct mutex trace_types_lock; extern int trace_array_get(struct trace_array *tr); extern void trace_array_put(struct trace_array *tr); +extern int tracing_check_open_get_tr(struct trace_array *tr); extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); @@ -365,11 +366,11 @@ static inline struct trace_array *top_trace_array(void) __builtin_types_compatible_p(typeof(var), type *) #undef IF_ASSIGN -#define IF_ASSIGN(var, entry, etype, id) \ - if (FTRACE_CMP_TYPE(var, etype)) { \ - var = (typeof(var))(entry); \ - WARN_ON(id && (entry)->type != id); \ - break; \ +#define IF_ASSIGN(var, entry, etype, id) \ + if (FTRACE_CMP_TYPE(var, etype)) { \ + var = (typeof(var))(entry); \ + WARN_ON(id != 0 && (entry)->type != id); \ + break; \ } /* Will cause compile errors if type is not found. 
*/ @@ -677,11 +678,11 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); -void tracing_reset(struct trace_buffer *buf, int cpu); void tracing_reset_online_cpus(struct trace_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); +int tracing_open_generic_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); bool tracer_tracing_is_on(struct trace_array *tr); void tracer_tracing_on(struct trace_array *tr); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index fa100ed3b4de..89779eb84a07 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -47,6 +47,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) return -EINVAL; event++; } + argc--; argv++; p = strchr(event, '/'); if (p) { @@ -61,10 +62,13 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) for_each_dyn_event_safe(pos, n) { if (type && type != pos->ops) continue; - if (pos->ops->match(system, event, pos)) { - ret = pos->ops->free(pos); + if (!pos->ops->match(system, event, + argc, (const char **)argv, pos)) + continue; + + ret = pos->ops->free(pos); + if (ret) break; - } } mutex_unlock(&event_mutex); @@ -170,6 +174,10 @@ static int dyn_event_open(struct inode *inode, struct file *file) { int ret; + ret = tracing_check_open_get_tr(NULL); + if (ret) + return ret; + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { ret = dyn_events_release_all(NULL); if (ret < 0) diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index 8c334064e4d6..46898138d2df 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -31,8 +31,9 @@ struct dyn_event; * @is_busy: Check whether given event is busy so that it can not be deleted. 
* Return true if it is busy, otherwides false. * @free: Delete the given event. Return 0 if success, otherwides error. - * @match: Check whether given event and system name match this event. - * Return true if it matches, otherwides false. + * @match: Check whether given event and system name match this event. The argc + * and argv is used for exact match. Return true if it matches, otherwides + * false. * * Except for @create, these methods are called under holding event_mutex. */ @@ -43,7 +44,7 @@ struct dyn_event_operations { bool (*is_busy)(struct dyn_event *ev); int (*free)(struct dyn_event *ev); bool (*match)(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); }; /* Register new dyn_event type -- must be called at first */ diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 0892e38ed6fb..a9dfa04ffa44 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -272,9 +272,11 @@ int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe) goto out; } + mutex_lock(&event_mutex); ret = perf_trace_event_init(tp_event, p_event); if (ret) destroy_local_trace_kprobe(tp_event); + mutex_unlock(&event_mutex); out: kfree(func); return ret; @@ -282,8 +284,10 @@ out: void perf_kprobe_destroy(struct perf_event *p_event) { + mutex_lock(&event_mutex); perf_trace_event_close(p_event); perf_trace_event_unreg(p_event); + mutex_unlock(&event_mutex); destroy_local_trace_kprobe(p_event->tp_event); } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c7506bc81b75..fba87d10f0c1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) fmt #include <linux/workqueue.h> +#include <linux/security.h> #include <linux/spinlock.h> #include <linux/kthread.h> #include <linux/tracefs.h> @@ -255,12 +256,12 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, 
local_save_flags(fbuffer->flags); fbuffer->pc = preempt_count(); /* - * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables + * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables * preemption (adding one to the preempt_count). Since we are * interested in the preempt_count at the time the tracepoint was * hit, we need to subtract one to offset the increment. */ - if (IS_ENABLED(CONFIG_PREEMPT)) + if (IS_ENABLED(CONFIG_PREEMPTION)) fbuffer->pc--; fbuffer->trace_file = trace_file; @@ -787,7 +788,7 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, return ret; } -static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) +int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) { char *event = NULL, *sub = NULL, *match; int ret; @@ -1294,6 +1295,8 @@ static int trace_format_open(struct inode *inode, struct file *file) struct seq_file *m; int ret; + /* Do we want to hide event format files on tracefs lockdown? 
*/ + ret = seq_open(file, &trace_format_seq_ops); if (ret < 0) return ret; @@ -1440,28 +1443,17 @@ static int system_tr_open(struct inode *inode, struct file *filp) struct trace_array *tr = inode->i_private; int ret; - if (tracing_is_disabled()) - return -ENODEV; - - if (trace_array_get(tr) < 0) - return -ENODEV; - /* Make a temporary dir that has no system but points to tr */ dir = kzalloc(sizeof(*dir), GFP_KERNEL); - if (!dir) { - trace_array_put(tr); + if (!dir) return -ENOMEM; - } - dir->tr = tr; - - ret = tracing_open_generic(inode, filp); + ret = tracing_open_generic_tr(inode, filp); if (ret < 0) { - trace_array_put(tr); kfree(dir); return ret; } - + dir->tr = tr; filp->private_data = dir; return 0; @@ -1771,6 +1763,10 @@ ftrace_event_open(struct inode *inode, struct file *file, struct seq_file *m; int ret; + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + ret = seq_open(file, seq_ops); if (ret < 0) return ret; @@ -1795,6 +1791,7 @@ ftrace_event_avail_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_event_seq_ops; + /* Checks for tracefs lockdown */ return ftrace_event_open(inode, file, seq_ops); } @@ -1805,8 +1802,9 @@ ftrace_event_set_open(struct inode *inode, struct file *file) struct trace_array *tr = inode->i_private; int ret; - if (trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) @@ -1825,8 +1823,9 @@ ftrace_event_set_pid_open(struct inode *inode, struct file *file) struct trace_array *tr = inode->i_private; int ret; - if (trace_array_get(tr) < 0) - return -ENODEV; + ret = tracing_check_open_get_tr(tr); + if (ret) + return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index c773b8fb270c..c9a74f82b14a 100644 --- a/kernel/trace/trace_events_filter.c +++ 
b/kernel/trace/trace_events_filter.c @@ -452,8 +452,10 @@ predicate_parse(const char *str, int nr_parens, int nr_preds, switch (*next) { case '(': /* #2 */ - if (top - op_stack > nr_parens) - return ERR_PTR(-EINVAL); + if (top - op_stack > nr_parens) { + ret = -EINVAL; + goto out_free; + } *(++top) = invert; continue; case '!': /* #3 */ diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index ca6b0dff60c5..7482a1466ebf 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -7,12 +7,17 @@ #include <linux/module.h> #include <linux/kallsyms.h> +#include <linux/security.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/stacktrace.h> #include <linux/rculist.h> #include <linux/tracefs.h> +/* for gfp flag names */ +#include <linux/trace_events.h> +#include <trace/events/mmflags.h> + #include "tracing_map.h" #include "trace.h" #include "trace_dynevent.h" @@ -374,7 +379,7 @@ static int synth_event_show(struct seq_file *m, struct dyn_event *ev); static int synth_event_release(struct dyn_event *ev); static bool synth_event_is_busy(struct dyn_event *ev); static bool synth_event_match(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations synth_event_ops = { .create = synth_event_create, @@ -422,7 +427,7 @@ static bool synth_event_is_busy(struct dyn_event *ev) } static bool synth_event_match(const char *system, const char *event, - struct dyn_event *ev) + int argc, const char **argv, struct dyn_event *ev) { struct synth_event *sev = to_synth_event(ev); @@ -674,6 +679,8 @@ static bool synth_field_signed(char *type) { if (str_has_prefix(type, "u")) return false; + if (strcmp(type, "gfp_t") == 0) + return false; return true; } @@ -752,6 +759,8 @@ static int synth_field_size(char *type) size = sizeof(unsigned long); else if (strcmp(type, "pid_t") == 0) size = sizeof(pid_t); + else if (strcmp(type, "gfp_t") 
== 0) + size = sizeof(gfp_t); else if (synth_field_is_string(type)) size = synth_field_string_size(type); @@ -792,6 +801,8 @@ static const char *synth_field_fmt(char *type) fmt = "%lu"; else if (strcmp(type, "pid_t") == 0) fmt = "%d"; + else if (strcmp(type, "gfp_t") == 0) + fmt = "%x"; else if (synth_field_is_string(type)) fmt = "%s"; @@ -834,9 +845,20 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, i == se->n_fields - 1 ? "" : " "); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { + struct trace_print_flags __flags[] = { + __def_gfpflag_names, {-1, NULL} }; + trace_seq_printf(s, print_fmt, se->fields[i]->name, entry->fields[n_u64], i == se->n_fields - 1 ? "" : " "); + + if (strcmp(se->fields[i]->type, "gfp_t") == 0) { + trace_seq_puts(s, " ("); + trace_print_flags_seq(s, "|", + entry->fields[n_u64], + __flags); + trace_seq_putc(s, ')'); + } n_u64++; } } @@ -1429,6 +1451,10 @@ static int synth_events_open(struct inode *inode, struct file *file) { int ret; + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { ret = dyn_events_release_all(&synth_event_ops); if (ret < 0) @@ -1661,7 +1687,7 @@ static int save_hist_vars(struct hist_trigger_data *hist_data) if (var_data) return 0; - if (trace_array_get(tr) < 0) + if (tracing_check_open_get_tr(tr)) return -ENODEV; var_data = kzalloc(sizeof(*var_data), GFP_KERNEL); @@ -2785,6 +2811,8 @@ static struct hist_field *create_alias(struct hist_trigger_data *hist_data, return NULL; } + alias->var_ref_idx = var_ref->var_ref_idx; + return alias; } @@ -5494,6 +5522,12 @@ static int hist_show(struct seq_file *m, void *v) static int event_hist_open(struct inode *inode, struct file *file) { + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + return single_open(file, hist_show, file); } diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 
2a2912cb4533..2cd53ca21b51 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -5,6 +5,7 @@ * Copyright (C) 2013 Tom Zanussi <tom.zanussi@linux.intel.com> */ +#include <linux/security.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/mutex.h> @@ -173,7 +174,11 @@ static const struct seq_operations event_triggers_seq_ops = { static int event_trigger_regex_open(struct inode *inode, struct file *file) { - int ret = 0; + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; mutex_lock(&event_mutex); @@ -292,6 +297,7 @@ event_trigger_write(struct file *filp, const char __user *ubuf, static int event_trigger_open(struct inode *inode, struct file *filp) { + /* Checks for tracefs lockdown */ return event_trigger_regex_open(inode, filp); } diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index fa95139445b2..862f4b0139fc 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -150,7 +150,7 @@ void trace_hwlat_callback(bool enter) if (enter) nmi_ts_start = time_get(); else - nmi_total_ts = time_get() - nmi_ts_start; + nmi_total_ts += time_get() - nmi_ts_start; } if (enter) @@ -256,6 +256,8 @@ static int get_sample(void) /* Keep a running maximum ever recorded hardware latency */ if (sample > tr->max_latency) tr->max_latency = sample; + if (outer_sample > tr->max_latency) + tr->max_latency = outer_sample; } out: diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9d483ad9bb6c..1552a95c743b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -7,6 +7,7 @@ */ #define pr_fmt(fmt) "trace_kprobe: " fmt +#include <linux/security.h> #include <linux/module.h> #include <linux/uaccess.h> #include <linux/rculist.h> @@ -39,7 +40,7 @@ static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_kprobe_release(struct dyn_event *ev); static bool trace_kprobe_is_busy(struct dyn_event 
*ev); static bool trace_kprobe_match(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations trace_kprobe_ops = { .create = trace_kprobe_create, @@ -137,13 +138,36 @@ static bool trace_kprobe_is_busy(struct dyn_event *ev) return trace_probe_is_enabled(&tk->tp); } +static bool trace_kprobe_match_command_head(struct trace_kprobe *tk, + int argc, const char **argv) +{ + char buf[MAX_ARGSTR_LEN + 1]; + + if (!argc) + return true; + + if (!tk->symbol) + snprintf(buf, sizeof(buf), "0x%p", tk->rp.kp.addr); + else if (tk->rp.kp.offset) + snprintf(buf, sizeof(buf), "%s+%u", + trace_kprobe_symbol(tk), tk->rp.kp.offset); + else + snprintf(buf, sizeof(buf), "%s", trace_kprobe_symbol(tk)); + if (strcmp(buf, argv[0])) + return false; + argc--; argv++; + + return trace_probe_match_command_args(&tk->tp, argc, argv); +} + static bool trace_kprobe_match(const char *system, const char *event, - struct dyn_event *ev) + int argc, const char **argv, struct dyn_event *ev) { struct trace_kprobe *tk = to_trace_kprobe(ev); return strcmp(trace_probe_name(&tk->tp), event) == 0 && - (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0); + (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0) && + trace_kprobe_match_command_head(tk, argc, argv); } static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) @@ -180,20 +204,33 @@ unsigned long trace_kprobe_address(struct trace_kprobe *tk) return addr; } +static nokprobe_inline struct trace_kprobe * +trace_kprobe_primary_from_call(struct trace_event_call *call) +{ + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return NULL; + + return container_of(tp, struct trace_kprobe, tp); +} + bool trace_kprobe_on_func_entry(struct trace_event_call *call) { - struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + struct trace_kprobe *tk = 
trace_kprobe_primary_from_call(call); - return kprobe_on_func_entry(tk->rp.kp.addr, + return tk ? kprobe_on_func_entry(tk->rp.kp.addr, tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name, - tk->rp.kp.addr ? 0 : tk->rp.kp.offset); + tk->rp.kp.addr ? 0 : tk->rp.kp.offset) : false; } bool trace_kprobe_error_injectable(struct trace_event_call *call) { - struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + struct trace_kprobe *tk = trace_kprobe_primary_from_call(call); - return within_error_injection_list(trace_kprobe_address(tk)); + return tk ? within_error_injection_list(trace_kprobe_address(tk)) : + false; } static int register_kprobe_event(struct trace_kprobe *tk); @@ -291,32 +328,68 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk) return ret; } +static void __disable_trace_kprobe(struct trace_probe *tp) +{ + struct trace_probe *pos; + struct trace_kprobe *tk; + + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tk = container_of(pos, struct trace_kprobe, tp); + if (!trace_kprobe_is_registered(tk)) + continue; + if (trace_kprobe_is_return(tk)) + disable_kretprobe(&tk->rp); + else + disable_kprobe(&tk->rp.kp); + } +} + /* * Enable trace_probe * if the file is NULL, enable "perf" handler, or enable "trace" handler. 
*/ -static int -enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) +static int enable_trace_kprobe(struct trace_event_call *call, + struct trace_event_file *file) { - bool enabled = trace_probe_is_enabled(&tk->tp); + struct trace_probe *pos, *tp; + struct trace_kprobe *tk; + bool enabled; int ret = 0; + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + enabled = trace_probe_is_enabled(tp); + + /* This also changes "enabled" state */ if (file) { - ret = trace_probe_add_file(&tk->tp, file); + ret = trace_probe_add_file(tp, file); if (ret) return ret; } else - trace_probe_set_flag(&tk->tp, TP_FLAG_PROFILE); + trace_probe_set_flag(tp, TP_FLAG_PROFILE); if (enabled) return 0; - ret = __enable_trace_kprobe(tk); + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tk = container_of(pos, struct trace_kprobe, tp); + if (trace_kprobe_has_gone(tk)) + continue; + ret = __enable_trace_kprobe(tk); + if (ret) + break; + enabled = true; + } + if (ret) { + /* Failed to enable one of them. Roll back all */ + if (enabled) + __disable_trace_kprobe(tp); if (file) - trace_probe_remove_file(&tk->tp, file); + trace_probe_remove_file(tp, file); else - trace_probe_clear_flag(&tk->tp, TP_FLAG_PROFILE); + trace_probe_clear_flag(tp, TP_FLAG_PROFILE); } return ret; @@ -326,11 +399,14 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) * Disable trace_probe * if the file is NULL, disable "perf" handler, or disable "trace" handler. 
*/ -static int -disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) +static int disable_trace_kprobe(struct trace_event_call *call, + struct trace_event_file *file) { - struct trace_probe *tp = &tk->tp; - int ret = 0; + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; if (file) { if (!trace_probe_get_file_link(tp, file)) @@ -341,12 +417,8 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) } else trace_probe_clear_flag(tp, TP_FLAG_PROFILE); - if (!trace_probe_is_enabled(tp) && trace_kprobe_is_registered(tk)) { - if (trace_kprobe_is_return(tk)) - disable_kretprobe(&tk->rp); - else - disable_kprobe(&tk->rp.kp); - } + if (!trace_probe_is_enabled(tp)) + __disable_trace_kprobe(tp); out: if (file) @@ -358,7 +430,7 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) */ trace_probe_remove_file(tp, file); - return ret; + return 0; } #if defined(CONFIG_KPROBES_ON_FTRACE) && \ @@ -389,6 +461,10 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) { int i, ret; + ret = security_locked_down(LOCKDOWN_KPROBES); + if (ret) + return ret; + if (trace_kprobe_is_registered(tk)) return -EINVAL; @@ -437,6 +513,10 @@ static void __unregister_trace_kprobe(struct trace_kprobe *tk) /* Unregister a trace_probe and probe_event */ static int unregister_trace_kprobe(struct trace_kprobe *tk) { + /* If other probes are on the event, just unregister kprobe */ + if (trace_probe_has_sibling(&tk->tp)) + goto unreg; + /* Enabled event can not be unregistered */ if (trace_probe_is_enabled(&tk->tp)) return -EBUSY; @@ -445,12 +525,82 @@ static int unregister_trace_kprobe(struct trace_kprobe *tk) if (unregister_kprobe_event(tk)) return -EBUSY; +unreg: __unregister_trace_kprobe(tk); dyn_event_remove(&tk->devent); + trace_probe_unlink(&tk->tp); return 0; } +static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig, + struct trace_kprobe *comp) +{ 
+ struct trace_probe_event *tpe = orig->tp.event; + struct trace_probe *pos; + int i; + + list_for_each_entry(pos, &tpe->probes, list) { + orig = container_of(pos, struct trace_kprobe, tp); + if (strcmp(trace_kprobe_symbol(orig), + trace_kprobe_symbol(comp)) || + trace_kprobe_offset(orig) != trace_kprobe_offset(comp)) + continue; + + /* + * trace_probe_compare_arg_type() ensured that nr_args and + * each argument name and type are same. Let's compare comm. + */ + for (i = 0; i < orig->tp.nr_args; i++) { + if (strcmp(orig->tp.args[i].comm, + comp->tp.args[i].comm)) + break; + } + + if (i == orig->tp.nr_args) + return true; + } + + return false; +} + +static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to) +{ + int ret; + + ret = trace_probe_compare_arg_type(&tk->tp, &to->tp); + if (ret) { + /* Note that argument starts index = 2 */ + trace_probe_log_set_index(ret + 1); + trace_probe_log_err(0, DIFF_ARG_TYPE); + return -EEXIST; + } + if (trace_kprobe_has_same_kprobe(to, tk)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, SAME_PROBE); + return -EEXIST; + } + + /* Append to existing event */ + ret = trace_probe_append(&tk->tp, &to->tp); + if (ret) + return ret; + + /* Register k*probe */ + ret = __register_trace_kprobe(tk); + if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) { + pr_warn("This probe might be able to register after target module is loaded. 
Continue.\n"); + ret = 0; + } + + if (ret) + trace_probe_unlink(&tk->tp); + else + dyn_event_add(&tk->devent); + + return ret; +} + /* Register a trace_probe and probe_event */ static int register_trace_kprobe(struct trace_kprobe *tk) { @@ -459,14 +609,17 @@ static int register_trace_kprobe(struct trace_kprobe *tk) mutex_lock(&event_mutex); - /* Delete old (same name) event if exist */ old_tk = find_trace_kprobe(trace_probe_name(&tk->tp), trace_probe_group_name(&tk->tp)); if (old_tk) { - ret = unregister_trace_kprobe(old_tk); - if (ret < 0) - goto end; - free_trace_kprobe(old_tk); + if (trace_kprobe_is_return(tk) != trace_kprobe_is_return(old_tk)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, DIFF_PROBE_TYPE); + ret = -EEXIST; + } else { + ret = append_trace_kprobe(tk, old_tk); + } + goto end; } /* Register new event */ @@ -700,7 +853,7 @@ static int trace_kprobe_create(int argc, const char *argv[]) trace_probe_log_err(0, BAD_INSN_BNDRY); else if (ret == -ENOENT) trace_probe_log_err(0, BAD_PROBE_ADDR); - else if (ret != -ENOMEM) + else if (ret != -ENOMEM && ret != -EEXIST) trace_probe_log_err(0, FAIL_REG_PROBE); goto error; } @@ -783,6 +936,10 @@ static int probes_open(struct inode *inode, struct file *file) { int ret; + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { ret = dyn_events_release_all(&trace_kprobe_ops); if (ret < 0) @@ -835,6 +992,12 @@ static const struct seq_operations profile_seq_op = { static int profile_open(struct inode *inode, struct file *file) { + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + return seq_open(file, &profile_seq_op); } @@ -965,6 +1128,9 @@ retry: case FETCH_OP_COMM: val = (unsigned long)current->comm; break; + case FETCH_OP_DATA: + val = (unsigned long)code->data; + break; #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API case FETCH_OP_ARG: val = regs_get_kernel_argument(regs, code->param); @@ 
-1089,7 +1255,10 @@ print_kprobe_event(struct trace_iterator *iter, int flags, struct trace_probe *tp; field = (struct kprobe_trace_entry_head *)iter->ent; - tp = container_of(event, struct trace_probe, call.event); + tp = trace_probe_primary_from_call( + container_of(event, struct trace_event_call, event)); + if (WARN_ON_ONCE(!tp)) + goto out; trace_seq_printf(s, "%s: (", trace_probe_name(tp)); @@ -1116,7 +1285,10 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, struct trace_probe *tp; field = (struct kretprobe_trace_entry_head *)iter->ent; - tp = container_of(event, struct trace_probe, call.event); + tp = trace_probe_primary_from_call( + container_of(event, struct trace_event_call, event)); + if (WARN_ON_ONCE(!tp)) + goto out; trace_seq_printf(s, "%s: (", trace_probe_name(tp)); @@ -1145,23 +1317,31 @@ static int kprobe_event_define_fields(struct trace_event_call *event_call) { int ret; struct kprobe_trace_entry_head field; - struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data; + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(event_call); + if (WARN_ON_ONCE(!tp)) + return -ENOENT; DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); - return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp); + return traceprobe_define_arg_fields(event_call, sizeof(field), tp); } static int kretprobe_event_define_fields(struct trace_event_call *event_call) { int ret; struct kretprobe_trace_entry_head field; - struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data; + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(event_call); + if (WARN_ON_ONCE(!tp)) + return -ENOENT; DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); - return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp); + return traceprobe_define_arg_fields(event_call, sizeof(field), tp); } #ifdef CONFIG_PERF_EVENTS @@ -1289,20 +1469,19 @@ int 
bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, static int kprobe_register(struct trace_event_call *event, enum trace_reg type, void *data) { - struct trace_kprobe *tk = (struct trace_kprobe *)event->data; struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: - return enable_trace_kprobe(tk, file); + return enable_trace_kprobe(event, file); case TRACE_REG_UNREGISTER: - return disable_trace_kprobe(tk, file); + return disable_trace_kprobe(event, file); #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return enable_trace_kprobe(tk, NULL); + return enable_trace_kprobe(event, NULL); case TRACE_REG_PERF_UNREGISTER: - return disable_trace_kprobe(tk, NULL); + return disable_trace_kprobe(event, NULL); case TRACE_REG_PERF_OPEN: case TRACE_REG_PERF_CLOSE: case TRACE_REG_PERF_ADD: @@ -1369,7 +1548,6 @@ static inline void init_trace_event_call(struct trace_kprobe *tk) call->flags = TRACE_EVENT_FL_KPROBE; call->class->reg = kprobe_register; - call->data = tk; } static int register_kprobe_event(struct trace_kprobe *tk) @@ -1432,7 +1610,9 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call) { struct trace_kprobe *tk; - tk = container_of(event_call, struct trace_kprobe, tp.call); + tk = trace_kprobe_primary_from_call(event_call); + if (unlikely(!tk)) + return; if (trace_probe_is_enabled(&tk->tp)) { WARN_ON(1); @@ -1577,7 +1757,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - enable_trace_kprobe(tk, file); + enable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } } @@ -1598,7 +1779,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - enable_trace_kprobe(tk, file); + enable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } } @@ -1631,7 +1813,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - 
disable_trace_kprobe(tk, file); + disable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM); @@ -1649,7 +1832,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - disable_trace_kprobe(tk, file); + disable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index cab4a5398f1d..d54ce252b05a 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -219,10 +219,10 @@ trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len, { int i; const char *ret = trace_seq_buffer_ptr(p); + const char *fmt = concatenate ? "%*phN" : "%*ph"; - for (i = 0; i < buf_len; i++) - trace_seq_printf(p, "%s%2.2x", concatenate || i == 0 ? "" : " ", - buf[i]); + for (i = 0; i < buf_len; i += 16) + trace_seq_printf(p, fmt, min(buf_len - i, 16), &buf[i]); trace_seq_putc(p, 0); return ret; diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index c3fd849d4a8f..d4e31e969206 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -6,6 +6,7 @@ * */ #include <linux/seq_file.h> +#include <linux/security.h> #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/ftrace.h> @@ -348,6 +349,12 @@ static const struct seq_operations show_format_seq_ops = { static int ftrace_formats_open(struct inode *inode, struct file *file) { + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + return seq_open(file, &show_format_seq_ops); } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index dbef0d135075..905b10af5d5c 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -178,6 +178,16 @@ void __trace_probe_log_err(int offset, int err_type) if (!command) return; + if 
(trace_probe_log.index >= trace_probe_log.argc) { + /** + * Set the error position to just after the last arg + space. + * Note that len includes the terminal null and the cursor + * appears at pos + 1. + */ + pos = len; + offset = 0; + } + /* And make a command string from argv array */ p = command; for (i = 0; i < trace_probe_log.argc; i++) { @@ -316,6 +326,29 @@ inval_var: return -EINVAL; } +static int str_to_immediate(char *str, unsigned long *imm) +{ + if (isdigit(str[0])) + return kstrtoul(str, 0, imm); + else if (str[0] == '-') + return kstrtol(str, 0, (long *)imm); + else if (str[0] == '+') + return kstrtol(str + 1, 0, (long *)imm); + return -EINVAL; +} + +static int __parse_imm_string(char *str, char **pbuf, int offs) +{ + size_t len = strlen(str); + + if (str[len - 1] != '"') { + trace_probe_log_err(offs + len, IMMSTR_NO_CLOSE); + return -EINVAL; + } + *pbuf = kstrndup(str, len - 1, GFP_KERNEL); + return 0; +} + /* Recursive argument parser */ static int parse_probe_arg(char *arg, const struct fetch_type *type, @@ -430,7 +463,8 @@ parse_probe_arg(char *arg, const struct fetch_type *type, ret = parse_probe_arg(arg, t2, &code, end, flags, offs); if (ret) break; - if (code->op == FETCH_OP_COMM) { + if (code->op == FETCH_OP_COMM || + code->op == FETCH_OP_DATA) { trace_probe_log_err(offs, COMM_CANT_DEREF); return -EINVAL; } @@ -444,6 +478,21 @@ parse_probe_arg(char *arg, const struct fetch_type *type, code->offset = offset; } break; + case '\\': /* Immediate value */ + if (arg[1] == '"') { /* Immediate string */ + ret = __parse_imm_string(arg + 2, &tmp, offs + 2); + if (ret) + break; + code->op = FETCH_OP_DATA; + code->data = tmp; + } else { + ret = str_to_immediate(arg + 1, &code->immediate); + if (ret) + trace_probe_log_err(offs + 1, BAD_IMM); + else + code->op = FETCH_OP_IMM; + } + break; } if (!ret && code->op == FETCH_OP_NOP) { /* Parsed, but do not find fetch method */ @@ -542,8 +591,11 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, 
} } - /* Since $comm can not be dereferred, we can find $comm by strcmp */ - if (strcmp(arg, "$comm") == 0) { + /* + * Since $comm and immediate string can not be dereferenced, + * we can find those by strcmp. + */ + if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) { /* The type of $comm must be "string", and not an array. */ if (parg->count || (t && strcmp(t, "string"))) return -EINVAL; @@ -580,7 +632,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, if (!strcmp(parg->type->name, "string") || !strcmp(parg->type->name, "ustring")) { if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF && - code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM) { + code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM && + code->op != FETCH_OP_DATA) { trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_STRING); ret = -EINVAL; @@ -589,9 +642,10 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) || parg->count) { /* - * IMM and COMM is pointing actual address, those must - * be kept, and if parg->count != 0, this is an array - * of string pointers instead of string address itself. + * IMM, DATA and COMM point to actual addresses; those + * must be kept, and if parg->count != 0, this is an + * array of string pointers instead of string address + * itself. 
*/ code++; if (code->op != FETCH_OP_NOP) { @@ -665,7 +719,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, fail: if (ret) { for (code = tmp; code < tmp + FETCH_INSN_MAX; code++) - if (code->op == FETCH_NOP_SYMBOL) + if (code->op == FETCH_NOP_SYMBOL || + code->op == FETCH_OP_DATA) kfree(code->data); } kfree(tmp); @@ -736,7 +791,8 @@ void traceprobe_free_probe_arg(struct probe_arg *arg) struct fetch_insn *code = arg->code; while (code && code->op != FETCH_OP_END) { - if (code->op == FETCH_NOP_SYMBOL) + if (code->op == FETCH_NOP_SYMBOL || + code->op == FETCH_OP_DATA) kfree(code->data); code++; } @@ -886,43 +942,85 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call, return 0; } +static void trace_probe_event_free(struct trace_probe_event *tpe) +{ + kfree(tpe->class.system); + kfree(tpe->call.name); + kfree(tpe->call.print_fmt); + kfree(tpe); +} + +int trace_probe_append(struct trace_probe *tp, struct trace_probe *to) +{ + if (trace_probe_has_sibling(tp)) + return -EBUSY; + + list_del_init(&tp->list); + trace_probe_event_free(tp->event); + + tp->event = to->event; + list_add_tail(&tp->list, trace_probe_probe_list(to)); + + return 0; +} + +void trace_probe_unlink(struct trace_probe *tp) +{ + list_del_init(&tp->list); + if (list_empty(trace_probe_probe_list(tp))) + trace_probe_event_free(tp->event); + tp->event = NULL; +} void trace_probe_cleanup(struct trace_probe *tp) { - struct trace_event_call *call = trace_probe_event_call(tp); int i; for (i = 0; i < tp->nr_args; i++) traceprobe_free_probe_arg(&tp->args[i]); - kfree(call->class->system); - kfree(call->name); - kfree(call->print_fmt); + if (tp->event) + trace_probe_unlink(tp); } int trace_probe_init(struct trace_probe *tp, const char *event, const char *group) { - struct trace_event_call *call = trace_probe_event_call(tp); + struct trace_event_call *call; + int ret = 0; if (!event || !group) return -EINVAL; - call->class = &tp->class; - call->name = kstrdup(event, 
GFP_KERNEL); - if (!call->name) + tp->event = kzalloc(sizeof(struct trace_probe_event), GFP_KERNEL); + if (!tp->event) return -ENOMEM; - tp->class.system = kstrdup(group, GFP_KERNEL); - if (!tp->class.system) { - kfree(call->name); - call->name = NULL; - return -ENOMEM; + INIT_LIST_HEAD(&tp->event->files); + INIT_LIST_HEAD(&tp->event->class.fields); + INIT_LIST_HEAD(&tp->event->probes); + INIT_LIST_HEAD(&tp->list); + list_add(&tp->event->probes, &tp->list); + + call = trace_probe_event_call(tp); + call->class = &tp->event->class; + call->name = kstrdup(event, GFP_KERNEL); + if (!call->name) { + ret = -ENOMEM; + goto error; + } + + tp->event->class.system = kstrdup(group, GFP_KERNEL); + if (!tp->event->class.system) { + ret = -ENOMEM; + goto error; } - INIT_LIST_HEAD(&tp->files); - INIT_LIST_HEAD(&tp->class.fields); return 0; + +error: + trace_probe_cleanup(tp); + return ret; } int trace_probe_register_event_call(struct trace_probe *tp) @@ -951,7 +1049,7 @@ int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file) link->file = file; INIT_LIST_HEAD(&link->list); - list_add_tail_rcu(&link->list, &tp->files); + list_add_tail_rcu(&link->list, &tp->event->files); trace_probe_set_flag(tp, TP_FLAG_TRACE); return 0; } @@ -982,8 +1080,51 @@ int trace_probe_remove_file(struct trace_probe *tp, synchronize_rcu(); kfree(link); - if (list_empty(&tp->files)) + if (list_empty(&tp->event->files)) trace_probe_clear_flag(tp, TP_FLAG_TRACE); return 0; } + +/* + * Return the smallest index of different type argument (start from 1). + * If all argument types and name are same, return 0. 
+ */ +int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b) +{ + int i; + + /* In case of more arguments */ + if (a->nr_args < b->nr_args) + return a->nr_args + 1; + if (a->nr_args > b->nr_args) + return b->nr_args + 1; + + for (i = 0; i < a->nr_args; i++) { + if ((b->nr_args <= i) || + ((a->args[i].type != b->args[i].type) || + (a->args[i].count != b->args[i].count) || + strcmp(a->args[i].name, b->args[i].name))) + return i + 1; + } + + return 0; +} + +bool trace_probe_match_command_args(struct trace_probe *tp, + int argc, const char **argv) +{ + char buf[MAX_ARGSTR_LEN + 1]; + int i; + + if (tp->nr_args < argc) + return false; + + for (i = 0; i < argc; i++) { + snprintf(buf, sizeof(buf), "%s=%s", + tp->args[i].name, tp->args[i].comm); + if (strcmp(buf, argv[i])) + return false; + } + return true; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index d1714820efe1..4ee703728aec 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -89,6 +89,7 @@ enum fetch_op { FETCH_OP_COMM, /* Current comm */ FETCH_OP_ARG, /* Function argument : .param */ FETCH_OP_FOFFS, /* File offset: .immediate */ + FETCH_OP_DATA, /* Allocated data: .data */ // Stage 2 (dereference) op FETCH_OP_DEREF, /* Dereference: .offset */ FETCH_OP_UDEREF, /* User-space Dereference: .offset */ @@ -222,11 +223,18 @@ struct probe_arg { const struct fetch_type *type; /* Type of this argument */ }; -struct trace_probe { +/* Event call and class holder */ +struct trace_probe_event { unsigned int flags; /* For TP_FLAG_* */ struct trace_event_class class; struct trace_event_call call; struct list_head files; + struct list_head probes; +}; + +struct trace_probe { + struct list_head list; + struct trace_probe_event *event; ssize_t size; /* trace entry size */ unsigned int nr_args; struct probe_arg args[]; @@ -240,19 +248,19 @@ struct event_file_link { static inline bool trace_probe_test_flag(struct trace_probe *tp, unsigned int flag) { - return 
!!(tp->flags & flag); + return !!(tp->event->flags & flag); } static inline void trace_probe_set_flag(struct trace_probe *tp, unsigned int flag) { - tp->flags |= flag; + tp->event->flags |= flag; } static inline void trace_probe_clear_flag(struct trace_probe *tp, unsigned int flag) { - tp->flags &= ~flag; + tp->event->flags &= ~flag; } static inline bool trace_probe_is_enabled(struct trace_probe *tp) @@ -262,45 +270,76 @@ static inline bool trace_probe_is_enabled(struct trace_probe *tp) static inline const char *trace_probe_name(struct trace_probe *tp) { - return trace_event_name(&tp->call); + return trace_event_name(&tp->event->call); } static inline const char *trace_probe_group_name(struct trace_probe *tp) { - return tp->call.class->system; + return tp->event->call.class->system; } static inline struct trace_event_call * trace_probe_event_call(struct trace_probe *tp) { - return &tp->call; + return &tp->event->call; +} + +static inline struct trace_probe_event * +trace_probe_event_from_call(struct trace_event_call *event_call) +{ + return container_of(event_call, struct trace_probe_event, call); +} + +static inline struct trace_probe * +trace_probe_primary_from_call(struct trace_event_call *call) +{ + struct trace_probe_event *tpe = trace_probe_event_from_call(call); + + return list_first_entry(&tpe->probes, struct trace_probe, list); +} + +static inline struct list_head *trace_probe_probe_list(struct trace_probe *tp) +{ + return &tp->event->probes; +} + +static inline bool trace_probe_has_sibling(struct trace_probe *tp) +{ + struct list_head *list = trace_probe_probe_list(tp); + + return !list_empty(list) && !list_is_singular(list); } static inline int trace_probe_unregister_event_call(struct trace_probe *tp) { /* tp->event is unregistered in trace_remove_event_call() */ - return trace_remove_event_call(&tp->call); + return trace_remove_event_call(&tp->event->call); } static inline bool trace_probe_has_single_file(struct trace_probe *tp) { - return 
!!list_is_singular(&tp->files); + return !!list_is_singular(&tp->event->files); } int trace_probe_init(struct trace_probe *tp, const char *event, const char *group); void trace_probe_cleanup(struct trace_probe *tp); +int trace_probe_append(struct trace_probe *tp, struct trace_probe *to); +void trace_probe_unlink(struct trace_probe *tp); int trace_probe_register_event_call(struct trace_probe *tp); int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file); int trace_probe_remove_file(struct trace_probe *tp, struct trace_event_file *file); struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, struct trace_event_file *file); +int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b); +bool trace_probe_match_command_args(struct trace_probe *tp, + int argc, const char **argv); #define trace_probe_for_each_link(pos, tp) \ - list_for_each_entry(pos, &(tp)->files, list) + list_for_each_entry(pos, &(tp)->event->files, list) #define trace_probe_for_each_link_rcu(pos, tp) \ - list_for_each_entry_rcu(pos, &(tp)->files, list) + list_for_each_entry_rcu(pos, &(tp)->event->files, list) /* Check the name is good for event/group/fields */ static inline bool is_good_name(const char *name) @@ -370,6 +409,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_VAR, "Invalid $-valiable specified"), \ C(BAD_REG_NAME, "Invalid register name"), \ C(BAD_MEM_ADDR, "Invalid memory address"), \ + C(BAD_IMM, "Invalid immediate value"), \ + C(IMMSTR_NO_CLOSE, "String is not closed with '\"'"), \ C(FILE_ON_KPROBE, "File offset is not available with kprobe"), \ C(BAD_FILE_OFFS, "Invalid file offset value"), \ C(SYM_ON_UPROBE, "Symbol is not available with uprobe"), \ @@ -393,7 +434,10 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(ARG_TOO_LONG, "Argument expression is too long"), \ C(NO_ARG_BODY, "No argument expression"), \ C(BAD_INSN_BNDRY, "Probe point is not an 
instruction boundary"),\ - C(FAIL_REG_PROBE, "Failed to register probe event"), + C(FAIL_REG_PROBE, "Failed to register probe event"),\ + C(DIFF_PROBE_TYPE, "Probe type is different from existing probe"),\ + C(DIFF_ARG_TYPE, "Argument type or name is different from existing probe"),\ + C(SAME_PROBE, "There is already the exact same probe event"), #undef C #define C(a, b) TP_ERR_##a diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 743b2b520d34..5e43b9664eca 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -579,8 +579,7 @@ probe_wakeup(void *ignore, struct task_struct *p) else tracing_dl = 0; - wakeup_task = p; - get_task_struct(wakeup_task); + wakeup_task = get_task_struct(p); local_save_flags(flags); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 5d16f73898db..4df9a209f7ca 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -5,6 +5,7 @@ */ #include <linux/sched/task_stack.h> #include <linux/stacktrace.h> +#include <linux/security.h> #include <linux/kallsyms.h> #include <linux/seq_file.h> #include <linux/spinlock.h> @@ -53,6 +54,104 @@ static void print_max_stack(void) } } +/* + * The stack tracer looks for a maximum stack at each call from a function. It + * registers a callback from ftrace, and in that callback it examines the stack + * size. It determines the stack size from the variable passed in, which is the + * address of a local variable in the stack_trace_call() callback function. + * The stack size is calculated by the address of the local variable to the top + * of the current stack. If that size is smaller than the currently saved max + * stack size, nothing more is done. + * + * If the size of the stack is greater than the maximum recorded size, then the + * following algorithm takes place. 
+ * + * For architectures (like x86) that store the function's return address before + * saving the function's local variables, the stack will look something like + * this: + * + * [ top of stack ] + * 0: sys call entry frame + * 10: return addr to entry code + * 11: start of sys_foo frame + * 20: return addr to sys_foo + * 21: start of kernel_func_bar frame + * 30: return addr to kernel_func_bar + * 31: [ do trace stack here ] + * + * The save_stack_trace() is called returning all the functions it finds in the + * current stack. Which would be (from the bottom of the stack to the top): + * + * return addr to kernel_func_bar + * return addr to sys_foo + * return addr to entry code + * + * Now to figure out how much each of these functions' local variable size is, + * a search of the stack is made to find these values. When a match is made, it + * is added to the stack_dump_trace[] array. The offset into the stack is saved + * in the stack_trace_index[] array. The above example would show: + * + * stack_dump_trace[] | stack_trace_index[] + * ------------------ + ------------------- + * return addr to kernel_func_bar | 30 + * return addr to sys_foo | 20 + * return addr to entry | 10 + * + * The print_max_stack() function above, uses these values to print the size of + * each function's portion of the stack. + * + * for (i = 0; i < nr_entries; i++) { + * size = i == nr_entries - 1 ? 
stack_trace_index[i] : + * stack_trace_index[i] - stack_trace_index[i+1] + * print "%d %d %d %s\n", i, stack_trace_index[i], size, stack_dump_trace[i]; + * } + * + * The above shows + * + * depth size location + * ----- ---- -------- + * 0 30 10 kernel_func_bar + * 1 20 10 sys_foo + * 2 10 10 entry code + * + * Now for architectures that might save the return address after the function's + * local variables (saving the link register before calling nested functions), + * this will cause the stack to look a little different: + * + * [ top of stack ] + * 0: sys call entry frame + * 10: start of sys_foo_frame + * 19: return addr to entry code << lr saved before calling kernel_func_bar + * 20: start of kernel_func_bar frame + * 29: return addr to sys_foo_frame << lr saved before calling next function + * 30: [ do trace stack here ] + * + * Although the functions returned by save_stack_trace() may be the same, the + * placement in the stack will be different. Using the same algorithm as above + * would yield: + * + * stack_dump_trace[] | stack_trace_index[] + * ------------------ + ------------------- + * return addr to kernel_func_bar | 30 + * return addr to sys_foo | 29 + * return addr to entry | 19 + * + * Where the mapping is off by one: + * + * kernel_func_bar stack frame size is 29 - 19 not 30 - 29! + * + * To fix this, if the architecture sets ARCH_FTRACE_SHIFT_STACK_TRACER the + * values in stack_trace_index[] are shifted by one and the number of + * stack trace entries is decremented by one. + * + * stack_dump_trace[] | stack_trace_index[] + * ------------------ + ------------------- + * return addr to kernel_func_bar | 29 + * return addr to sys_foo | 19 + * + * Although the entry function is not displayed, the first function (sys_foo) + * will still include the stack size of it. 
+ */ static void check_stack(unsigned long ip, unsigned long *stack) { unsigned long this_size, flags; unsigned long *p, *top, *start; @@ -158,6 +257,20 @@ static void check_stack(unsigned long ip, unsigned long *stack) i++; } +#ifdef ARCH_FTRACE_SHIFT_STACK_TRACER + /* + * Some archs will store the link register before calling + * nested functions. This means the saved return address + * comes after the local storage, and we need to shift + * for that. + */ + if (x > 1) { + memmove(&stack_trace_index[0], &stack_trace_index[1], + sizeof(stack_trace_index[0]) * (x - 1)); + x--; + } +#endif + stack_trace_nr_entries = x; if (task_stack_end_corrupted(current)) { @@ -358,6 +471,12 @@ static const struct seq_operations stack_trace_seq_ops = { static int stack_trace_open(struct inode *inode, struct file *file) { + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + return seq_open(file, &stack_trace_seq_ops); } @@ -375,6 +494,7 @@ stack_trace_filter_open(struct inode *inode, struct file *file) { struct ftrace_ops *ops = inode->i_private; + /* Checks for tracefs lockdown */ return ftrace_regex_open(ops, FTRACE_ITER_FILTER, inode, file); } diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 75bf1bcb4a8a..9ab0a1a7ad5e 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -9,7 +9,7 @@ * */ - +#include <linux/security.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/rbtree.h> @@ -238,6 +238,10 @@ static int tracing_stat_open(struct inode *inode, struct file *file) struct seq_file *m; struct stat_session *session = inode->i_private; + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + ret = stat_seq_init(session); if (ret) return ret; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 1ceedb9146b1..352073d36585 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -7,6 +7,7 @@ */ #define pr_fmt(fmt) "trace_uprobe: " 
fmt +#include <linux/security.h> #include <linux/ctype.h> #include <linux/module.h> #include <linux/uaccess.h> @@ -44,7 +45,7 @@ static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_uprobe_release(struct dyn_event *ev); static bool trace_uprobe_is_busy(struct dyn_event *ev); static bool trace_uprobe_match(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations trace_uprobe_ops = { .create = trace_uprobe_create, @@ -248,6 +249,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, case FETCH_OP_COMM: val = FETCH_TOKEN_COMM; break; + case FETCH_OP_DATA: + val = (unsigned long)code->data; + break; case FETCH_OP_FOFFS: val = translate_user_vaddr(code->immediate); break; @@ -284,13 +288,54 @@ static bool trace_uprobe_is_busy(struct dyn_event *ev) return trace_probe_is_enabled(&tu->tp); } +static bool trace_uprobe_match_command_head(struct trace_uprobe *tu, + int argc, const char **argv) +{ + char buf[MAX_ARGSTR_LEN + 1]; + int len; + + if (!argc) + return true; + + len = strlen(tu->filename); + if (strncmp(tu->filename, argv[0], len) || argv[0][len] != ':') + return false; + + if (tu->ref_ctr_offset == 0) + snprintf(buf, sizeof(buf), "0x%0*lx", + (int)(sizeof(void *) * 2), tu->offset); + else + snprintf(buf, sizeof(buf), "0x%0*lx(0x%lx)", + (int)(sizeof(void *) * 2), tu->offset, + tu->ref_ctr_offset); + if (strcmp(buf, &argv[0][len + 1])) + return false; + + argc--; argv++; + + return trace_probe_match_command_args(&tu->tp, argc, argv); +} + static bool trace_uprobe_match(const char *system, const char *event, - struct dyn_event *ev) + int argc, const char **argv, struct dyn_event *ev) { struct trace_uprobe *tu = to_trace_uprobe(ev); return strcmp(trace_probe_name(&tu->tp), event) == 0 && - (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0); + (!system || strcmp(trace_probe_group_name(&tu->tp), 
system) == 0) && + trace_uprobe_match_command_head(tu, argc, argv); +} + +static nokprobe_inline struct trace_uprobe * +trace_uprobe_primary_from_call(struct trace_event_call *call) +{ + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return NULL; + + return container_of(tp, struct trace_uprobe, tp); } /* @@ -352,15 +397,76 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu) { int ret; + if (trace_probe_has_sibling(&tu->tp)) + goto unreg; + ret = unregister_uprobe_event(tu); if (ret) return ret; +unreg: dyn_event_remove(&tu->devent); + trace_probe_unlink(&tu->tp); free_trace_uprobe(tu); return 0; } +static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig, + struct trace_uprobe *comp) +{ + struct trace_probe_event *tpe = orig->tp.event; + struct trace_probe *pos; + struct inode *comp_inode = d_real_inode(comp->path.dentry); + int i; + + list_for_each_entry(pos, &tpe->probes, list) { + orig = container_of(pos, struct trace_uprobe, tp); + if (comp_inode != d_real_inode(orig->path.dentry) || + comp->offset != orig->offset) + continue; + + /* + * trace_probe_compare_arg_type() ensured that nr_args and + * each argument name and type are same. Let's compare comm. 
+ */ + for (i = 0; i < orig->tp.nr_args; i++) { + if (strcmp(orig->tp.args[i].comm, + comp->tp.args[i].comm)) + break; + } + + if (i == orig->tp.nr_args) + return true; + } + + return false; +} + +static int append_trace_uprobe(struct trace_uprobe *tu, struct trace_uprobe *to) +{ + int ret; + + ret = trace_probe_compare_arg_type(&tu->tp, &to->tp); + if (ret) { + /* Note that argument starts index = 2 */ + trace_probe_log_set_index(ret + 1); + trace_probe_log_err(0, DIFF_ARG_TYPE); + return -EEXIST; + } + if (trace_uprobe_has_same_uprobe(to, tu)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, SAME_PROBE); + return -EEXIST; + } + + /* Append to existing event */ + ret = trace_probe_append(&tu->tp, &to->tp); + if (!ret) + dyn_event_add(&tu->devent); + + return ret; +} + /* * Uprobe with multiple reference counter is not allowed. i.e. * If inode and offset matches, reference counter offset *must* @@ -370,25 +476,21 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu) * as the new one does not conflict with any other existing * ones. */ -static struct trace_uprobe *find_old_trace_uprobe(struct trace_uprobe *new) +static int validate_ref_ctr_offset(struct trace_uprobe *new) { struct dyn_event *pos; - struct trace_uprobe *tmp, *old = NULL; + struct trace_uprobe *tmp; struct inode *new_inode = d_real_inode(new->path.dentry); - old = find_probe_event(trace_probe_name(&new->tp), - trace_probe_group_name(&new->tp)); - for_each_trace_uprobe(tmp, pos) { - if ((old ? 
old != tmp : true) && - new_inode == d_real_inode(tmp->path.dentry) && + if (new_inode == d_real_inode(tmp->path.dentry) && new->offset == tmp->offset && new->ref_ctr_offset != tmp->ref_ctr_offset) { pr_warn("Reference counter offset mismatch."); - return ERR_PTR(-EINVAL); + return -EINVAL; } } - return old; + return 0; } /* Register a trace_uprobe and probe_event */ @@ -399,18 +501,22 @@ static int register_trace_uprobe(struct trace_uprobe *tu) mutex_lock(&event_mutex); - /* register as an event */ - old_tu = find_old_trace_uprobe(tu); - if (IS_ERR(old_tu)) { - ret = PTR_ERR(old_tu); + ret = validate_ref_ctr_offset(tu); + if (ret) goto end; - } + /* register as an event */ + old_tu = find_probe_event(trace_probe_name(&tu->tp), + trace_probe_group_name(&tu->tp)); if (old_tu) { - /* delete old event */ - ret = unregister_trace_uprobe(old_tu); - if (ret) - goto end; + if (is_ret_probe(tu) != is_ret_probe(old_tu)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, DIFF_PROBE_TYPE); + ret = -EEXIST; + } else { + ret = append_trace_uprobe(tu, old_tu); + } + goto end; } ret = register_uprobe_event(tu); @@ -664,6 +770,10 @@ static int probes_open(struct inode *inode, struct file *file) { int ret; + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { ret = dyn_events_release_all(&trace_uprobe_ops); if (ret) @@ -713,6 +823,12 @@ static const struct seq_operations profile_seq_op = { static int profile_open(struct inode *inode, struct file *file) { + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + return seq_open(file, &profile_seq_op); } @@ -897,7 +1013,10 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e u8 *data; entry = (struct uprobe_trace_entry_head *)iter->ent; - tu = container_of(event, struct trace_uprobe, tp.call.event); + tu = trace_uprobe_primary_from_call( + container_of(event, struct 
trace_event_call, event)); + if (unlikely(!tu)) + goto out; if (is_ret_probe(tu)) { trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", @@ -924,27 +1043,71 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self, enum uprobe_filter_ctx ctx, struct mm_struct *mm); -static int -probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, - filter_func_t filter) +static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter) +{ + int ret; + + tu->consumer.filter = filter; + tu->inode = d_real_inode(tu->path.dentry); + + if (tu->ref_ctr_offset) + ret = uprobe_register_refctr(tu->inode, tu->offset, + tu->ref_ctr_offset, &tu->consumer); + else + ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + + if (ret) + tu->inode = NULL; + + return ret; +} + +static void __probe_event_disable(struct trace_probe *tp) { - bool enabled = trace_probe_is_enabled(&tu->tp); + struct trace_probe *pos; + struct trace_uprobe *tu; + + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); + if (!tu->inode) + continue; + + WARN_ON(!uprobe_filter_is_empty(&tu->filter)); + + uprobe_unregister(tu->inode, tu->offset, &tu->consumer); + tu->inode = NULL; + } +} + +static int probe_event_enable(struct trace_event_call *call, + struct trace_event_file *file, filter_func_t filter) +{ + struct trace_probe *pos, *tp; + struct trace_uprobe *tu; + bool enabled; int ret; + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + enabled = trace_probe_is_enabled(tp); + + /* This may also change "enabled" state */ if (file) { - if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) + if (trace_probe_test_flag(tp, TP_FLAG_PROFILE)) return -EINTR; - ret = trace_probe_add_file(&tu->tp, file); + ret = trace_probe_add_file(tp, file); if (ret < 0) return ret; } else { - if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) + if (trace_probe_test_flag(tp, TP_FLAG_TRACE)) return -EINTR; - 
trace_probe_set_flag(&tu->tp, TP_FLAG_PROFILE); + trace_probe_set_flag(tp, TP_FLAG_PROFILE); } + tu = container_of(tp, struct trace_uprobe, tp); WARN_ON(!uprobe_filter_is_empty(&tu->filter)); if (enabled) @@ -954,18 +1117,15 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, if (ret) goto err_flags; - tu->consumer.filter = filter; - tu->inode = d_real_inode(tu->path.dentry); - if (tu->ref_ctr_offset) { - ret = uprobe_register_refctr(tu->inode, tu->offset, - tu->ref_ctr_offset, &tu->consumer); - } else { - ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); + ret = trace_uprobe_enable(tu, filter); + if (ret) { + __probe_event_disable(tp); + goto err_buffer; + } } - if (ret) - goto err_buffer; - return 0; err_buffer: @@ -973,33 +1133,35 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, err_flags: if (file) - trace_probe_remove_file(&tu->tp, file); + trace_probe_remove_file(tp, file); else - trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE); + trace_probe_clear_flag(tp, TP_FLAG_PROFILE); return ret; } -static void -probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file) +static void probe_event_disable(struct trace_event_call *call, + struct trace_event_file *file) { - if (!trace_probe_is_enabled(&tu->tp)) + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return; + + if (!trace_probe_is_enabled(tp)) return; if (file) { - if (trace_probe_remove_file(&tu->tp, file) < 0) + if (trace_probe_remove_file(tp, file) < 0) return; - if (trace_probe_is_enabled(&tu->tp)) + if (trace_probe_is_enabled(tp)) return; } else - trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE); - - WARN_ON(!uprobe_filter_is_empty(&tu->filter)); - - uprobe_unregister(tu->inode, tu->offset, &tu->consumer); - tu->inode = NULL; + trace_probe_clear_flag(tp, 
TP_FLAG_PROFILE); + __probe_event_disable(tp); uprobe_buffer_disable(); } @@ -1007,7 +1169,11 @@ static int uprobe_event_define_fields(struct trace_event_call *event_call) { int ret, size; struct uprobe_trace_entry_head field; - struct trace_uprobe *tu = event_call->data; + struct trace_uprobe *tu; + + tu = trace_uprobe_primary_from_call(event_call); + if (unlikely(!tu)) + return -ENODEV; if (is_ret_probe(tu)) { DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0); @@ -1100,6 +1266,27 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) return err; } +static int uprobe_perf_multi_call(struct trace_event_call *call, + struct perf_event *event, + int (*op)(struct trace_uprobe *tu, struct perf_event *event)) +{ + struct trace_probe *pos, *tp; + struct trace_uprobe *tu; + int ret = 0; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); + ret = op(tu, event); + if (ret) + break; + } + + return ret; +} static bool uprobe_perf_filter(struct uprobe_consumer *uc, enum uprobe_filter_ctx ctx, struct mm_struct *mm) { @@ -1213,30 +1400,29 @@ static int trace_uprobe_register(struct trace_event_call *event, enum trace_reg type, void *data) { - struct trace_uprobe *tu = event->data; struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: - return probe_event_enable(tu, file, NULL); + return probe_event_enable(event, file, NULL); case TRACE_REG_UNREGISTER: - probe_event_disable(tu, file); + probe_event_disable(event, file); return 0; #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return probe_event_enable(tu, NULL, uprobe_perf_filter); + return probe_event_enable(event, NULL, uprobe_perf_filter); case TRACE_REG_PERF_UNREGISTER: - probe_event_disable(tu, NULL); + probe_event_disable(event, NULL); return 0; case TRACE_REG_PERF_OPEN: - return uprobe_perf_open(tu, 
data); + return uprobe_perf_multi_call(event, data, uprobe_perf_open); case TRACE_REG_PERF_CLOSE: - return uprobe_perf_close(tu, data); + return uprobe_perf_multi_call(event, data, uprobe_perf_close); #endif default: @@ -1330,7 +1516,6 @@ static inline void init_trace_event_call(struct trace_uprobe *tu) call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY; call->class->reg = trace_uprobe_register; - call->data = tu; } static int register_uprobe_event(struct trace_uprobe *tu) @@ -1399,7 +1584,7 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call) { struct trace_uprobe *tu; - tu = container_of(event_call, struct trace_uprobe, tp.call); + tu = trace_uprobe_primary_from_call(event_call); free_trace_uprobe(tu); } |
