From d8c4deee6dc6876ae3b7d09a5b58138a57ee45f6 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 8 Sep 2017 23:55:17 -0700 Subject: tracing: Remove obsolete sched_switch tracer selftest Since commit 87d80de2800d087ea833cb79bc13f85ff34ed49f ("tracing: Remove obsolete sched_switch tracer"), the sched_switch tracer selftest is no longer used. This patch removes the same. Link: http://lkml.kernel.org/r/20170909065517.22262-1-joelaf@google.com Cc: Ingo Molnar Cc: kernel-team@android.com Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.h | 2 -- kernel/trace/trace_selftest.c | 32 -------------------------------- 2 files changed, 34 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 652c682707cd..3c078e2ad777 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -738,8 +738,6 @@ extern int trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr); -extern int trace_selftest_startup_sched_switch(struct tracer *trace, - struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); /* diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index b17ec642793b..364f78abdf47 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1150,38 +1150,6 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) } #endif /* CONFIG_SCHED_TRACER */ -#ifdef CONFIG_CONTEXT_SWITCH_TRACER -int -trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr) -{ - unsigned long count; - int ret; - - /* start the tracing */ - ret = tracer_init(trace, tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - return ret; - } - - /* Sleep for a 1/10 of a second */ - msleep(100); - /* stop the tracing. */ - tracing_stop(); - /* check the trace buffer */ - ret = trace_test_buffer(&tr->trace_buffer, &count); - trace->reset(tr); - tracing_start(); - - if (!ret && !count) { - printk(KERN_CONT ".. no entries found .."); - ret = -1; - } - - return ret; -} -#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ - #ifdef CONFIG_BRANCH_TRACER int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) -- cgit v1.3-14-g43fede From 6e7a2398114a2597a0995f96f44d908741ae5035 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 23 Aug 2017 12:23:09 +0100 Subject: tracing: Remove redundant unread variable ret Integer ret is being assigned but never used and hence it is redundant. Remove it, fixes clang warning: trace_events_hist.c:1077:3: warning: Value stored to 'ret' is never read Link: http://lkml.kernel.org/r/20170823112309.19383-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1c21d0e2a145..f123b5d0c226 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1062,7 +1062,7 @@ static void hist_trigger_show(struct seq_file *m, struct event_trigger_data *data, int n) { struct hist_trigger_data *hist_data; - int n_entries, ret = 0; + int n_entries; if (n > 0) seq_puts(m, "\n\n"); @@ -1073,10 +1073,8 @@ static void hist_trigger_show(struct seq_file *m, hist_data = data->private_data; n_entries = print_entries(m, hist_data); - if (n_entries < 0) { - ret = n_entries; + if (n_entries < 0) n_entries = 0; - } seq_printf(m, "\nTotals:\n Hits: %llu\n Entries: %u\n Dropped: %llu\n", (u64)atomic64_read(&hist_data->map->hits), -- cgit v1.3-14-g43fede From 12ecef0cb12102d8c034770173d2d1363cb97d52 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 21 Sep 2017 16:22:49 -0400 Subject: tracing: Reverse the order of trace_types_lock and event_mutex In order to make future changes where we need to call tracing_set_clock() from within an event command, the order of trace_types_lock and event_mutex must be reversed, as the event command will hold event_mutex and the trace_types_lock is taken from within tracing_set_clock(). Link: http://lkml.kernel.org/r/20170921162249.0dde3dca@gandalf.local.home Requested-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 5 +++++ kernel/trace/trace_events.c | 31 +++++++++++++++---------------- 2 files changed, 20 insertions(+), 16 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 752e5daf0896..5f1ac7d3402c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7687,6 +7687,7 @@ static int instance_mkdir(const char *name) struct trace_array *tr; int ret; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = -EEXIST; @@ -7742,6 +7743,7 @@ static int instance_mkdir(const char *name) list_add(&tr->list, &ftrace_trace_arrays); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return 0; @@ -7753,6 +7755,7 @@ static int instance_mkdir(const char *name) out_unlock: mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; @@ -7765,6 +7768,7 @@ static int instance_rmdir(const char *name) int ret; int i; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = -ENODEV; @@ -7810,6 +7814,7 @@ static int instance_rmdir(const char *name) out_unlock: mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 87468398b9ed..ec0f9aa4e151 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1406,8 +1406,8 @@ static int subsystem_open(struct inode *inode, struct file *filp) return -ENODEV; /* Make sure the system still exists */ - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { list_for_each_entry(dir, &tr->systems, list) { if (dir == inode->i_private) { @@ -1421,8 +1421,8 @@ static int subsystem_open(struct inode *inode, struct file *filp) } } exit_loop: - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); if (!system) return -ENODEV; @@ -2294,15 +2294,15 @@ static void __add_event_to_tracers(struct trace_event_call *call); int trace_add_event_call(struct trace_event_call *call) { int ret; - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); ret = __register_event(call, NULL); if (ret >= 0) __add_event_to_tracers(call); - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; } @@ -2356,13 +2356,13 @@ int trace_remove_event_call(struct trace_event_call *call) { int ret; - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); down_write(&trace_event_sem); ret = probe_remove_event_call(call); up_write(&trace_event_sem); - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; } @@ -2424,8 +2424,8 @@ static int trace_module_notify(struct notifier_block *self, { struct module *mod = data; - mutex_lock(&trace_types_lock); mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); switch (val) { case MODULE_STATE_COMING: trace_module_add_events(mod); @@ -2434,8 +2434,8 @@ static int trace_module_notify(struct notifier_block *self, trace_module_remove_events(mod); break; } - mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return 0; } @@ -2950,24 +2950,24 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) * creates the event hierachry in the @parent/events directory. * * Returns 0 on success. + * + * Must be called with event_mutex held. */ int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) { int ret; - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); ret = create_event_toplevel_files(parent, tr); if (ret) - goto out_unlock; + goto out; down_write(&trace_event_sem); __trace_add_event_dirs(tr); up_write(&trace_event_sem); - out_unlock: - mutex_unlock(&event_mutex); - + out: return ret; } @@ -2996,9 +2996,10 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr) return ret; } +/* Must be called with event_mutex held */ int event_trace_del_tracer(struct trace_array *tr) { - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); /* Disable any event triggers and associated soft-disabled events */ clear_event_triggers(tr); @@ -3019,8 +3020,6 @@ int event_trace_del_tracer(struct trace_array *tr) tr->event_dir = NULL; - mutex_unlock(&event_mutex); - return 0; } -- cgit v1.3-14-g43fede From 1a149d7d3f45d311da1f63473736c05f30ae8a75 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 22 Sep 2017 16:59:02 -0400 Subject: ring-buffer: Rewrite trace_recursive_(un)lock() to be simpler The current method to prevent the ring buffer from entering into a recursize loop is to use a bitmask and set the bit that maps to the current context (normal, softirq, irq or NMI), and if that bit was already set, it is considered a recursive loop. New code is being added that may require the ring buffer to be entered a second time in the current context. The recursive locking prevents that from happening. Instead of mapping a bitmask to the current context, just allow 4 levels of nesting in the ring buffer. This matches the 4 context levels that it can already nest. It is highly unlikely to have more than two levels, thus it should be fine when we add the second entry into the ring buffer. If that proves to be a problem, we can always up the number to 8. An added benefit is that reading preempt_count() to get the current level adds a very slight but noticeable overhead. This removes that need. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 64 ++++++++++++---------------------------------- 1 file changed, 17 insertions(+), 47 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 81279c6602ff..f6ee9b1ef62a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2538,61 +2538,29 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * The lock and unlock are done within a preempt disable section. * The current_context per_cpu variable can only be modified * by the current task between lock and unlock. But it can - * be modified more than once via an interrupt. To pass this - * information from the lock to the unlock without having to - * access the 'in_interrupt()' functions again (which do show - * a bit of overhead in something as critical as function tracing, - * we use a bitmask trick. + * be modified more than once via an interrupt. There are four + * different contexts that we need to consider. * - * bit 0 = NMI context - * bit 1 = IRQ context - * bit 2 = SoftIRQ context - * bit 3 = normal context. + * Normal context. + * SoftIRQ context + * IRQ context + * NMI context * - * This works because this is the order of contexts that can - * preempt other contexts. A SoftIRQ never preempts an IRQ - * context. - * - * When the context is determined, the corresponding bit is - * checked and set (if it was set, then a recursion of that context - * happened). - * - * On unlock, we need to clear this bit. To do so, just subtract - * 1 from the current_context and AND it to itself. - * - * (binary) - * 101 - 1 = 100 - * 101 & 100 = 100 (clearing bit zero) - * - * 1010 - 1 = 1001 - * 1010 & 1001 = 1000 (clearing bit 1) - * - * The least significant bit can be cleared this way, and it - * just so happens that it is the same bit corresponding to - * the current context. + * If for some reason the ring buffer starts to recurse, we + * only allow that to happen at most 4 times (one for each + * context). If it happens 5 times, then we consider this a + * recusive loop and do not let it go further. */ static __always_inline int trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { - unsigned int val = cpu_buffer->current_context; - int bit; - - if (in_interrupt()) { - if (in_nmi()) - bit = RB_CTX_NMI; - else if (in_irq()) - bit = RB_CTX_IRQ; - else - bit = RB_CTX_SOFTIRQ; - } else - bit = RB_CTX_NORMAL; - - if (unlikely(val & (1 << bit))) + if (cpu_buffer->current_context >= 4) return 1; - val |= (1 << bit); - cpu_buffer->current_context = val; + cpu_buffer->current_context++; + /* Interrupts must see this update */ + barrier(); return 0; } @@ -2600,7 +2568,9 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) static __always_inline void trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) { - cpu_buffer->current_context &= cpu_buffer->current_context - 1; + /* Don't let the dec leak out */ + barrier(); + cpu_buffer->current_context--; } /** -- cgit v1.3-14-g43fede From a15f7fc20389a8827d5859907568b201234d4b79 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 22 Sep 2017 14:58:17 -0500 Subject: tracing: Exclude 'generic fields' from histograms There are a small number of 'generic fields' (comm/COMM/cpu/CPU) that are found by trace_find_event_field() but are only meant for filtering. Specifically, they unlike normal fields, they have a size of 0 and thus wreak havoc when used as a histogram key. Exclude these (return -EINVAL) when used as histogram keys. Link: http://lkml.kernel.org/r/956154cbc3e8a4f0633d619b886c97f0f0edf7b4.1506105045.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f123b5d0c226..121d56850f24 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -450,7 +450,7 @@ static int create_val_field(struct hist_trigger_data *hist_data, } field = trace_find_event_field(file->event_call, field_name); - if (!field) { + if (!field || !field->size) { ret = -EINVAL; goto out; } @@ -548,7 +548,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, } field = trace_find_event_field(file->event_call, field_name); - if (!field) { + if (!field || !field->size) { ret = -EINVAL; goto out; } -- cgit v1.3-14-g43fede From 83c07ecc4203728e85fc4a2ce6fdf25d16ea118e Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 22 Sep 2017 14:58:18 -0500 Subject: tracing: Remove lookups from tracing_map hitcount Lookups inflate the hitcount, making it essentially useless. Only inserts and updates should really affect the hitcount anyway, so explicitly filter lookups out. Link: http://lkml.kernel.org/r/c8d9dc39d269a8abf88bf4102d0dfc65deb0fc7f.1506105045.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/tracing_map.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 305039b122fa..07e75344725b 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -428,7 +428,8 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) if (test_key && test_key == key_hash && entry->val && keys_match(key, entry->val->key, map->key_size)) { - atomic64_inc(&map->hits); + if (!lookup_only) + atomic64_inc(&map->hits); return entry->val; } -- cgit v1.3-14-g43fede From 4f36c2d85cedea60ad424d44534121ab0458069e Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 22 Sep 2017 14:58:19 -0500 Subject: tracing: Increase tracing map KEYS_MAX size The current default for the number of subkeys in a compound key is 2, which is too restrictive. Increase it to a more realistic value of 3. Link: http://lkml.kernel.org/r/b6952cca06d1f912eba33804a6fd6069b3847d44.1506105045.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/tracing_map.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/tracing_map.h b/kernel/trace/tracing_map.h index 618838f5f30a..f0975110b967 100644 --- a/kernel/trace/tracing_map.h +++ b/kernel/trace/tracing_map.h @@ -5,7 +5,7 @@ #define TRACING_MAP_BITS_MAX 17 #define TRACING_MAP_BITS_MIN 7 -#define TRACING_MAP_KEYS_MAX 2 +#define TRACING_MAP_KEYS_MAX 3 #define TRACING_MAP_VALS_MAX 3 #define TRACING_MAP_FIELDS_MAX (TRACING_MAP_KEYS_MAX + \ TRACING_MAP_VALS_MAX) -- cgit v1.3-14-g43fede From 7e465baa80293ed5f87fdf6405391d6f02110d4e Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 22 Sep 2017 14:58:20 -0500 Subject: tracing: Make traceprobe parsing code reusable traceprobe_probes_write() and traceprobe_command() actually contain nothing that ties them to kprobes - the code is generically useful for similar types of parsing elsewhere, so separate it out and move it to trace.c/trace.h. Other than moving it, the only change is in naming: traceprobe_probes_write() becomes trace_parse_run_command() and traceprobe_command() becomes trace_run_command(). Link: http://lkml.kernel.org/r/ae5c26ea40c196a8986854d921eb6e713ede7e3f.1506105045.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 86 +++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 7 ++++ kernel/trace/trace_kprobe.c | 18 +++++----- kernel/trace/trace_probe.c | 86 --------------------------------------------- kernel/trace/trace_probe.h | 7 ---- kernel/trace/trace_uprobe.c | 2 +- 6 files changed, 103 insertions(+), 103 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5f1ac7d3402c..73e67b68c53b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8281,6 +8281,92 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) } EXPORT_SYMBOL_GPL(ftrace_dump); +int trace_run_command(const char *buf, int (*createfn)(int, char **)) +{ + char **argv; + int argc, ret; + + argc = 0; + ret = 0; + argv = argv_split(GFP_KERNEL, buf, &argc); + if (!argv) + return -ENOMEM; + + if (argc) + ret = createfn(argc, argv); + + argv_free(argv); + + return ret; +} + +#define WRITE_BUFSIZE 4096 + +ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos, + int (*createfn)(int, char **)) +{ + char *kbuf, *buf, *tmp; + int ret = 0; + size_t done = 0; + size_t size; + + kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + while (done < count) { + size = count - done; + + if (size >= WRITE_BUFSIZE) + size = WRITE_BUFSIZE - 1; + + if (copy_from_user(kbuf, buffer + done, size)) { + ret = -EFAULT; + goto out; + } + kbuf[size] = '\0'; + buf = kbuf; + do { + tmp = strchr(buf, '\n'); + if (tmp) { + *tmp = '\0'; + size = tmp - buf + 1; + } else { + size = strlen(buf); + if (done + size < count) { + if (buf != kbuf) + break; + /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ + pr_warn("Line length is too long: Should be less than %d\n", + WRITE_BUFSIZE - 2); + ret = -EINVAL; + goto out; + } + } + done += size; + + /* Remove comments */ + tmp = strchr(buf, '#'); + + if (tmp) + *tmp = '\0'; + + ret = trace_run_command(buf, createfn); + if (ret) + goto out; + buf += size; + + } while (done < count); + } + ret = done; + +out: + kfree(kbuf); + + return ret; +} + __init static int tracer_alloc_buffers(void) { int ring_buf_size; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3c078e2ad777..f8343eb3c1f9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1752,6 +1752,13 @@ void trace_printk_start_comm(void); int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); +#define MAX_EVENT_NAME_LEN 64 + +extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); +extern ssize_t trace_parse_run_command(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos, + int (*createfn)(int, char**)); + /* * Normal trace_printk() and friends allocates special buffers * to do the manipulation, as well as saves the print formats diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8a907e12b6b9..af6134f2e597 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -907,8 +907,8 @@ static int probes_open(struct inode *inode, struct file *file) static ssize_t probes_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - return traceprobe_probes_write(file, buffer, count, ppos, - create_trace_kprobe); + return trace_parse_run_command(file, buffer, count, ppos, + create_trace_kprobe); } static const struct file_operations kprobe_events_ops = { @@ -1433,9 +1433,9 @@ static __init int kprobe_trace_self_tests_init(void) pr_info("Testing kprobe tracing: "); - ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target " - "$stack $stack0 +0($stack)", - create_trace_kprobe); + ret = trace_run_command("p:testprobe kprobe_trace_selftest_target " + "$stack $stack0 +0($stack)", + create_trace_kprobe); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function entry.\n"); warn++; @@ -1455,8 +1455,8 @@ static __init int kprobe_trace_self_tests_init(void) } } - ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target " - "$retval", create_trace_kprobe); + ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target " + "$retval", create_trace_kprobe); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function return.\n"); warn++; @@ -1526,13 +1526,13 @@ static __init int kprobe_trace_self_tests_init(void) disable_trace_kprobe(tk, file); } - ret = traceprobe_command("-:testprobe", create_trace_kprobe); + ret = trace_run_command("-:testprobe", create_trace_kprobe); if (WARN_ON_ONCE(ret)) { pr_warn("error on deleting a probe.\n"); warn++; } - ret = traceprobe_command("-:testprobe2", create_trace_kprobe); + ret = trace_run_command("-:testprobe2", create_trace_kprobe); if (WARN_ON_ONCE(ret)) { pr_warn("error on deleting a probe.\n"); warn++; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 52478f033f88..d59357308677 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -623,92 +623,6 @@ void traceprobe_free_probe_arg(struct probe_arg *arg) kfree(arg->comm); } -int traceprobe_command(const char *buf, int (*createfn)(int, char **)) -{ - char **argv; - int argc, ret; - - argc = 0; - ret = 0; - argv = argv_split(GFP_KERNEL, buf, &argc); - if (!argv) - return -ENOMEM; - - if (argc) - ret = createfn(argc, argv); - - argv_free(argv); - - return ret; -} - -#define WRITE_BUFSIZE 4096 - -ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos, - int (*createfn)(int, char **)) -{ - char *kbuf, *buf, *tmp; - int ret = 0; - size_t done = 0; - size_t size; - - kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); - if (!kbuf) - return -ENOMEM; - - while (done < count) { - size = count - done; - - if (size >= WRITE_BUFSIZE) - size = WRITE_BUFSIZE - 1; - - if (copy_from_user(kbuf, buffer + done, size)) { - ret = -EFAULT; - goto out; - } - kbuf[size] = '\0'; - buf = kbuf; - do { - tmp = strchr(buf, '\n'); - if (tmp) { - *tmp = '\0'; - size = tmp - buf + 1; - } else { - size = strlen(buf); - if (done + size < count) { - if (buf != kbuf) - break; - /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ - pr_warn("Line length is too long: Should be less than %d\n", - WRITE_BUFSIZE - 2); - ret = -EINVAL; - goto out; - } - } - done += size; - - /* Remove comments */ - tmp = strchr(buf, '#'); - - if (tmp) - *tmp = '\0'; - - ret = traceprobe_command(buf, createfn); - if (ret) - goto out; - buf += size; - - } while (done < count); - } - ret = done; - -out: - kfree(kbuf); - - return ret; -} - static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, bool is_return) { diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 903273c93e61..fb66e3eaa192 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -42,7 +42,6 @@ #define MAX_TRACE_ARGS 128 #define MAX_ARGSTR_LEN 63 -#define MAX_EVENT_NAME_LEN 64 #define MAX_STRING_SIZE PATH_MAX /* Reserved field names */ @@ -356,12 +355,6 @@ extern void traceprobe_free_probe_arg(struct probe_arg *arg); extern int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset); -extern ssize_t traceprobe_probes_write(struct file *file, - const char __user *buffer, size_t count, loff_t *ppos, - int (*createfn)(int, char**)); - -extern int traceprobe_command(const char *buf, int (*createfn)(int, char**)); - /* Sum up total data length for dynamic arraies (strings) */ static nokprobe_inline int __get_data_size(struct trace_probe *tp, struct pt_regs *regs) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 4525e0271a53..b34965e62fb9 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -651,7 +651,7 @@ static int probes_open(struct inode *inode, struct file *file) static ssize_t probes_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe); + return trace_parse_run_command(file, buffer, count, ppos, create_trace_uprobe); } static const struct file_operations uprobe_events_ops = { -- cgit v1.3-14-g43fede From 0d7a8325bf3326c92da2d21b4496a9ddde896d4f Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 22 Sep 2017 14:58:21 -0500 Subject: tracing: Clean up hist_field_flags enum As we add more flags, specifying explicit integers for the flag values becomes more unwieldy and error-prone - switch them over to left-shift values. Link: http://lkml.kernel.org/r/e644e4fb7665aec015f4a2d84a2f990d3dd5b8a1.1506105045.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 121d56850f24..0c7ec3048624 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -110,16 +110,16 @@ DEFINE_HIST_FIELD_FN(u8); #define HIST_KEY_SIZE_MAX (MAX_FILTER_STR_VAL + HIST_STACKTRACE_SIZE) enum hist_field_flags { - HIST_FIELD_FL_HITCOUNT = 1, - HIST_FIELD_FL_KEY = 2, - HIST_FIELD_FL_STRING = 4, - HIST_FIELD_FL_HEX = 8, - HIST_FIELD_FL_SYM = 16, - HIST_FIELD_FL_SYM_OFFSET = 32, - HIST_FIELD_FL_EXECNAME = 64, - HIST_FIELD_FL_SYSCALL = 128, - HIST_FIELD_FL_STACKTRACE = 256, - HIST_FIELD_FL_LOG2 = 512, + HIST_FIELD_FL_HITCOUNT = 1 << 0, + HIST_FIELD_FL_KEY = 1 << 1, + HIST_FIELD_FL_STRING = 1 << 2, + HIST_FIELD_FL_HEX = 1 << 3, + HIST_FIELD_FL_SYM = 1 << 4, + HIST_FIELD_FL_SYM_OFFSET = 1 << 5, + HIST_FIELD_FL_EXECNAME = 1 << 6, + HIST_FIELD_FL_SYSCALL = 1 << 7, + HIST_FIELD_FL_STACKTRACE = 1 << 8, + HIST_FIELD_FL_LOG2 = 1 << 9, }; struct hist_trigger_attrs { -- cgit v1.3-14-g43fede From 85013256cf01629f72a327674c5d007b4a4b40da Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 22 Sep 2017 14:58:22 -0500 Subject: tracing: Add hist_field_name() accessor In preparation for hist_fields that won't be strictly based on trace_event_fields, add a new hist_field_name() accessor to allow that flexibility and update associated users. Link: http://lkml.kernel.org/r/5b5a2d36dde067cbbe2434b10f06daac27b7dbd5.1506105045.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 67 +++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 22 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0c7ec3048624..34edf5fd85fd 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -146,6 +146,23 @@ struct hist_trigger_data { struct tracing_map *map; }; +static const char *hist_field_name(struct hist_field *field, + unsigned int level) +{ + const char *field_name = ""; + + if (level > 1) + return field_name; + + if (field->field) + field_name = field->field->name; + + if (field_name == NULL) + field_name = ""; + + return field_name; +} + static hist_field_fn_t select_value_fn(int field_size, int field_is_signed) { hist_field_fn_t fn = NULL; @@ -653,7 +670,6 @@ static int is_descending(const char *str) static int create_sort_keys(struct hist_trigger_data *hist_data) { char *fields_str = hist_data->attrs->sort_key_str; - struct ftrace_event_field *field = NULL; struct tracing_map_sort_key *sort_key; int descending, ret = 0; unsigned int i, j; @@ -670,7 +686,9 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) } for (i = 0; i < TRACING_MAP_SORT_KEYS_MAX; i++) { + struct hist_field *hist_field; char *field_str, *field_name; + const char *test_name; sort_key = &hist_data->sort_keys[i]; @@ -703,8 +721,10 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) } for (j = 1; j < hist_data->n_fields; j++) { - field = hist_data->fields[j]->field; - if (field && (strcmp(field_name, field->name) == 0)) { + hist_field = hist_data->fields[j]; + test_name = hist_field_name(hist_field, 0); + + if (strcmp(field_name, test_name) == 0) { sort_key->field_idx = j; descending = is_descending(field_str); if (descending < 0) { @@ -952,6 +972,7 @@ hist_trigger_entry_print(struct seq_file *m, struct hist_field *key_field; char str[KSYM_SYMBOL_LEN]; bool multiline = false; + const char *field_name; unsigned int i; u64 uval; @@ -963,26 +984,27 @@ hist_trigger_entry_print(struct seq_file *m, if (i > hist_data->n_vals) seq_puts(m, ", "); + field_name = hist_field_name(key_field, 0); + if (key_field->flags & HIST_FIELD_FL_HEX) { uval = *(u64 *)(key + key_field->offset); - seq_printf(m, "%s: %llx", - key_field->field->name, uval); + seq_printf(m, "%s: %llx", field_name, uval); } else if (key_field->flags & HIST_FIELD_FL_SYM) { uval = *(u64 *)(key + key_field->offset); sprint_symbol_no_offset(str, uval); - seq_printf(m, "%s: [%llx] %-45s", - key_field->field->name, uval, str); + seq_printf(m, "%s: [%llx] %-45s", field_name, + uval, str); } else if (key_field->flags & HIST_FIELD_FL_SYM_OFFSET) { uval = *(u64 *)(key + key_field->offset); sprint_symbol(str, uval); - seq_printf(m, "%s: [%llx] %-55s", - key_field->field->name, uval, str); + seq_printf(m, "%s: [%llx] %-55s", field_name, + uval, str); } else if (key_field->flags & HIST_FIELD_FL_EXECNAME) { char *comm = elt->private_data; uval = *(u64 *)(key + key_field->offset); - seq_printf(m, "%s: %-16s[%10llu]", - key_field->field->name, comm, uval); + seq_printf(m, "%s: %-16s[%10llu]", field_name, + comm, uval); } else if (key_field->flags & HIST_FIELD_FL_SYSCALL) { const char *syscall_name; @@ -991,8 +1013,8 @@ hist_trigger_entry_print(struct seq_file *m, if (!syscall_name) syscall_name = "unknown_syscall"; - seq_printf(m, "%s: %-30s[%3llu]", - key_field->field->name, syscall_name, uval); + seq_printf(m, "%s: %-30s[%3llu]", field_name, + syscall_name, uval); } else if (key_field->flags & HIST_FIELD_FL_STACKTRACE) { seq_puts(m, "stacktrace:\n"); hist_trigger_stacktrace_print(m, @@ -1000,15 +1022,14 @@ hist_trigger_entry_print(struct seq_file *m, HIST_STACKTRACE_DEPTH); multiline = true; } else if (key_field->flags & HIST_FIELD_FL_LOG2) { - seq_printf(m, "%s: ~ 2^%-2llu", key_field->field->name, + seq_printf(m, "%s: ~ 2^%-2llu", field_name, *(u64 *)(key + key_field->offset)); } else if (key_field->flags & HIST_FIELD_FL_STRING) { - seq_printf(m, "%s: %-50s", key_field->field->name, + seq_printf(m, "%s: %-50s", field_name, (char *)(key + key_field->offset)); } else { uval = *(u64 *)(key + key_field->offset); - seq_printf(m, "%s: %10llu", key_field->field->name, - uval); + seq_printf(m, "%s: %10llu", field_name, uval); } } @@ -1021,13 +1042,13 @@ hist_trigger_entry_print(struct seq_file *m, tracing_map_read_sum(elt, HITCOUNT_IDX)); for (i = 1; i < hist_data->n_vals; i++) { + field_name = hist_field_name(hist_data->fields[i], 0); + if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) { - seq_printf(m, " %s: %10llx", - hist_data->fields[i]->field->name, + seq_printf(m, " %s: %10llx", field_name, tracing_map_read_sum(elt, i)); } else { - seq_printf(m, " %s: %10llu", - hist_data->fields[i]->field->name, + seq_printf(m, " %s: %10llu", field_name, tracing_map_read_sum(elt, i)); } } @@ -1140,7 +1161,9 @@ static const char *get_hist_field_flags(struct hist_field *hist_field) static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) { - seq_printf(m, "%s", hist_field->field->name); + const char *field_name = hist_field_name(hist_field, 0); + + seq_printf(m, "%s", field_name); if (hist_field->flags) { const char *flags_str = get_hist_field_flags(hist_field); -- cgit v1.3-14-g43fede From 5819eaddf35b24d628ddfa4fbb5f8d4026e44b96 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 22 Sep 2017 14:58:23 -0500 Subject: tracing: Reimplement log2 log2 as currently implemented applies only to u64 trace_event_field derived fields, and assumes that anything it's applied to is a u64 field. To prepare for synthetic fields like latencies, log2 should be applicable to those as well, so take the opportunity now to fix the current problems as well as expand to more general uses. log2 should be thought of as a chaining function rather than a field type. To enable this as well as possible future function implementations, add a hist_field operand array into the hist_field definition for this purpose, and make use of it to implement the log2 'function'. Link: http://lkml.kernel.org/r/b47f93fc0b87b36eccf716b0c018f3a71e1f1111.1506105045.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 34edf5fd85fd..1e1558c99d56 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -28,12 +28,16 @@ struct hist_field; typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event); +#define HIST_FIELD_OPERANDS_MAX 2 + struct hist_field { struct ftrace_event_field *field; unsigned long flags; hist_field_fn_t fn; unsigned int size; unsigned int offset; + unsigned int is_signed; + struct hist_field *operands[HIST_FIELD_OPERANDS_MAX]; }; static u64 hist_field_none(struct hist_field *field, void *event) @@ -71,7 +75,9 @@ static u64 hist_field_pstring(struct hist_field *hist_field, void *event) static u64 hist_field_log2(struct hist_field *hist_field, void *event) { - u64 val = *(u64 *)(event + hist_field->field->offset); + struct hist_field *operand = hist_field->operands[0]; + + u64 val = operand->fn(operand, event); return (u64) ilog2(roundup_pow_of_two(val)); } @@ -156,6 +162,8 @@ static const char *hist_field_name(struct hist_field *field, if (field->field) field_name = field->field->name; + else if (field->flags & HIST_FIELD_FL_LOG2) + field_name = hist_field_name(field->operands[0], ++level); if (field_name == NULL) field_name = ""; @@ -357,8 +365,20 @@ static const struct tracing_map_ops hist_trigger_elt_comm_ops = { .elt_init = hist_trigger_elt_comm_init, }; -static void destroy_hist_field(struct hist_field *hist_field) +static void destroy_hist_field(struct hist_field *hist_field, + unsigned int level) { + unsigned int i; + + if (level > 2) + return; + + if (!hist_field) + return; + + for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) + destroy_hist_field(hist_field->operands[i], level + 1); + kfree(hist_field); } @@ -385,7 +405,10 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field, } if (flags & HIST_FIELD_FL_LOG2) { + unsigned long fl = flags & ~HIST_FIELD_FL_LOG2; hist_field->fn = hist_field_log2; + hist_field->operands[0] = create_hist_field(field, fl); + hist_field->size = hist_field->operands[0]->size; goto out; } @@ -405,7 +428,7 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field, hist_field->fn = select_value_fn(field->size, field->is_signed); if (!hist_field->fn) { - destroy_hist_field(hist_field); + destroy_hist_field(hist_field, 0); return NULL; } } @@ -422,7 +445,7 @@ static void destroy_hist_fields(struct hist_trigger_data *hist_data) for (i = 0; i < TRACING_MAP_FIELDS_MAX; i++) { if (hist_data->fields[i]) { - destroy_hist_field(hist_data->fields[i]); + destroy_hist_field(hist_data->fields[i], 0); hist_data->fields[i] = NULL; } } -- cgit v1.3-14-g43fede From 6cafbe159416822f6d3dfd711bf4c39050c650ba Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 20 Jun 2017 10:44:58 -0400 Subject: ftrace: Add a ftrace_free_mem() function for modules to use In order to be able to trace module init functions, the module code needs to tell ftrace what is being freed when the init sections are freed. Use the code that the main init calls to tell ftrace to free the main init sections. This requires passing in a start and end address to free. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 14 +++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2e028854bac7..47fc404ad233 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -151,8 +151,10 @@ struct ftrace_ops_hash { }; void ftrace_free_init_mem(void); +void ftrace_free_mem(void *start, void *end); #else static inline void ftrace_free_init_mem(void) { } +static inline void ftrace_free_mem(void *start, void *end) { } #endif /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6abfafd7f173..84cb5928665a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5868,10 +5868,10 @@ void ftrace_module_init(struct module *mod) } #endif /* CONFIG_MODULES */ -void __init ftrace_free_init_mem(void) +void ftrace_free_mem(void *start_ptr, void *end_ptr) { - unsigned long start = (unsigned long)(&__init_begin); - unsigned long end = (unsigned long)(&__init_end); + unsigned long start = (unsigned long)(start_ptr); + unsigned long end = (unsigned long)(end_ptr); struct ftrace_page **last_pg = &ftrace_pages_start; struct ftrace_page *pg; struct dyn_ftrace *rec; @@ -5913,6 +5913,14 @@ void __init ftrace_free_init_mem(void) mutex_unlock(&ftrace_lock); } +void __init ftrace_free_init_mem(void) +{ + void *start = (void *)(&__init_begin); + void *end = (void *)(&__init_end); + + ftrace_free_mem(start, end); +} + void __init ftrace_init(void) { extern unsigned long __start_mcount_loc[]; -- cgit v1.3-14-g43fede From 3e234289f86b12985ef8909cd34525fcb66c4efb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 3 Mar 2017 18:00:22 -0500 Subject: ftrace: Allow module init functions to be traced Allow for module init sections to be traced as well as core kernel init sections. Now that filtering modules functions can be stored, for when they are loaded, it makes sense to be able to trace them. Cc: Jessica Yu Cc: Rusty Russell Signed-off-by: Steven Rostedt (VMware) --- include/linux/init.h | 4 +--- kernel/module.c | 2 ++ kernel/trace/ftrace.c | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/init.h b/include/linux/init.h index 94769d687cf0..a779c1816437 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -39,7 +39,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold __inittrace __latent_entropy +#define __init __section(.init.text) __cold __latent_entropy #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) @@ -68,10 +68,8 @@ #ifdef MODULE #define __exitused -#define __inittrace notrace #else #define __exitused __used -#define __inittrace #endif #define __exit __section(.exit.text) __exitused __cold notrace diff --git a/kernel/module.c b/kernel/module.c index de66ec825992..58bca427ac3f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3473,6 +3473,8 @@ static noinline int do_init_module(struct module *mod) if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) async_synchronize_full(); + ftrace_free_mem(mod->init_layout.base, mod->init_layout.base + + mod->init_layout.size); mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 84cb5928665a..d7297e866e4a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5752,7 +5752,8 @@ void ftrace_release_mod(struct module *mod) last_pg = &ftrace_pages_start; for (pg = ftrace_pages_start; pg; pg = *last_pg) { rec = &pg->records[0]; - if (within_module_core(rec->ip, mod)) { + if (within_module_core(rec->ip, mod) || + within_module_init(rec->ip, mod)) { /* * As core pages are first, the first * page should never be a module page. @@ -5821,7 +5822,8 @@ void ftrace_module_enable(struct module *mod) * not part of this module, then skip this pg, * which the "break" will do. */ - if (!within_module_core(rec->ip, mod)) + if (!within_module_core(rec->ip, mod) && + !within_module_init(rec->ip, mod)) break; cnt = 0; -- cgit v1.3-14-g43fede From aba4b5c22cbac296f4081a0476d0c55828f135b4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 1 Sep 2017 08:35:38 -0400 Subject: ftrace: Save module init functions kallsyms symbols for tracing If function tracing is active when the module init functions are freed, then store them to be referenced by kallsyms. As module init functions can now be traced on module load, they were useless: ># echo ':mod:snd_seq' > set_ftrace_filter ># echo function > current_tracer ># modprobe snd_seq ># cat trace # tracer: function # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | modprobe-2786 [000] .... 3189.037874: 0xffffffffa0860000 <-do_one_initcall modprobe-2786 [000] .... 3189.037876: 0xffffffffa086004d <-0xffffffffa086000f modprobe-2786 [000] .... 3189.037876: 0xffffffffa086010d <-0xffffffffa0860018 modprobe-2786 [000] .... 3189.037877: 0xffffffffa086011a <-0xffffffffa0860021 modprobe-2786 [000] .... 3189.037877: 0xffffffffa0860080 <-0xffffffffa086002a modprobe-2786 [000] .... 3189.039523: 0xffffffffa0860400 <-0xffffffffa0860033 modprobe-2786 [000] .... 3189.039523: 0xffffffffa086038a <-0xffffffffa086041c modprobe-2786 [000] .... 3189.039591: 0xffffffffa086038a <-0xffffffffa0860436 modprobe-2786 [000] .... 3189.039657: 0xffffffffa086038a <-0xffffffffa0860450 modprobe-2786 [000] .... 3189.039719: 0xffffffffa0860127 <-0xffffffffa086003c modprobe-2786 [000] .... 3189.039742: snd_seq_create_kernel_client <-0xffffffffa08601f6 When the output is shown, the kallsyms for the module init functions have already been freed, and the output of the trace can not convert them to their function names. Now this looks like this: # tracer: function # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | modprobe-2463 [002] .... 174.243237: alsa_seq_init <-do_one_initcall modprobe-2463 [002] .... 174.243239: client_init_data <-alsa_seq_init modprobe-2463 [002] .... 174.243240: snd_sequencer_memory_init <-alsa_seq_init modprobe-2463 [002] .... 174.243240: snd_seq_queues_init <-alsa_seq_init modprobe-2463 [002] .... 174.243240: snd_sequencer_device_init <-alsa_seq_init modprobe-2463 [002] .... 174.244860: snd_seq_info_init <-alsa_seq_init modprobe-2463 [002] .... 174.244861: create_info_entry <-snd_seq_info_init modprobe-2463 [002] .... 174.244936: create_info_entry <-snd_seq_info_init modprobe-2463 [002] .... 174.245003: create_info_entry <-snd_seq_info_init modprobe-2463 [002] .... 174.245072: snd_seq_system_client_init <-alsa_seq_init modprobe-2463 [002] .... 174.245094: snd_seq_create_kernel_client <-snd_seq_system_client_init Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 20 ++++++- kernel/kallsyms.c | 5 ++ kernel/module.c | 2 +- kernel/trace/ftrace.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 168 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 47fc404ad233..202b40784c4e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -51,6 +51,21 @@ static inline void early_trace_init(void) { } struct module; struct ftrace_hash; +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \ + defined(CONFIG_DYNAMIC_FTRACE) +const char * +ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym); +#else +static inline const char * +ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + return NULL; +} +#endif + + #ifdef CONFIG_FUNCTION_TRACER extern int ftrace_enabled; @@ -151,10 +166,10 @@ struct ftrace_ops_hash { }; void ftrace_free_init_mem(void); -void ftrace_free_mem(void *start, void *end); +void ftrace_free_mem(struct module *mod, void *start, void *end); #else static inline void ftrace_free_init_mem(void) { } -static inline void ftrace_free_mem(void *start, void *end) { } +static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } #endif /* @@ -272,6 +287,7 @@ static inline int ftrace_nr_registered_ops(void) static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_free_init_mem(void) { } +static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_STACK_TRACER diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 127e7cfafa55..976ecb9275d9 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -337,6 +338,10 @@ const char *kallsyms_lookup(unsigned long addr, if (!ret) ret = bpf_address_lookup(addr, symbolsize, offset, modname, namebuf); + + if (!ret) + ret = ftrace_mod_address_lookup(addr, symbolsize, + offset, modname, namebuf); return ret; } diff --git a/kernel/module.c b/kernel/module.c index 58bca427ac3f..279a469dc375 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3473,7 +3473,7 @@ static noinline int do_init_module(struct module *mod) if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) async_synchronize_full(); - ftrace_free_mem(mod->init_layout.base, mod->init_layout.base + + ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base + mod->init_layout.size); mutex_lock(&module_mutex); /* Drop initial reference. */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d7297e866e4a..86dbbfb353db 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5675,6 +5675,21 @@ static int ftrace_process_locs(struct module *mod, return ret; } +struct ftrace_mod_func { + struct list_head list; + char *name; + unsigned long ip; + unsigned int size; +}; + +struct ftrace_mod_map { + struct list_head list; + struct module *mod; + unsigned long start_addr; + unsigned long end_addr; + struct list_head funcs; +}; + #ifdef CONFIG_MODULES #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) @@ -5868,9 +5883,123 @@ void ftrace_module_init(struct module *mod) ftrace_process_locs(mod, mod->ftrace_callsites, mod->ftrace_callsites + mod->num_ftrace_callsites); } + +static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, + struct dyn_ftrace *rec) +{ + struct ftrace_mod_func *mod_func; + unsigned long symsize; + unsigned long offset; + char str[KSYM_SYMBOL_LEN]; + char *modname; + const char *ret; + + ret = kallsyms_lookup(rec->ip, &symsize, &offset, &modname, str); + if (!ret) + return; + + mod_func = kmalloc(sizeof(*mod_func), GFP_KERNEL); + if (!mod_func) + return; + + mod_func->name = kstrdup(str, GFP_KERNEL); + if (!mod_func->name) { + kfree(mod_func); + return; + } + + mod_func->ip = rec->ip - offset; + mod_func->size = symsize; + + list_add_rcu(&mod_func->list, &mod_map->funcs); +} + +static LIST_HEAD(ftrace_mod_maps); + +static struct ftrace_mod_map * +allocate_ftrace_mod_map(struct module *mod, + unsigned long start, unsigned long end) +{ + struct ftrace_mod_map *mod_map; + + mod_map = kmalloc(sizeof(*mod_map), GFP_KERNEL); + if (!mod_map) + return NULL; + + mod_map->mod = mod; + mod_map->start_addr = start; + mod_map->end_addr = end; + + INIT_LIST_HEAD_RCU(&mod_map->funcs); + + list_add_rcu(&mod_map->list, &ftrace_mod_maps); + + return mod_map; +} + +static const char * +ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, + unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) +{ + struct ftrace_mod_func *found_func = NULL; + struct ftrace_mod_func *mod_func; + + list_for_each_entry_rcu(mod_func, &mod_map->funcs, list) { + if (addr >= mod_func->ip && + addr < mod_func->ip + mod_func->size) { + found_func = mod_func; + break; + } + } + + if (found_func) { + if (size) + *size = found_func->size; + if (off) + *off = addr - found_func->ip; + if (sym) + strlcpy(sym, found_func->name, KSYM_NAME_LEN); + + return found_func->name; + } + + return NULL; +} + +const char * +ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + struct ftrace_mod_map *mod_map; + const char *ret = NULL; + + preempt_disable(); + list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { + ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); + if (ret) { + if (modname) + *modname = mod_map->mod->name; + break; + } + } + preempt_enable(); + + return ret; +} + +#else +static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, + struct dyn_ftrace *rec) { } +static inline struct ftrace_mod_map * +allocate_ftrace_mod_map(struct module *mod, + unsigned long start, unsigned long end) +{ + return NULL; +} #endif /* CONFIG_MODULES */ -void ftrace_free_mem(void *start_ptr, void *end_ptr) +void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) { unsigned long start = (unsigned long)(start_ptr); unsigned long end = (unsigned long)(end_ptr); @@ -5878,6 +6007,7 @@ void ftrace_free_mem(void *start_ptr, void *end_ptr) struct ftrace_page *pg; struct dyn_ftrace *rec; struct dyn_ftrace key; + struct ftrace_mod_map *mod_map = NULL; int order; key.ip = start; @@ -5885,6 +6015,14 @@ void ftrace_free_mem(void *start_ptr, void *end_ptr) mutex_lock(&ftrace_lock); + /* + * If we are freeing module init memory, then check if + * any tracer is active. If so, we need to save a mapping of + * the module functions being freed with the address. + */ + if (mod && ftrace_ops_list != &ftrace_list_end) + mod_map = allocate_ftrace_mod_map(mod, start, end); + for (pg = ftrace_pages_start; pg; last_pg = &pg->next, pg = *last_pg) { if (end < pg->records[0].ip || start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) @@ -5895,6 +6033,10 @@ void ftrace_free_mem(void *start_ptr, void *end_ptr) ftrace_cmp_recs); if (!rec) continue; + + if (mod_map) + save_ftrace_mod_rec(mod_map, rec); + pg->index--; ftrace_update_tot_cnt--; if (!pg->index) { @@ -5920,7 +6062,7 @@ void __init ftrace_free_init_mem(void) void *start = (void *)(&__init_begin); void *end = (void *)(&__init_end); - ftrace_free_mem(start, end); + ftrace_free_mem(NULL, start, end); } void __init ftrace_init(void) -- cgit v1.3-14-g43fede From 6aa69784b43eb5f69120339938c50a97a433049f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 5 Sep 2017 19:20:16 -0400 Subject: ftrace: Add freeing algorithm to free ftrace_mod_maps The ftrace_mod_map is a descriptor to save module init function names in case they were traced, and the trace output needs to reference the function name from the function address. But after the function is unloaded, it the maps should be freed, as the rest of the function names are as well. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 86dbbfb353db..a5824408bed9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5683,6 +5683,7 @@ struct ftrace_mod_func { }; struct ftrace_mod_map { + struct rcu_head rcu; struct list_head list; struct module *mod; unsigned long start_addr; @@ -5694,6 +5695,8 @@ struct ftrace_mod_map { #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) +static LIST_HEAD(ftrace_mod_maps); + static int referenced_filters(struct dyn_ftrace *rec) { struct ftrace_ops *ops; @@ -5747,8 +5750,26 @@ static void clear_mod_from_hashes(struct ftrace_page *pg) mutex_unlock(&trace_types_lock); } +static void ftrace_free_mod_map(struct rcu_head *rcu) +{ + struct ftrace_mod_map *mod_map = container_of(rcu, struct ftrace_mod_map, rcu); + struct ftrace_mod_func *mod_func; + struct ftrace_mod_func *n; + + /* All the contents of mod_map are now not visible to readers */ + list_for_each_entry_safe(mod_func, n, &mod_map->funcs, list) { + kfree(mod_func->name); + list_del(&mod_func->list); + kfree(mod_func); + } + + kfree(mod_map); +} + void ftrace_release_mod(struct module *mod) { + struct ftrace_mod_map *mod_map; + struct ftrace_mod_map *n; struct dyn_ftrace *rec; struct ftrace_page **last_pg; struct ftrace_page *tmp_page = NULL; @@ -5760,6 +5781,14 @@ void ftrace_release_mod(struct module *mod) if (ftrace_disabled) goto out_unlock; + list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) { + if (mod_map->mod == mod) { + list_del_rcu(&mod_map->list); + call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map); + break; + } + } + /* * Each module has its own ftrace_pages, remove * them from the list. @@ -5914,8 +5943,6 @@ static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, list_add_rcu(&mod_func->list, &mod_map->funcs); } -static LIST_HEAD(ftrace_mod_maps); - static struct ftrace_mod_map * allocate_ftrace_mod_map(struct module *mod, unsigned long start, unsigned long end) @@ -5974,6 +6001,7 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, struct ftrace_mod_map *mod_map; const char *ret = NULL; + /* mod_map is freed via call_rcu_sched() */ preempt_disable(); list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); -- cgit v1.3-14-g43fede From 6171a0310a06a7a0cb83713fa7068bdd4192de19 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 Sep 2017 08:40:41 -0400 Subject: ftrace/kallsyms: Have /proc/kallsyms show saved mod init functions If a module is loaded while tracing is enabled, then there's a possibility that the module init functions were traced. These functions have their name and address stored by ftrace such that it can translate the function address that is written into the buffer into a human readable function name. As userspace tools may be doing the same, they need a way to map function names to their address as well. This is done through reading /proc/kallsyms. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 9 +++++++++ kernel/kallsyms.c | 38 ++++++++++++++++++++++++++++++++------ kernel/trace/ftrace.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 202b40784c4e..346f8294e40a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -56,6 +56,9 @@ struct ftrace_hash; const char * ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym); +int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported); #else static inline const char * ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, @@ -63,6 +66,12 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, { return NULL; } +static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported) +{ + return -1; +} #endif diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 976ecb9275d9..1966fe1c2b57 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -479,6 +479,7 @@ EXPORT_SYMBOL(__print_symbol); struct kallsym_iter { loff_t pos; loff_t pos_mod_end; + loff_t pos_ftrace_mod_end; unsigned long value; unsigned int nameoff; /* If iterating in core kernel symbols. */ char type; @@ -501,11 +502,25 @@ static int get_ksymbol_mod(struct kallsym_iter *iter) return 1; } +static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter) +{ + int ret = ftrace_mod_get_kallsym(iter->pos - iter->pos_mod_end, + &iter->value, &iter->type, + iter->name, iter->module_name, + &iter->exported); + if (ret < 0) { + iter->pos_ftrace_mod_end = iter->pos; + return 0; + } + + return 1; +} + static int get_ksymbol_bpf(struct kallsym_iter *iter) { iter->module_name[0] = '\0'; iter->exported = 0; - return bpf_get_kallsym(iter->pos - iter->pos_mod_end, + return bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end, &iter->value, &iter->type, iter->name) < 0 ? 0 : 1; } @@ -530,20 +545,31 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) iter->name[0] = '\0'; iter->nameoff = get_symbol_offset(new_pos); iter->pos = new_pos; - if (new_pos == 0) + if (new_pos == 0) { iter->pos_mod_end = 0; + iter->pos_ftrace_mod_end = 0; + } } static int update_iter_mod(struct kallsym_iter *iter, loff_t pos) { iter->pos = pos; - if (iter->pos_mod_end > 0 && - iter->pos_mod_end < iter->pos) + if (iter->pos_ftrace_mod_end > 0 && + iter->pos_ftrace_mod_end < iter->pos) return get_ksymbol_bpf(iter); - if (!get_ksymbol_mod(iter)) - return get_ksymbol_bpf(iter); + if (iter->pos_mod_end > 0 && + iter->pos_mod_end < iter->pos) { + if (!get_ksymbol_ftrace_mod(iter)) + return get_ksymbol_bpf(iter); + return 1; + } + + if (!get_ksymbol_mod(iter)) { + if (!get_ksymbol_ftrace_mod(iter)) + return get_ksymbol_bpf(iter); + } return 1; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a5824408bed9..9e99bd55732e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5689,6 +5689,7 @@ struct ftrace_mod_map { unsigned long start_addr; unsigned long end_addr; struct list_head funcs; + unsigned int num_funcs; }; #ifdef CONFIG_MODULES @@ -5940,6 +5941,8 @@ static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, mod_func->ip = rec->ip - offset; mod_func->size = symsize; + mod_map->num_funcs++; + list_add_rcu(&mod_func->list, &mod_map->funcs); } @@ -5956,6 +5959,7 @@ allocate_ftrace_mod_map(struct module *mod, mod_map->mod = mod; mod_map->start_addr = start; mod_map->end_addr = end; + mod_map->num_funcs = 0; INIT_LIST_HEAD_RCU(&mod_map->funcs); @@ -6016,6 +6020,42 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, return ret; } +int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported) +{ + struct ftrace_mod_map *mod_map; + struct ftrace_mod_func *mod_func; + + preempt_disable(); + list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { + + if (symnum >= mod_map->num_funcs) { + symnum -= mod_map->num_funcs; + continue; + } + + list_for_each_entry_rcu(mod_func, &mod_map->funcs, list) { + if (symnum > 1) { + symnum--; + continue; + } + + *value = mod_func->ip; + *type = 'T'; + strlcpy(name, mod_func->name, KSYM_NAME_LEN); + strlcpy(module_name, mod_map->mod->name, MODULE_NAME_LEN); + *exported = 1; + preempt_enable(); + return 0; + } + WARN_ON(1); + break; + } + preempt_enable(); + return -ERANGE; +} + #else static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, struct dyn_ftrace *rec) { } -- cgit v1.3-14-g43fede From aaecaa0b5f31794f1711247da4b5883a6ff98163 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 5 Oct 2017 17:54:31 -0700 Subject: tracing: Prepare to add preempt and irq trace events In preparation of adding irqsoff and preemptsoff enable and disable trace events, move required functions and code to make it easier to add these events in a later patch. This patch is just code movement and no functional change. Link: http://lkml.kernel.org/r/20171006005432.14244-2-joelaf@google.com Cc: Peter Zijlstra Cc: kernel-team@android.com Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_irqsoff.c | 100 ++++++++++++++++++++++++++++++++----------- 1 file changed, 74 insertions(+), 26 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 7758bc0617cb..0e3033c00474 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -16,6 +16,7 @@ #include "trace.h" +#if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER) static struct trace_array *irqsoff_trace __read_mostly; static int tracer_enabled __read_mostly; @@ -462,64 +463,44 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) #else /* !CONFIG_PROVE_LOCKING */ -/* - * Stubs: - */ - -void trace_softirqs_on(unsigned long ip) -{ -} - -void trace_softirqs_off(unsigned long ip) -{ -} - -inline void print_irqtrace_events(struct task_struct *curr) -{ -} - /* * We are only interested in hardirq on/off events: */ -void trace_hardirqs_on(void) +static inline void tracer_hardirqs_on(void) { if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } -EXPORT_SYMBOL(trace_hardirqs_on); -void trace_hardirqs_off(void) +static inline void tracer_hardirqs_off(void) { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } -EXPORT_SYMBOL(trace_hardirqs_off); -__visible void trace_hardirqs_on_caller(unsigned long caller_addr) +static inline void tracer_hardirqs_on_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, caller_addr); } -EXPORT_SYMBOL(trace_hardirqs_on_caller); -__visible void trace_hardirqs_off_caller(unsigned long caller_addr) +static inline void tracer_hardirqs_off_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, caller_addr); } -EXPORT_SYMBOL(trace_hardirqs_off_caller); #endif /* CONFIG_PROVE_LOCKING */ #endif /* CONFIG_IRQSOFF_TRACER */ #ifdef CONFIG_PREEMPT_TRACER -void trace_preempt_on(unsigned long a0, unsigned long a1) +static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { if (preempt_trace() && !irq_trace()) stop_critical_timing(a0, a1); } -void trace_preempt_off(unsigned long a0, unsigned long a1) +static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { if (preempt_trace() && !irq_trace()) start_critical_timing(a0, a1); @@ -781,3 +762,70 @@ __init static int init_irqsoff_tracer(void) return 0; } core_initcall(init_irqsoff_tracer); +#endif /* IRQSOFF_TRACER || PREEMPTOFF_TRACER */ + +#ifndef CONFIG_IRQSOFF_TRACER +static inline void tracer_hardirqs_on(void) { } +static inline void tracer_hardirqs_off(void) { } +static inline void tracer_hardirqs_on_caller(unsigned long caller_addr) { } +static inline void tracer_hardirqs_off_caller(unsigned long caller_addr) { } +#endif + +#ifndef CONFIG_PREEMPT_TRACER +static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { } +static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { } +#endif + +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PROVE_LOCKING) +void trace_hardirqs_on(void) +{ + tracer_hardirqs_on(); +} +EXPORT_SYMBOL(trace_hardirqs_on); + +void trace_hardirqs_off(void) +{ + tracer_hardirqs_off(); +} +EXPORT_SYMBOL(trace_hardirqs_off); + +__visible void trace_hardirqs_on_caller(unsigned long caller_addr) +{ + tracer_hardirqs_on_caller(caller_addr); +} +EXPORT_SYMBOL(trace_hardirqs_on_caller); + +__visible void trace_hardirqs_off_caller(unsigned long caller_addr) +{ + tracer_hardirqs_off_caller(caller_addr); +} +EXPORT_SYMBOL(trace_hardirqs_off_caller); + +/* + * Stubs: + */ + +void trace_softirqs_on(unsigned long ip) +{ +} + +void trace_softirqs_off(unsigned long ip) +{ +} + +inline void print_irqtrace_events(struct task_struct *curr) +{ +} +#endif + +#ifdef CONFIG_PREEMPT_TRACER +void trace_preempt_on(unsigned long a0, unsigned long a1) +{ + tracer_preempt_on(a0, a1); +} + +void trace_preempt_off(unsigned long a0, unsigned long a1) +{ + tracer_preempt_off(a0, a1); +} +#endif -- cgit v1.3-14-g43fede From 97562633bcbac4a07d605ae628d7655fa71caaf5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:19 -0700 Subject: bpf: perf event change needed for subsequent bpf helpers This patch does not impact existing functionalities. It contains the changes in perf event area needed for subsequent bpf_perf_event_read_value and bpf_perf_prog_read_value helpers. Signed-off-by: Yonghong Song Acked-by: Peter Zijlstra (Intel) Signed-off-by: David S. Miller --- include/linux/perf_event.h | 7 +++++-- kernel/bpf/arraymap.c | 2 +- kernel/events/core.c | 15 +++++++++++++-- kernel/trace/bpf_trace.c | 2 +- 4 files changed, 20 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8e22f24ded6a..79b18a20cf5d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -806,6 +806,7 @@ struct perf_output_handle { struct bpf_perf_event_data_kern { struct pt_regs *regs; struct perf_sample_data *data; + struct perf_event *event; }; #ifdef CONFIG_CGROUP_PERF @@ -884,7 +885,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, void *context); extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); -int perf_event_read_local(struct perf_event *event, u64 *value); +int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -1286,7 +1288,8 @@ static inline const struct perf_event_attr *perf_event_attrs(struct perf_event * { return ERR_PTR(-EINVAL); } -static inline int perf_event_read_local(struct perf_event *event, u64 *value) +static inline int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running) { return -EINVAL; } diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 98c0f00c3f5e..68d866628be0 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -492,7 +492,7 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, ee = ERR_PTR(-EOPNOTSUPP); event = perf_file->private_data; - if (perf_event_read_local(event, &value) == -EOPNOTSUPP) + if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP) goto err_out; ee = bpf_event_entry_gen(perf_file, map_file); diff --git a/kernel/events/core.c b/kernel/events/core.c index 6bc21e202ae4..902149f05381 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3684,10 +3684,12 @@ static inline u64 perf_event_count(struct perf_event *event) * will not be local and we cannot read them atomically * - must not have a pmu::count method */ -int perf_event_read_local(struct perf_event *event, u64 *value) +int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running) { unsigned long flags; int ret = 0; + u64 now; /* * Disabling interrupts avoids all counter scheduling (context @@ -3718,13 +3720,21 @@ int perf_event_read_local(struct perf_event *event, u64 *value) goto out; } + now = event->shadow_ctx_time + perf_clock(); + if (enabled) + *enabled = now - event->tstamp_enabled; /* * If the event is currently on this CPU, its either a per-task event, * or local to this CPU. Furthermore it means its ACTIVE (otherwise * oncpu == -1). */ - if (event->oncpu == smp_processor_id()) + if (event->oncpu == smp_processor_id()) { event->pmu->read(event); + if (running) + *running = now - event->tstamp_running; + } else if (running) { + *running = event->total_time_running; + } *value = local64_read(&event->count); out: @@ -8072,6 +8082,7 @@ static void bpf_overflow_handler(struct perf_event *event, struct bpf_perf_event_data_kern ctx = { .data = data, .regs = regs, + .event = event, }; int ret = 0; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index dc498b605d5d..95888ae6c263 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -275,7 +275,7 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) if (!ee) return -ENOENT; - err = perf_event_read_local(ee->event, &value); + err = perf_event_read_local(ee->event, &value, NULL, NULL); /* * this api is ugly since we miss [-22..-2] range of valid * counter values, but that's uapi -- cgit v1.3-14-g43fede From 908432ca84fc229e906ba164219e9ad0fe56f755 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:20 -0700 Subject: bpf: add helper bpf_perf_event_read_value for perf event array map Hardware pmu counters are limited resources. When there are more pmu based perf events opened than available counters, kernel will multiplex these events so each event gets certain percentage (but not 100%) of the pmu time. In case that multiplexing happens, the number of samples or counter value will not reflect the case compared to no multiplexing. This makes comparison between different runs difficult. Typically, the number of samples or counter value should be normalized before comparing to other experiments. The typical normalization is done like: normalized_num_samples = num_samples * time_enabled / time_running normalized_counter_value = counter_value * time_enabled / time_running where time_enabled is the time enabled for event and time_running is the time running for event since last normalization. This patch adds helper bpf_perf_event_read_value for kprobed based perf event array map, to read perf counter and enabled/running time. The enabled/running time is accumulated since the perf event open. To achieve scaling factor between two bpf invocations, users can can use cpu_id as the key (which is typical for perf array usage model) to remember the previous value and do the calculation inside the bpf program. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 21 +++++++++++++++++++-- kernel/bpf/verifier.c | 4 +++- kernel/trace/bpf_trace.c | 45 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 63 insertions(+), 7 deletions(-) (limited to 'kernel/trace') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6082faf5fd2a..7b57a212c7d7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -641,6 +641,14 @@ union bpf_attr { * @xdp_md: pointer to xdp_md * @delta: An positive/negative integer to be added to xdp_md.data_meta * Return: 0 on success or negative on error + * + * int bpf_perf_event_read_value(map, flags, buf, buf_size) + * read perf event counter value and perf event enabled/running time + * @map: pointer to perf_event_array map + * @flags: index of event in the map or bitmask flags + * @buf: buf to fill + * @buf_size: size of the buf + * Return: 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -697,7 +705,8 @@ union bpf_attr { FN(redirect_map), \ FN(sk_redirect_map), \ FN(sock_map_update), \ - FN(xdp_adjust_meta), + FN(xdp_adjust_meta), \ + FN(perf_event_read_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -741,7 +750,9 @@ enum bpf_func_id { #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) -/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ +/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and + * BPF_FUNC_perf_event_read_value flags. + */ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK /* BPF_FUNC_perf_event_output for sk_buff input context. */ @@ -934,4 +945,10 @@ enum { #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ #define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +struct bpf_perf_event_value { + __u64 counter; + __u64 enabled; + __u64 running; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 52b022310f6a..590125e29161 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1552,7 +1552,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) break; case BPF_MAP_TYPE_PERF_EVENT_ARRAY: if (func_id != BPF_FUNC_perf_event_read && - func_id != BPF_FUNC_perf_event_output) + func_id != BPF_FUNC_perf_event_output && + func_id != BPF_FUNC_perf_event_read_value) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -1595,6 +1596,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) break; case BPF_FUNC_perf_event_read: case BPF_FUNC_perf_event_output: + case BPF_FUNC_perf_event_read_value: if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) goto error; break; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 95888ae6c263..0be86cc0130e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -255,14 +255,14 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) +static __always_inline int +get_map_perf_counter(struct bpf_map *map, u64 flags, + u64 *value, u64 *enabled, u64 *running) { struct bpf_array *array = container_of(map, struct bpf_array, map); unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; struct bpf_event_entry *ee; - u64 value = 0; - int err; if (unlikely(flags & ~(BPF_F_INDEX_MASK))) return -EINVAL; @@ -275,7 +275,15 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) if (!ee) return -ENOENT; - err = perf_event_read_local(ee->event, &value, NULL, NULL); + return perf_event_read_local(ee->event, value, enabled, running); +} + +BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) +{ + u64 value = 0; + int err; + + err = get_map_perf_counter(map, flags, &value, NULL, NULL); /* * this api is ugly since we miss [-22..-2] range of valid * counter values, but that's uapi @@ -293,6 +301,33 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags, + struct bpf_perf_event_value *, buf, u32, size) +{ + int err = -EINVAL; + + if (unlikely(size != sizeof(struct bpf_perf_event_value))) + goto clear; + err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled, + &buf->running); + if (unlikely(err)) + goto clear; + return 0; +clear: + memset(buf, 0, size); + return err; +} + +static const struct bpf_func_proto bpf_perf_event_read_value_proto = { + .func = bpf_perf_event_read_value, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; + static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd); static __always_inline u64 @@ -499,6 +534,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_perf_event_output_proto; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto; + case BPF_FUNC_perf_event_read_value: + return &bpf_perf_event_read_value_proto; default: return tracing_func_proto(func_id); } -- cgit v1.3-14-g43fede From 4bebdc7a85aa400c0222b5329861e4ad9252f1e5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:22 -0700 Subject: bpf: add helper bpf_perf_prog_read_value This patch adds helper bpf_perf_prog_read_cvalue for perf event based bpf programs, to read event counter and enabled/running time. The enabled/running time is accumulated since the perf event open. The typical use case for perf event based bpf program is to attach itself to a single event. In such cases, if it is desirable to get scaling factor between two bpf invocations, users can can save the time values in a map, and use the value from the map and the current value to calculate the scaling factor. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 10 +++++++++- kernel/trace/bpf_trace.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7b57a212c7d7..5bbbec17aa5a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -649,6 +649,13 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return: 0 on success or negative error code + * + * int bpf_perf_prog_read_value(ctx, buf, buf_size) + * read perf prog attached perf event counter and enabled/running time + * @ctx: pointer to ctx + * @buf: buf to fill + * @buf_size: size of the buf + * Return : 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -706,7 +713,8 @@ union bpf_attr { FN(sk_redirect_map), \ FN(sock_map_update), \ FN(xdp_adjust_meta), \ - FN(perf_event_read_value), + FN(perf_event_read_value), \ + FN(perf_prog_read_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0be86cc0130e..04ea5314f2bc 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -613,6 +613,32 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = { .arg3_type = ARG_ANYTHING, }; +BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx, + struct bpf_perf_event_value *, buf, u32, size) +{ + int err = -EINVAL; + + if (unlikely(size != sizeof(struct bpf_perf_event_value))) + goto clear; + err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled, + &buf->running); + if (unlikely(err)) + goto clear; + return 0; +clear: + memset(buf, 0, size); + return err; +} + +static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = { + .func = bpf_perf_prog_read_value_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -620,6 +646,8 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) return &bpf_perf_event_output_proto_tp; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto_tp; + case BPF_FUNC_perf_prog_read_value: + return &bpf_perf_prog_read_value_proto_tp; default: return tracing_func_proto(func_id); } -- cgit v1.3-14-g43fede From 1d48b080bcce0a5e7d7aa2dbcdb35deefc188c3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Sep 2017 13:50:16 +0200 Subject: sched/debug: Rename task-state printing helpers Steve requested better names for the new task-state helper functions. So introduce the concept of task-state index for the printing and rename __get_task_state() to task_state_index() and __task_state_to_char() to task_index_to_char(). Requested-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170929115016.pzlqc7ss3ccystyg@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- fs/proc/array.c | 2 +- include/linux/sched.h | 6 +++--- include/trace/events/sched.h | 2 +- kernel/trace/trace_output.c | 12 ++++++------ kernel/trace/trace_sched_wakeup.c | 8 ++++---- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'kernel/trace') diff --git a/fs/proc/array.c b/fs/proc/array.c index 77a8eacbe032..1b5406ca8e4c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -137,7 +137,7 @@ static const char * const task_state_array[] = { static inline const char *get_task_state(struct task_struct *tsk) { BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array)); - return task_state_array[__get_task_state(tsk)]; + return task_state_array[task_state_index(tsk)]; } static inline int get_task_umask(struct task_struct *tsk) diff --git a/include/linux/sched.h b/include/linux/sched.h index bdd6ad6fcce1..33a01f4deb00 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1248,7 +1248,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) -static inline unsigned int __get_task_state(struct task_struct *tsk) +static inline unsigned int task_state_index(struct task_struct *tsk) { unsigned int tsk_state = READ_ONCE(tsk->state); unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; @@ -1261,7 +1261,7 @@ static inline unsigned int __get_task_state(struct task_struct *tsk) return fls(state); } -static inline char __task_state_to_char(unsigned int state) +static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; @@ -1272,7 +1272,7 @@ static inline char __task_state_to_char(unsigned int state) static inline char task_state_to_char(struct task_struct *tsk) { - return __task_state_to_char(__get_task_state(tsk)); + return task_index_to_char(task_state_index(tsk)); } /** diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 3c8b7f625670..fab74a12ca0f 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -117,7 +117,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * if (preempt) return TASK_STATE_MAX; - return __get_task_state(p); + return task_state_index(p); } #endif /* CREATE_TRACE_POINTS */ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index c738e764e2a5..90db994ac900 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -921,8 +921,8 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, trace_assign_type(field, iter->ent); - T = __task_state_to_char(field->next_state); - S = __task_state_to_char(field->prev_state); + T = task_index_to_char(field->next_state); + S = task_index_to_char(field->prev_state); trace_find_cmdline(field->next_pid, comm); trace_seq_printf(&iter->seq, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", @@ -957,8 +957,8 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S) trace_assign_type(field, iter->ent); if (!S) - S = __task_state_to_char(field->prev_state); - T = __task_state_to_char(field->next_state); + S = task_index_to_char(field->prev_state); + T = task_index_to_char(field->next_state); trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", field->prev_pid, field->prev_prio, @@ -993,8 +993,8 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S) trace_assign_type(field, iter->ent); if (!S) - S = __task_state_to_char(field->prev_state); - T = __task_state_to_char(field->next_state); + S = task_index_to_char(field->prev_state); + T = task_index_to_char(field->next_state); SEQ_PUT_HEX_FIELD(s, field->prev_pid); SEQ_PUT_HEX_FIELD(s, field->prev_prio); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 0c331978b1a6..500f370d3bb1 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -397,10 +397,10 @@ tracing_sched_switch_trace(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->prev_pid = prev->pid; entry->prev_prio = prev->prio; - entry->prev_state = __get_task_state(prev); + entry->prev_state = task_state_index(prev); entry->next_pid = next->pid; entry->next_prio = next->prio; - entry->next_state = __get_task_state(next); + entry->next_state = task_state_index(next); entry->next_cpu = task_cpu(next); if (!call_filter_check_discard(call, entry, buffer, event)) @@ -425,10 +425,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->prev_pid = curr->pid; entry->prev_prio = curr->prio; - entry->prev_state = __get_task_state(curr); + entry->prev_state = task_state_index(curr); entry->next_pid = wakee->pid; entry->next_prio = wakee->prio; - entry->next_state = __get_task_state(wakee); + entry->next_state = task_state_index(wakee); entry->next_cpu = task_cpu(wakee); if (!call_filter_check_discard(call, entry, buffer, event)) -- cgit v1.3-14-g43fede From d59158162e032917a428704160a2063a02405ec6 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 10 Oct 2017 15:51:37 -0700 Subject: tracing: Add support for preempt and irq enable/disable events Preempt and irq trace events can be used for tracing the start and end of an atomic section which can be used by a trace viewer like systrace to graphically view the start and end of an atomic section and correlate them with latencies and scheduling issues. This also serves as a prelude to using synthetic events or probes to rewrite the preempt and irqsoff tracers, along with numerous benefits of using trace events features for these events. Link: http://lkml.kernel.org/r/20171006005432.14244-3-joelaf@google.com Link: http://lkml.kernel.org/r/20171010225137.17370-1-joelaf@google.com Cc: Peter Zilstra Cc: kernel-team@android.com Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 3 +- include/trace/events/preemptirq.h | 70 +++++++++++++++++++++++++++++++++++++++ kernel/trace/Kconfig | 11 ++++++ kernel/trace/Makefile | 1 + kernel/trace/trace_irqsoff.c | 35 +++++++++++++++++++- 5 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 include/trace/events/preemptirq.h (limited to 'kernel/trace') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 346f8294e40a..1f8545caa691 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -769,7 +769,8 @@ static inline unsigned long get_lock_parent_ip(void) static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif -#ifdef CONFIG_PREEMPT_TRACER +#if defined(CONFIG_PREEMPT_TRACER) || \ + (defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS)) extern void trace_preempt_on(unsigned long a0, unsigned long a1); extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h new file mode 100644 index 000000000000..f5024c560d8f --- /dev/null +++ b/include/trace/events/preemptirq.h @@ -0,0 +1,70 @@ +#ifdef CONFIG_PREEMPTIRQ_EVENTS + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM preemptirq + +#if !defined(_TRACE_PREEMPTIRQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PREEMPTIRQ_H + +#include +#include +#include +#include + +DECLARE_EVENT_CLASS(preemptirq_template, + + TP_PROTO(unsigned long ip, unsigned long parent_ip), + + TP_ARGS(ip, parent_ip), + + TP_STRUCT__entry( + __field(u32, caller_offs) + __field(u32, parent_offs) + ), + + TP_fast_assign( + __entry->caller_offs = (u32)(ip - (unsigned long)_stext); + __entry->parent_offs = (u32)(parent_ip - (unsigned long)_stext); + ), + + TP_printk("caller=%pF parent=%pF", + (void *)((unsigned long)(_stext) + __entry->caller_offs), + (void *)((unsigned long)(_stext) + __entry->parent_offs)) +); + +#ifndef CONFIG_PROVE_LOCKING +DEFINE_EVENT(preemptirq_template, irq_disable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); + +DEFINE_EVENT(preemptirq_template, irq_enable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); +#endif + +#ifdef CONFIG_DEBUG_PREEMPT +DEFINE_EVENT(preemptirq_template, preempt_disable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); + +DEFINE_EVENT(preemptirq_template, preempt_enable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); +#endif + +#endif /* _TRACE_PREEMPTIRQ_H */ + +#include + +#else /* !CONFIG_PREEMPTIRQ_EVENTS */ + +#define trace_irq_enable(...) +#define trace_irq_disable(...) +#define trace_preempt_enable(...) +#define trace_preempt_disable(...) +#define trace_irq_enable_rcuidle(...) +#define trace_irq_disable_rcuidle(...) +#define trace_preempt_enable_rcuidle(...) +#define trace_preempt_disable_rcuidle(...) + +#endif diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 434c840e2d82..b8395a020821 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -160,6 +160,17 @@ config FUNCTION_GRAPH_TRACER address on the current task structure into a stack of calls. +config PREEMPTIRQ_EVENTS + bool "Enable trace events for preempt and irq disable/enable" + select TRACE_IRQFLAGS + depends on DEBUG_PREEMPT || !PROVE_LOCKING + default n + help + Enable tracing of disable and enable events for preemption and irqs. + For tracing preempt disable/enable events, DEBUG_PREEMPT must be + enabled. For tracing irq disable/enable events, PROVE_LOCKING must + be disabled. + config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 90f2701d92a7..9f62eee61f14 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o +obj-$(CONFIG_PREEMPTIRQ_EVENTS) += trace_irqsoff.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 0e3033c00474..03ecb4465ee4 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -16,6 +16,9 @@ #include "trace.h" +#define CREATE_TRACE_POINTS +#include + #if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER) static struct trace_array *irqsoff_trace __read_mostly; static int tracer_enabled __read_mostly; @@ -777,26 +780,53 @@ static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { } #endif #if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PROVE_LOCKING) +/* Per-cpu variable to prevent redundant calls when IRQs already off */ +static DEFINE_PER_CPU(int, tracing_irq_cpu); + void trace_hardirqs_on(void) { + if (!this_cpu_read(tracing_irq_cpu)) + return; + + trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); tracer_hardirqs_on(); + + this_cpu_write(tracing_irq_cpu, 0); } EXPORT_SYMBOL(trace_hardirqs_on); void trace_hardirqs_off(void) { + if (this_cpu_read(tracing_irq_cpu)) + return; + + this_cpu_write(tracing_irq_cpu, 1); + + trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); tracer_hardirqs_off(); } EXPORT_SYMBOL(trace_hardirqs_off); __visible void trace_hardirqs_on_caller(unsigned long caller_addr) { + if (!this_cpu_read(tracing_irq_cpu)) + return; + + trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr); tracer_hardirqs_on_caller(caller_addr); + + this_cpu_write(tracing_irq_cpu, 0); } EXPORT_SYMBOL(trace_hardirqs_on_caller); __visible void trace_hardirqs_off_caller(unsigned long caller_addr) { + if (this_cpu_read(tracing_irq_cpu)) + return; + + this_cpu_write(tracing_irq_cpu, 1); + + trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr); tracer_hardirqs_off_caller(caller_addr); } EXPORT_SYMBOL(trace_hardirqs_off_caller); @@ -818,14 +848,17 @@ inline void print_irqtrace_events(struct task_struct *curr) } #endif -#ifdef CONFIG_PREEMPT_TRACER +#if defined(CONFIG_PREEMPT_TRACER) || \ + (defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS)) void trace_preempt_on(unsigned long a0, unsigned long a1) { + trace_preempt_enable_rcuidle(a0, a1); tracer_preempt_on(a0, a1); } void trace_preempt_off(unsigned long a0, unsigned long a1) { + trace_preempt_disable_rcuidle(a0, a1); tracer_preempt_off(a0, a1); } #endif -- cgit v1.3-14-g43fede From 8715b108cd75523c9b2e833cdcd7aeb363767f95 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 9 Oct 2017 12:29:31 -0700 Subject: ftrace: Clear hashes of stale ips of init memory Filters should be cleared of init functions during freeing of init memory when the ftrace dyn records are released. However in current code, the filters are left as is. This patch clears the hashes of the saved init functions when the init memory is freed. This fixes the following issue reproducible with the following sequence of commands for a test module: ================================================ void bar(void) { printk(KERN_INFO "bar!\n"); } void foo(void) { printk(KERN_INFO "foo!\n"); bar(); } static int __init hello_init(void) { printk(KERN_INFO "Hello world!\n"); foo(); return 0; } static void __exit hello_cleanup(void) { printk(KERN_INFO "Cleaning up module.\n"); } module_init(hello_init); module_exit(hello_cleanup); ================================================ Commands: echo '*:mod:test' > /d/tracing/set_ftrace_filter echo function > /d/tracing/current_tracer modprobe test rmmod test sleep 1 modprobe test cat /d/tracing/set_ftrace_filter Behavior without patch: Init function is still in the filter Expected behavior: Shouldn't have any of the filters set Link: http://lkml.kernel.org/r/20171009192931.56401-1-joelaf@google.com Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9e99bd55732e..e0a98225666b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6067,6 +6067,63 @@ allocate_ftrace_mod_map(struct module *mod, } #endif /* CONFIG_MODULES */ +struct ftrace_init_func { + struct list_head list; + unsigned long ip; +}; + +/* Clear any init ips from hashes */ +static void +clear_func_from_hash(struct ftrace_init_func *func, struct ftrace_hash *hash) +{ + struct ftrace_func_entry *entry; + + if (ftrace_hash_empty(hash)) + return; + + entry = __ftrace_lookup_ip(hash, func->ip); + + /* + * Do not allow this rec to match again. + * Yeah, it may waste some memory, but will be removed + * if/when the hash is modified again. + */ + if (entry) + entry->ip = 0; +} + +static void +clear_func_from_hashes(struct ftrace_init_func *func) +{ + struct trace_array *tr; + + mutex_lock(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (!tr->ops || !tr->ops->func_hash) + continue; + mutex_lock(&tr->ops->func_hash->regex_lock); + clear_func_from_hash(func, tr->ops->func_hash->filter_hash); + clear_func_from_hash(func, tr->ops->func_hash->notrace_hash); + mutex_unlock(&tr->ops->func_hash->regex_lock); + } + mutex_unlock(&trace_types_lock); +} + +static void add_to_clear_hash_list(struct list_head *clear_list, + struct dyn_ftrace *rec) +{ + struct ftrace_init_func *func; + + func = kmalloc(sizeof(*func), GFP_KERNEL); + if (!func) { + WARN_ONCE(1, "alloc failure, ftrace filter could be stale\n"); + return; + } + + func->ip = rec->ip; + list_add(&func->list, clear_list); +} + void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) { unsigned long start = (unsigned long)(start_ptr); @@ -6076,8 +6133,12 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) struct dyn_ftrace *rec; struct dyn_ftrace key; struct ftrace_mod_map *mod_map = NULL; + struct ftrace_init_func *func, *func_next; + struct list_head clear_hash; int order; + INIT_LIST_HEAD(&clear_hash); + key.ip = start; key.flags = end; /* overload flags, as it is unsigned long */ @@ -6102,6 +6163,9 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) if (!rec) continue; + /* rec will be cleared from hashes after ftrace_lock unlock */ + add_to_clear_hash_list(&clear_hash, rec); + if (mod_map) save_ftrace_mod_rec(mod_map, rec); @@ -6123,6 +6187,11 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) goto again; } mutex_unlock(&ftrace_lock); + + list_for_each_entry_safe(func, func_next, &clear_hash, list) { + clear_func_from_hashes(func); + kfree(func); + } } void __init ftrace_free_init_mem(void) -- cgit v1.3-14-g43fede From c5c1ea75a352c3864c4891f36155287a2ed73928 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 13 Jun 2017 13:06:59 +0200 Subject: tracing: Kconfig text fixes for CONFIG_HWLAT_TRACER Trivial spelling fixes for Kconfig help text of config HWLAT_TRACER. Fixes: e7c15cd8a113 ("tracing: Added hardware latency tracer") Signed-off-by: Jesper Dangaard Brouer Acked-by: Steven Rostedt Signed-off-by: Jiri Kosina --- kernel/trace/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 434c840e2d82..f54b7b6b4a4b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -224,7 +224,7 @@ config HWLAT_TRACER select GENERIC_TRACER help This tracer, when enabled will create one or more kernel threads, - depening on what the cpumask file is set to, which each thread + depending on what the cpumask file is set to, which each thread spinning in a loop looking for interruptions caused by something other than the kernel. For example, if a System Management Interrupt (SMI) takes a noticeable amount of @@ -239,7 +239,7 @@ config HWLAT_TRACER iteration A kernel thread is created that will spin with interrupts disabled - for "width" microseconds in every "widow" cycle. It will not spin + for "width" microseconds in every "window" cycle. It will not spin for "window - width" microseconds, where the system can continue to operate. -- cgit v1.3-14-g43fede From c3b5b6ed1eb4f429addfd9e8e8eb39d1a38c85d0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Oct 2017 16:22:20 +0200 Subject: tracing: mark trace_test_buffer as __maybe_unused After trace_selftest_startup_sched_switch is removed, trace_test_buffer() is only used sometimes, leading to this warning: kernel/trace/trace_selftest.c:62:12: error: 'trace_test_buffer' defined but not used [-Werror=unused-function] There is no simple #ifdef condition that captures well whether the function is in fact used or not, so marking it as __maybe_unused is probably the best way to shut up the warning. The function will then be silently dropped when there is no user. Link: http://lkml.kernel.org/r/20171013142227.1273469-1-arnd@arndb.de Fixes: d8c4deee6dc6 ("tracing: Remove obsolete sched_switch tracer selftest") Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 364f78abdf47..eb9ba5c1ba40 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -59,7 +59,7 @@ static int trace_test_buffer_cpu(struct trace_buffer *buf, int cpu) * Test the trace buffer to see if all the elements * are still sane. */ -static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count) +static int __maybe_unused trace_test_buffer(struct trace_buffer *buf, unsigned long *count) { unsigned long flags, cnt = 0; int cpu, ret = 0; -- cgit v1.3-14-g43fede From 8fd0fbbe8888f295eb34172a7e47bf7d3a0a4687 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Oct 2017 09:45:29 +0200 Subject: perf/ftrace: Revert ("perf/ftrace: Fix double traces of perf on ftrace:function") Revert commit: 75e8387685f6 ("perf/ftrace: Fix double traces of perf on ftrace:function") The reason I instantly stumbled on that patch is that it only addresses the ftrace situation and doesn't mention the other _5_ places that use this interface. It doesn't explain why those don't have the problem and if not, why their solution doesn't work for ftrace. It doesn't, but this is just putting more duct tape on. Link: http://lkml.kernel.org/r/20171011080224.200565770@infradead.org Cc: Zhou Chengming Cc: Jiri Olsa Cc: Ingo Molnar Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- include/linux/perf_event.h | 2 +- include/linux/trace_events.h | 4 ++-- kernel/events/core.c | 13 ++++--------- kernel/trace/trace_event_perf.c | 4 +--- kernel/trace/trace_kprobe.c | 4 ++-- kernel/trace/trace_syscalls.c | 4 ++-- kernel/trace/trace_uprobe.c | 2 +- 7 files changed, 13 insertions(+), 20 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8e22f24ded6a..569d1b54e201 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1184,7 +1184,7 @@ extern void perf_event_init(void); extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, - struct task_struct *task, struct perf_event *event); + struct task_struct *task); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2e0f22298fe9..a6349b76fd39 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -507,9 +507,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, - struct task_struct *task, struct perf_event *event) + struct task_struct *task) { - perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); + perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); } #endif diff --git a/kernel/events/core.c b/kernel/events/core.c index 6bc21e202ae4..b8db80c5513b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7954,15 +7954,16 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, } } perf_tp_event(call->event.type, count, raw_data, size, regs, head, - rctx, task, NULL); + rctx, task); } EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit); void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, - struct task_struct *task, struct perf_event *event) + struct task_struct *task) { struct perf_sample_data data; + struct perf_event *event; struct perf_raw_record raw = { .frag = { @@ -7976,15 +7977,9 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, perf_trace_buf_update(record, event_type); - /* Use the given event instead of the hlist */ - if (event) { + hlist_for_each_entry_rcu(event, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) perf_swevent_event(event, count, &data, regs); - } else { - hlist_for_each_entry_rcu(event, head, hlist_entry) { - if (perf_tp_event_match(event, &data, regs)) - perf_swevent_event(event, count, &data, regs); - } } /* diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 13ba2d3f6a91..562fa69df5d3 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -306,7 +306,6 @@ static void perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *pt_regs) { - struct perf_event *event; struct ftrace_entry *entry; struct hlist_head *head; struct pt_regs regs; @@ -330,9 +329,8 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; - event = container_of(ops, struct perf_event, ftrace_ops); perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, - 1, ®s, head, NULL, event); + 1, ®s, head, NULL); #undef ENTRY_SIZE } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index af6134f2e597..996902a526d4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1200,7 +1200,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL, NULL); + head, NULL); } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1236,7 +1236,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL, NULL); + head, NULL); } NOKPROBE_SYMBOL(kretprobe_perf_func); #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 696afe72d3b1..934b0da72679 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -622,7 +622,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) perf_trace_buf_submit(rec, size, rctx, sys_data->enter_event->event.type, 1, regs, - head, NULL, NULL); + head, NULL); } static int perf_sysenter_enable(struct trace_event_call *call) @@ -716,7 +716,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) } perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type, - 1, regs, head, NULL, NULL); + 1, regs, head, NULL); } static int perf_sysexit_enable(struct trace_event_call *call) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index b34965e62fb9..402120ba4594 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1156,7 +1156,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, } perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL, NULL); + head, NULL); out: preempt_enable(); } -- cgit v1.3-14-g43fede From 466c81c45b650deca213fda3d0ec4761667379a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Oct 2017 17:15:47 +0200 Subject: perf/ftrace: Fix function trace events The function-trace <-> perf interface is a tad messed up. Where all the other trace <-> perf interfaces use a single trace hook registration and use per-cpu RCU based hlist to iterate the events, function-trace actually needs multiple hook registrations in order to minimize function entry patching when filters are present. The end result is that we iterate events both on the trace hook and on the hlist, which results in reporting events multiple times. Since function-trace cannot use the regular scheme, fix it the other way around, use singleton hlists. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 5 +++ kernel/trace/trace_event_perf.c | 80 +++++++++++++++++++++++++---------------- 2 files changed, 54 insertions(+), 31 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index a6349b76fd39..ca4e67e466a7 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -173,6 +173,11 @@ enum trace_reg { TRACE_REG_PERF_UNREGISTER, TRACE_REG_PERF_OPEN, TRACE_REG_PERF_CLOSE, + /* + * These (ADD/DEL) use a 'boolean' return value, where 1 (true) means a + * custom action was taken and the default action is not to be + * performed. + */ TRACE_REG_PERF_ADD, TRACE_REG_PERF_DEL, #endif diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 562fa69df5d3..e73f9ab15939 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -240,27 +240,41 @@ void perf_trace_destroy(struct perf_event *p_event) int perf_trace_add(struct perf_event *p_event, int flags) { struct trace_event_call *tp_event = p_event->tp_event; - struct hlist_head __percpu *pcpu_list; - struct hlist_head *list; - - pcpu_list = tp_event->perf_events; - if (WARN_ON_ONCE(!pcpu_list)) - return -EINVAL; if (!(flags & PERF_EF_START)) p_event->hw.state = PERF_HES_STOPPED; - list = this_cpu_ptr(pcpu_list); - hlist_add_head_rcu(&p_event->hlist_entry, list); + /* + * If TRACE_REG_PERF_ADD returns false; no custom action was performed + * and we need to take the default action of enqueueing our event on + * the right per-cpu hlist. + */ + if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event)) { + struct hlist_head __percpu *pcpu_list; + struct hlist_head *list; + + pcpu_list = tp_event->perf_events; + if (WARN_ON_ONCE(!pcpu_list)) + return -EINVAL; + + list = this_cpu_ptr(pcpu_list); + hlist_add_head_rcu(&p_event->hlist_entry, list); + } - return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event); + return 0; } void perf_trace_del(struct perf_event *p_event, int flags) { struct trace_event_call *tp_event = p_event->tp_event; - hlist_del_rcu(&p_event->hlist_entry); - tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); + + /* + * If TRACE_REG_PERF_DEL returns false; no custom action was performed + * and we need to take the default action of dequeueing our event from + * the right per-cpu hlist. + */ + if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event)) + hlist_del_rcu(&p_event->hlist_entry); } void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp) @@ -307,14 +321,24 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *pt_regs) { struct ftrace_entry *entry; - struct hlist_head *head; + struct perf_event *event; + struct hlist_head head; struct pt_regs regs; int rctx; - head = this_cpu_ptr(event_function.perf_events); - if (hlist_empty(head)) + if ((unsigned long)ops->private != smp_processor_id()) return; + event = container_of(ops, struct perf_event, ftrace_ops); + + /* + * @event->hlist entry is NULL (per INIT_HLIST_NODE), and all + * the perf code does is hlist_for_each_entry_rcu(), so we can + * get away with simply setting the @head.first pointer in order + * to create a singular list. + */ + head.first = &event->hlist_entry; + #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ sizeof(u64)) - sizeof(u32)) @@ -330,7 +354,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, - 1, ®s, head, NULL); + 1, ®s, &head, NULL); #undef ENTRY_SIZE } @@ -339,8 +363,10 @@ static int perf_ftrace_function_register(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; - ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU; - ops->func = perf_ftrace_function_call; + ops->flags |= FTRACE_OPS_FL_RCU; + ops->func = perf_ftrace_function_call; + ops->private = (void *)(unsigned long)nr_cpu_ids; + return register_ftrace_function(ops); } @@ -352,19 +378,11 @@ static int perf_ftrace_function_unregister(struct perf_event *event) return ret; } -static void perf_ftrace_function_enable(struct perf_event *event) -{ - ftrace_function_local_enable(&event->ftrace_ops); -} - -static void perf_ftrace_function_disable(struct perf_event *event) -{ - ftrace_function_local_disable(&event->ftrace_ops); -} - int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data) { + struct perf_event *event = data; + switch (type) { case TRACE_REG_REGISTER: case TRACE_REG_UNREGISTER: @@ -377,11 +395,11 @@ int perf_ftrace_event_register(struct trace_event_call *call, case TRACE_REG_PERF_CLOSE: return perf_ftrace_function_unregister(data); case TRACE_REG_PERF_ADD: - perf_ftrace_function_enable(data); - return 0; + event->ftrace_ops.private = (void *)(unsigned long)smp_processor_id(); + return 1; case TRACE_REG_PERF_DEL: - perf_ftrace_function_disable(data); - return 0; + event->ftrace_ops.private = (void *)(unsigned long)nr_cpu_ids; + return 1; } return -EINVAL; -- cgit v1.3-14-g43fede From 1dd311e6dcda4020c603bcf9f390a577d439d509 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Oct 2017 09:45:31 +0200 Subject: perf/ftrace: Small cleanup ops->flags _should_ be 0 at this point, so setting the flag using bitwise or is a bit daft. Link: http://lkml.kernel.org/r/20171011080224.315585202@infradead.org Requested-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_event_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index e73f9ab15939..55d6dff37daf 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -363,7 +363,7 @@ static int perf_ftrace_function_register(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; - ops->flags |= FTRACE_OPS_FL_RCU; + ops->flags = FTRACE_OPS_FL_RCU; ops->func = perf_ftrace_function_call; ops->private = (void *)(unsigned long)nr_cpu_ids; -- cgit v1.3-14-g43fede From b3a88803ac5b4bda26017b485c8722a8487fefb7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Oct 2017 09:45:32 +0200 Subject: ftrace: Kill FTRACE_OPS_FL_PER_CPU The one and only user of FTRACE_OPS_FL_PER_CPU is gone, remove the lot. Link: http://lkml.kernel.org/r/20171011080224.372422809@infradead.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 83 +++++++++----------------------------------------- kernel/trace/ftrace.c | 55 ++++----------------------------- 2 files changed, 20 insertions(+), 118 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1f8545caa691..252e334e7b5f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -102,10 +102,6 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * ENABLED - set/unset when ftrace_ops is registered/unregistered * DYNAMIC - set when ftrace_ops is registered to denote dynamically * allocated ftrace_ops which need special care - * PER_CPU - set manualy by ftrace_ops user to denote the ftrace_ops - * could be controlled by following calls: - * ftrace_function_local_enable - * ftrace_function_local_disable * SAVE_REGS - The ftrace_ops wants regs saved at each function called * and passed to the callback. If this flag is set, but the * architecture does not support passing regs @@ -149,21 +145,20 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); enum { FTRACE_OPS_FL_ENABLED = 1 << 0, FTRACE_OPS_FL_DYNAMIC = 1 << 1, - FTRACE_OPS_FL_PER_CPU = 1 << 2, - FTRACE_OPS_FL_SAVE_REGS = 1 << 3, - FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 4, - FTRACE_OPS_FL_RECURSION_SAFE = 1 << 5, - FTRACE_OPS_FL_STUB = 1 << 6, - FTRACE_OPS_FL_INITIALIZED = 1 << 7, - FTRACE_OPS_FL_DELETED = 1 << 8, - FTRACE_OPS_FL_ADDING = 1 << 9, - FTRACE_OPS_FL_REMOVING = 1 << 10, - FTRACE_OPS_FL_MODIFYING = 1 << 11, - FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 12, - FTRACE_OPS_FL_IPMODIFY = 1 << 13, - FTRACE_OPS_FL_PID = 1 << 14, - FTRACE_OPS_FL_RCU = 1 << 15, - FTRACE_OPS_FL_TRACE_ARRAY = 1 << 16, + FTRACE_OPS_FL_SAVE_REGS = 1 << 2, + FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 3, + FTRACE_OPS_FL_RECURSION_SAFE = 1 << 4, + FTRACE_OPS_FL_STUB = 1 << 5, + FTRACE_OPS_FL_INITIALIZED = 1 << 6, + FTRACE_OPS_FL_DELETED = 1 << 7, + FTRACE_OPS_FL_ADDING = 1 << 8, + FTRACE_OPS_FL_REMOVING = 1 << 9, + FTRACE_OPS_FL_MODIFYING = 1 << 10, + FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 11, + FTRACE_OPS_FL_IPMODIFY = 1 << 12, + FTRACE_OPS_FL_PID = 1 << 13, + FTRACE_OPS_FL_RCU = 1 << 14, + FTRACE_OPS_FL_TRACE_ARRAY = 1 << 15, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -198,7 +193,6 @@ struct ftrace_ops { unsigned long flags; void *private; ftrace_func_t saved_func; - int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_ops_hash local_hash; struct ftrace_ops_hash *func_hash; @@ -230,55 +224,6 @@ int register_ftrace_function(struct ftrace_ops *ops); int unregister_ftrace_function(struct ftrace_ops *ops); void clear_ftrace_function(void); -/** - * ftrace_function_local_enable - enable ftrace_ops on current cpu - * - * This function enables tracing on current cpu by decreasing - * the per cpu control variable. - * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption - * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. - */ -static inline void ftrace_function_local_enable(struct ftrace_ops *ops) -{ - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) - return; - - (*this_cpu_ptr(ops->disabled))--; -} - -/** - * ftrace_function_local_disable - disable ftrace_ops on current cpu - * - * This function disables tracing on current cpu by increasing - * the per cpu control variable. - * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption - * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. - */ -static inline void ftrace_function_local_disable(struct ftrace_ops *ops) -{ - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) - return; - - (*this_cpu_ptr(ops->disabled))++; -} - -/** - * ftrace_function_local_disabled - returns ftrace_ops disabled value - * on current cpu - * - * This function returns value of ftrace_ops::disabled on current cpu. - * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption - * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. - */ -static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) -{ - WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU)); - return *this_cpu_ptr(ops->disabled); -} - extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct pt_regs *regs); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e0a98225666b..2fd3edaec6de 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -203,30 +203,6 @@ void clear_ftrace_function(void) ftrace_trace_function = ftrace_stub; } -static void per_cpu_ops_disable_all(struct ftrace_ops *ops) -{ - int cpu; - - for_each_possible_cpu(cpu) - *per_cpu_ptr(ops->disabled, cpu) = 1; -} - -static int per_cpu_ops_alloc(struct ftrace_ops *ops) -{ - int __percpu *disabled; - - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) - return -EINVAL; - - disabled = alloc_percpu(int); - if (!disabled) - return -ENOMEM; - - ops->disabled = disabled; - per_cpu_ops_disable_all(ops); - return 0; -} - static void ftrace_sync(struct work_struct *work) { /* @@ -262,8 +238,8 @@ static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) * If this is a dynamic, RCU, or per CPU ops, or we force list func, * then it needs to call the list anyway. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU | - FTRACE_OPS_FL_RCU) || FTRACE_FORCE_LIST_FUNC) + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RCU) || + FTRACE_FORCE_LIST_FUNC) return ftrace_ops_list_func; return ftrace_ops_get_func(ops); @@ -422,11 +398,6 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; - if (ops->flags & FTRACE_OPS_FL_PER_CPU) { - if (per_cpu_ops_alloc(ops)) - return -ENOMEM; - } - add_ftrace_ops(&ftrace_ops_list, ops); /* Always save the function, and reset at unregistering */ @@ -2727,11 +2698,6 @@ void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops) { } -static void per_cpu_ops_free(struct ftrace_ops *ops) -{ - free_percpu(ops->disabled); -} - static void ftrace_startup_enable(int command) { if (saved_ftrace_func != ftrace_trace_function) { @@ -2833,7 +2799,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * not currently active, we can just free them * without synchronizing all CPUs. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) goto free_ops; return 0; @@ -2880,7 +2846,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * The same goes for freeing the per_cpu data of the per_cpu * ops. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) { + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) { /* * We need to do a hard force of sched synchronization. * This is because we use preempt_disable() to do RCU, but @@ -2903,9 +2869,6 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) free_ops: arch_ftrace_trampoline_free(ops); - - if (ops->flags & FTRACE_OPS_FL_PER_CPU) - per_cpu_ops_free(ops); } return 0; @@ -6355,10 +6318,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, * If any of the above fails then the op->func() is not executed. */ if ((!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) && - (!(op->flags & FTRACE_OPS_FL_PER_CPU) || - !ftrace_function_local_disabled(op)) && ftrace_ops_test(op, ip, regs)) { - if (FTRACE_WARN_ON(!op->func)) { pr_warn("op=%p %pS\n", op, op); goto out; @@ -6416,10 +6376,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); - if (!(op->flags & FTRACE_OPS_FL_PER_CPU) || - !ftrace_function_local_disabled(op)) { - op->func(ip, parent_ip, op, regs); - } + op->func(ip, parent_ip, op, regs); preempt_enable_notrace(); trace_clear_recursion(bit); @@ -6443,7 +6400,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) * or does per cpu logic, then we need to call the assist handler. */ if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) || - ops->flags & (FTRACE_OPS_FL_RCU | FTRACE_OPS_FL_PER_CPU)) + ops->flags & FTRACE_OPS_FL_RCU) return ftrace_ops_assist_func; return ops->func; -- cgit v1.3-14-g43fede From 7de16e3a35578f4f5accc6f5f23970310483d0a2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 16:40:53 -0700 Subject: bpf: split verifier and program ops struct bpf_verifier_ops contains both verifier ops and operations used later during program's lifetime (test_run). Split the runtime ops into a different structure. BPF_PROG_TYPE() will now append ## _prog_ops or ## _verifier_ops to the names. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 15 ++++++++++----- include/linux/bpf_types.h | 28 ++++++++++++++-------------- kernel/bpf/syscall.c | 16 +++++++++++++--- kernel/bpf/verifier.c | 12 ++++++------ kernel/trace/bpf_trace.c | 15 ++++++++++++--- net/core/filter.c | 45 ++++++++++++++++++++++++++++++++++++--------- 6 files changed, 91 insertions(+), 40 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6d4dd844828a..e1fba5504ca5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -157,6 +157,11 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +struct bpf_prog_ops { + int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +}; + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); @@ -172,8 +177,6 @@ struct bpf_verifier_ops { const struct bpf_insn *src, struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); - int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, - union bpf_attr __user *uattr); }; struct bpf_prog_aux { @@ -184,7 +187,8 @@ struct bpf_prog_aux { u32 id; struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; - const struct bpf_verifier_ops *ops; + const struct bpf_prog_ops *ops; + const struct bpf_verifier_ops *vops; struct bpf_map **used_maps; struct bpf_prog *prog; struct user_struct *user; @@ -279,8 +283,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); -#define BPF_PROG_TYPE(_id, _ops) \ - extern const struct bpf_verifier_ops _ops; +#define BPF_PROG_TYPE(_id, _name) \ + extern const struct bpf_prog_ops _name ## _prog_ops; \ + extern const struct bpf_verifier_ops _name ## _verifier_ops; #define BPF_MAP_TYPE(_id, _ops) \ extern const struct bpf_map_ops _ops; #include diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 814c1081a4a9..36418ad43245 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -1,22 +1,22 @@ /* internal file - do not include directly */ #ifdef CONFIG_NET -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) +BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) #endif #ifdef CONFIG_BPF_EVENTS -BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) +BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 54fba06942f5..444902b5a30d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -739,9 +739,18 @@ err_put: return err; } -static const struct bpf_verifier_ops * const bpf_prog_types[] = { -#define BPF_PROG_TYPE(_id, _ops) \ - [_id] = &_ops, +static const struct bpf_prog_ops * const bpf_prog_types[] = { +#define BPF_PROG_TYPE(_id, _name) \ + [_id] = & _name ## _prog_ops, +#define BPF_MAP_TYPE(_id, _ops) +#include +#undef BPF_PROG_TYPE +#undef BPF_MAP_TYPE +}; + +static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { +#define BPF_PROG_TYPE(_id, _name) \ + [_id] = & _name ## _verifier_ops, #define BPF_MAP_TYPE(_id, _ops) #include #undef BPF_PROG_TYPE @@ -754,6 +763,7 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) return -EINVAL; prog->aux->ops = bpf_prog_types[type]; + prog->aux->vops = bpf_verifier_ops[type]; prog->type = type; return 0; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e4d5136725a2..38e24d69fc95 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -856,8 +856,8 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, *reg_type = info.reg_type; return 0; } - } else if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t, &info)) { + } else if (env->prog->aux->vops->is_valid_access && + env->prog->aux->vops->is_valid_access(off, size, t, &info)) { /* A non zero info.ctx_field_size indicates that this field is a * candidate for later verifier transformation to load the whole * field and then apply a mask when accessed with a narrower @@ -1565,8 +1565,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) return -EINVAL; } - if (env->prog->aux->ops->get_func_proto) - fn = env->prog->aux->ops->get_func_proto(func_id); + if (env->prog->aux->vops->get_func_proto) + fn = env->prog->aux->vops->get_func_proto(func_id); if (!fn) { verbose(env, "unknown func %s#%d\n", func_id_name(func_id), @@ -4035,7 +4035,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of */ static int convert_ctx_accesses(struct bpf_verifier_env *env) { - const struct bpf_verifier_ops *ops = env->prog->aux->ops; + const struct bpf_verifier_ops *ops = env->prog->aux->vops; int i, cnt, size, ctx_field_size, delta = 0; const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; @@ -4236,7 +4236,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) insn = new_prog->insnsi + i + delta; } patch_call_imm: - fn = prog->aux->ops->get_func_proto(insn->imm); + fn = prog->aux->vops->get_func_proto(insn->imm); /* all functions that have prototype and verifier allowed * programs to call them, must be real in-kernel functions */ diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 04ea5314f2bc..3126da2f468a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -561,11 +561,14 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type return true; } -const struct bpf_verifier_ops kprobe_prog_ops = { +const struct bpf_verifier_ops kprobe_verifier_ops = { .get_func_proto = kprobe_prog_func_proto, .is_valid_access = kprobe_prog_is_valid_access, }; +const struct bpf_prog_ops kprobe_prog_ops = { +}; + BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map, u64, flags, void *, data, u64, size) { @@ -667,11 +670,14 @@ static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type return true; } -const struct bpf_verifier_ops tracepoint_prog_ops = { +const struct bpf_verifier_ops tracepoint_verifier_ops = { .get_func_proto = tp_prog_func_proto, .is_valid_access = tp_prog_is_valid_access, }; +const struct bpf_prog_ops tracepoint_prog_ops = { +}; + static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { @@ -727,8 +733,11 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -const struct bpf_verifier_ops perf_event_prog_ops = { +const struct bpf_verifier_ops perf_event_verifier_ops = { .get_func_proto = tp_prog_func_proto, .is_valid_access = pe_prog_is_valid_access, .convert_ctx_access = pe_prog_convert_ctx_access, }; + +const struct bpf_prog_ops perf_event_prog_ops = { +}; diff --git a/net/core/filter.c b/net/core/filter.c index 4d88e0665c41..1dd3034f846f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4395,68 +4395,95 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -const struct bpf_verifier_ops sk_filter_prog_ops = { +const struct bpf_verifier_ops sk_filter_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; -const struct bpf_verifier_ops tc_cls_act_prog_ops = { +const struct bpf_prog_ops sk_filter_prog_ops = { +}; + +const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, +}; + +const struct bpf_prog_ops tc_cls_act_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops xdp_prog_ops = { +const struct bpf_verifier_ops xdp_verifier_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, +}; + +const struct bpf_prog_ops xdp_prog_ops = { .test_run = bpf_prog_test_run_xdp, }; -const struct bpf_verifier_ops cg_skb_prog_ops = { +const struct bpf_verifier_ops cg_skb_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, +}; + +const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops lwt_inout_prog_ops = { +const struct bpf_verifier_ops lwt_inout_verifier_ops = { .get_func_proto = lwt_inout_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, +}; + +const struct bpf_prog_ops lwt_inout_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops lwt_xmit_prog_ops = { +const struct bpf_verifier_ops lwt_xmit_verifier_ops = { .get_func_proto = lwt_xmit_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, +}; + +const struct bpf_prog_ops lwt_xmit_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops cg_sock_prog_ops = { +const struct bpf_verifier_ops cg_sock_verifier_ops = { .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, .convert_ctx_access = sock_filter_convert_ctx_access, }; -const struct bpf_verifier_ops sock_ops_prog_ops = { +const struct bpf_prog_ops cg_sock_prog_ops = { +}; + +const struct bpf_verifier_ops sock_ops_verifier_ops = { .get_func_proto = sock_ops_func_proto, .is_valid_access = sock_ops_is_valid_access, .convert_ctx_access = sock_ops_convert_ctx_access, }; -const struct bpf_verifier_ops sk_skb_prog_ops = { +const struct bpf_prog_ops sock_ops_prog_ops = { +}; + +const struct bpf_verifier_ops sk_skb_verifier_ops = { .get_func_proto = sk_skb_func_proto, .is_valid_access = sk_skb_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = sk_skb_prologue, }; +const struct bpf_prog_ops sk_skb_prog_ops = { +}; + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; -- cgit v1.3-14-g43fede From e87c6bc3852b981e71c757be20771546ce9f76f3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 23 Oct 2017 23:53:08 -0700 Subject: bpf: permit multiple bpf attachments for a single perf event This patch enables multiple bpf attachments for a kprobe/uprobe/tracepoint single trace event. Each trace_event keeps a list of attached perf events. When an event happens, all attached bpf programs will be executed based on the order of attachment. A global bpf_event_mutex lock is introduced to protect prog_array attaching and detaching. An alternative will be introduce a mutex lock in every trace_event_call structure, but it takes a lot of extra memory. So a global bpf_event_mutex lock is a good compromise. The bpf prog detachment involves allocation of memory. If the allocation fails, a dummy do-nothing program will replace to-be-detached program in-place. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/bpf.h | 30 +++++++++++++--- include/linux/trace_events.h | 43 ++++++++++++++++++++--- include/trace/perf.h | 6 ++-- kernel/bpf/core.c | 81 ++++++++++++++++++++++++++++++++++++++++++ kernel/events/core.c | 26 +++++--------- kernel/trace/bpf_trace.c | 82 ++++++++++++++++++++++++++++++++++++++++--- kernel/trace/trace_kprobe.c | 6 ++-- kernel/trace/trace_syscalls.c | 34 ++++++++++-------- kernel/trace/trace_uprobe.c | 3 +- 9 files changed, 255 insertions(+), 56 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1e334b248ff6..172be7faf7ba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -273,18 +273,38 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs); int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, __u32 __user *prog_ids, u32 cnt); -#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog); +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array); + +#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ ({ \ - struct bpf_prog **_prog; \ + struct bpf_prog **_prog, *__prog; \ + struct bpf_prog_array *_array; \ u32 _ret = 1; \ rcu_read_lock(); \ - _prog = rcu_dereference(array)->progs; \ - for (; *_prog; _prog++) \ - _ret &= func(*_prog, ctx); \ + _array = rcu_dereference(array); \ + if (unlikely(check_non_null && !_array))\ + goto _out; \ + _prog = _array->progs; \ + while ((__prog = READ_ONCE(*_prog))) { \ + _ret &= func(__prog, ctx); \ + _prog++; \ + } \ +_out: \ rcu_read_unlock(); \ _ret; \ }) +#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, false) + +#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, true) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2e0f22298fe9..fc6aeca945db 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -271,14 +271,37 @@ struct trace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; - struct bpf_prog *prog; - struct perf_event *bpf_prog_owner; + struct bpf_prog_array __rcu *prog_array; int (*perf_perm)(struct trace_event_call *, struct perf_event *); #endif }; +#ifdef CONFIG_PERF_EVENTS +static inline bool bpf_prog_array_valid(struct trace_event_call *call) +{ + /* + * This inline function checks whether call->prog_array + * is valid or not. The function is called in various places, + * outside rcu_read_lock/unlock, as a heuristic to speed up execution. + * + * If this function returns true, and later call->prog_array + * becomes false inside rcu_read_lock/unlock region, + * we bail out then. If this function return false, + * there is a risk that we might miss a few events if the checking + * were delayed until inside rcu_read_lock/unlock region and + * call->prog_array happened to become non-NULL then. + * + * Here, READ_ONCE() is used instead of rcu_access_pointer(). + * rcu_access_pointer() requires the actual definition of + * "struct bpf_prog_array" while READ_ONCE() only needs + * a declaration of the same type. + */ + return !!READ_ONCE(call->prog_array); +} +#endif + static inline const char * trace_event_name(struct trace_event_call *call) { @@ -435,12 +458,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file) } #ifdef CONFIG_BPF_EVENTS -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); +int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); +void perf_event_detach_bpf_prog(struct perf_event *event); #else -static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { return 1; } + +static inline int +perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + +static inline void perf_event_detach_bpf_prog(struct perf_event *event) { } + #endif enum { @@ -511,6 +545,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, { perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); } + #endif #endif /* _LINUX_TRACE_EVENT_H */ diff --git a/include/trace/perf.h b/include/trace/perf.h index 04fe68bbe767..14f127b6acf5 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -34,7 +34,6 @@ perf_trace_##call(void *__data, proto) \ struct trace_event_call *event_call = __data; \ struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ struct trace_event_raw_##call *entry; \ - struct bpf_prog *prog = event_call->prog; \ struct pt_regs *__regs; \ u64 __count = 1; \ struct task_struct *__task = NULL; \ @@ -46,8 +45,9 @@ perf_trace_##call(void *__data, proto) \ __data_size = trace_event_get_offsets_##call(&__data_offsets, args); \ \ head = this_cpu_ptr(event_call->perf_events); \ - if (!prog && __builtin_constant_p(!__task) && !__task && \ - hlist_empty(head)) \ + if (!bpf_prog_array_valid(event_call) && \ + __builtin_constant_p(!__task) && !__task && \ + hlist_empty(head)) \ return; \ \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8e7c8bf2b687..7fe448799d76 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1394,6 +1394,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); +static unsigned int __bpf_prog_ret1(const void *ctx, + const struct bpf_insn *insn) +{ + return 1; +} + +static struct bpf_prog_dummy { + struct bpf_prog prog; +} dummy_bpf_prog = { + .prog = { + .bpf_func = __bpf_prog_ret1, + }, +}; + /* to avoid allocating empty bpf_prog_array for cgroups that * don't have bpf program attached use one global 'empty_prog_array' * It will not be modified the caller of bpf_prog_array_alloc() @@ -1463,6 +1477,73 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, return 0; } +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog) +{ + struct bpf_prog **prog = progs->progs; + + for (; *prog; prog++) + if (*prog == old_prog) { + WRITE_ONCE(*prog, &dummy_bpf_prog.prog); + break; + } +} + +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array) +{ + int new_prog_cnt, carry_prog_cnt = 0; + struct bpf_prog **existing_prog; + struct bpf_prog_array *array; + int new_prog_idx = 0; + + /* Figure out how many existing progs we need to carry over to + * the new array. + */ + if (old_array) { + existing_prog = old_array->progs; + for (; *existing_prog; existing_prog++) { + if (*existing_prog != exclude_prog && + *existing_prog != &dummy_bpf_prog.prog) + carry_prog_cnt++; + if (*existing_prog == include_prog) + return -EEXIST; + } + } + + /* How many progs (not NULL) will be in the new array? */ + new_prog_cnt = carry_prog_cnt; + if (include_prog) + new_prog_cnt += 1; + + /* Do we have any prog (not NULL) in the new array? */ + if (!new_prog_cnt) { + *new_array = NULL; + return 0; + } + + /* +1 as the end of prog_array is marked with NULL */ + array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL); + if (!array) + return -ENOMEM; + + /* Fill in the new prog array */ + if (carry_prog_cnt) { + existing_prog = old_array->progs; + for (; *existing_prog; existing_prog++) + if (*existing_prog != exclude_prog && + *existing_prog != &dummy_bpf_prog.prog) + array->progs[new_prog_idx++] = *existing_prog; + } + if (include_prog) + array->progs[new_prog_idx++] = include_prog; + array->progs[new_prog_idx] = NULL; + *new_array = array; + return 0; +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; diff --git a/kernel/events/core.c b/kernel/events/core.c index 9f78a6825bbe..9660ee65fbef 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7954,11 +7954,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, struct pt_regs *regs, struct hlist_head *head, struct task_struct *task) { - struct bpf_prog *prog = call->prog; - - if (prog) { + if (bpf_prog_array_valid(call)) { *(struct pt_regs **)raw_data = regs; - if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) { + if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; } @@ -8147,13 +8145,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) { bool is_kprobe, is_tracepoint, is_syscall_tp; struct bpf_prog *prog; + int ret; if (event->attr.type != PERF_TYPE_TRACEPOINT) return perf_event_set_bpf_handler(event, prog_fd); - if (event->tp_event->prog) - return -EEXIST; - is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE; is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT; is_syscall_tp = is_syscall_trace_event(event->tp_event); @@ -8181,26 +8177,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EACCES; } } - event->tp_event->prog = prog; - event->tp_event->bpf_prog_owner = event; - return 0; + ret = perf_event_attach_bpf_prog(event, prog); + if (ret) + bpf_prog_put(prog); + return ret; } static void perf_event_free_bpf_prog(struct perf_event *event) { - struct bpf_prog *prog; - if (event->attr.type != PERF_TYPE_TRACEPOINT) { perf_event_free_bpf_handler(event); return; } - - prog = event->tp_event->prog; - if (prog && event->tp_event->bpf_prog_owner == event) { - event->tp_event->prog = NULL; - bpf_prog_put(prog); - } + perf_event_detach_bpf_prog(event); } #else diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3126da2f468a..b65011d320e3 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -17,7 +17,7 @@ /** * trace_call_bpf - invoke BPF program - * @prog: BPF program + * @call: tracepoint event * @ctx: opaque context pointer * * kprobe handlers execute BPF programs via this helper. @@ -29,7 +29,7 @@ * 1 - store kprobe event into ring buffer * Other values are reserved and currently alias to 1 */ -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { unsigned int ret; @@ -49,9 +49,22 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) goto out; } - rcu_read_lock(); - ret = BPF_PROG_RUN(prog, ctx); - rcu_read_unlock(); + /* + * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock + * to all call sites, we did a bpf_prog_array_valid() there to check + * whether call->prog_array is empty or not, which is + * a heurisitc to speed up execution. + * + * If bpf_prog_array_valid() fetched prog_array was + * non-NULL, we go into trace_call_bpf() and do the actual + * proper rcu_dereference() under RCU lock. + * If it turns out that prog_array is NULL then, we bail out. + * For the opposite, if the bpf_prog_array_valid() fetched pointer + * was NULL, you'll skip the prog_array with the risk of missing + * out of events when it was updated in between this and the + * rcu_dereference() which is accepted risk. + */ + ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN); out: __this_cpu_dec(bpf_prog_active); @@ -741,3 +754,62 @@ const struct bpf_verifier_ops perf_event_verifier_ops = { const struct bpf_prog_ops perf_event_prog_ops = { }; + +static DEFINE_MUTEX(bpf_event_mutex); + +int perf_event_attach_bpf_prog(struct perf_event *event, + struct bpf_prog *prog) +{ + struct bpf_prog_array __rcu *old_array; + struct bpf_prog_array *new_array; + int ret = -EEXIST; + + mutex_lock(&bpf_event_mutex); + + if (event->prog) + goto out; + + old_array = rcu_dereference_protected(event->tp_event->prog_array, + lockdep_is_held(&bpf_event_mutex)); + ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); + if (ret < 0) + goto out; + + /* set the new array to event->tp_event and set event->prog */ + event->prog = prog; + rcu_assign_pointer(event->tp_event->prog_array, new_array); + bpf_prog_array_free(old_array); + +out: + mutex_unlock(&bpf_event_mutex); + return ret; +} + +void perf_event_detach_bpf_prog(struct perf_event *event) +{ + struct bpf_prog_array __rcu *old_array; + struct bpf_prog_array *new_array; + int ret; + + mutex_lock(&bpf_event_mutex); + + if (!event->prog) + goto out; + + old_array = rcu_dereference_protected(event->tp_event->prog_array, + lockdep_is_held(&bpf_event_mutex)); + + ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); + if (ret < 0) { + bpf_prog_array_delete_safe(old_array, event->prog); + } else { + rcu_assign_pointer(event->tp_event->prog_array, new_array); + bpf_prog_array_free(old_array); + } + + bpf_prog_put(event->prog); + event->prog = NULL; + +out: + mutex_unlock(&bpf_event_mutex); +} diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8a907e12b6b9..abf92e478cfb 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1174,13 +1174,12 @@ static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; - struct bpf_prog *prog = call->prog; struct kprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; head = this_cpu_ptr(call->perf_events); @@ -1210,13 +1209,12 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; - struct bpf_prog *prog = call->prog; struct kretprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; head = this_cpu_ptr(call->perf_events); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 696afe72d3b1..71a6af34d7a9 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -559,9 +559,10 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); static int sys_perf_refcount_enter; static int sys_perf_refcount_exit; -static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, - struct syscall_metadata *sys_data, - struct syscall_trace_enter *rec) { +static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs, + struct syscall_metadata *sys_data, + struct syscall_trace_enter *rec) +{ struct syscall_tp_t { unsigned long long regs; unsigned long syscall_nr; @@ -573,7 +574,7 @@ static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, param.syscall_nr = rec->nr; for (i = 0; i < sys_data->nb_args; i++) param.args[i] = rec->args[i]; - return trace_call_bpf(prog, ¶m); + return trace_call_bpf(call, ¶m); } static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) @@ -581,7 +582,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; struct hlist_head *head; - struct bpf_prog *prog; + bool valid_prog_array; int syscall_nr; int rctx; int size; @@ -596,9 +597,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) if (!sys_data) return; - prog = READ_ONCE(sys_data->enter_event->prog); head = this_cpu_ptr(sys_data->enter_event->perf_events); - if (!prog && hlist_empty(head)) + valid_prog_array = bpf_prog_array_valid(sys_data->enter_event); + if (!valid_prog_array && hlist_empty(head)) return; /* get the size after alignment with the u32 buffer size field */ @@ -614,7 +615,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) || + if ((valid_prog_array && + !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; @@ -659,8 +661,9 @@ static void perf_sysenter_disable(struct trace_event_call *call) mutex_unlock(&syscall_trace_lock); } -static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs, - struct syscall_trace_exit *rec) { +static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs, + struct syscall_trace_exit *rec) +{ struct syscall_tp_t { unsigned long long regs; unsigned long syscall_nr; @@ -670,7 +673,7 @@ static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs, *(struct pt_regs **)¶m = regs; param.syscall_nr = rec->nr; param.ret = rec->ret; - return trace_call_bpf(prog, ¶m); + return trace_call_bpf(call, ¶m); } static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) @@ -678,7 +681,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; struct hlist_head *head; - struct bpf_prog *prog; + bool valid_prog_array; int syscall_nr; int rctx; int size; @@ -693,9 +696,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) if (!sys_data) return; - prog = READ_ONCE(sys_data->exit_event->prog); head = this_cpu_ptr(sys_data->exit_event->perf_events); - if (!prog && hlist_empty(head)) + valid_prog_array = bpf_prog_array_valid(sys_data->exit_event); + if (!valid_prog_array && hlist_empty(head)) return; /* We can probably do that at build time */ @@ -709,7 +712,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - if ((prog && !perf_call_bpf_exit(prog, regs, rec)) || + if ((valid_prog_array && + !perf_call_bpf_exit(sys_data->exit_event, regs, rec)) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 4525e0271a53..153c0e411461 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1113,13 +1113,12 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, { struct trace_event_call *call = &tu->tp.call; struct uprobe_trace_entry_head *entry; - struct bpf_prog *prog = call->prog; struct hlist_head *head; void *data; int size, esize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); -- cgit v1.3-14-g43fede From 6aa7de059173a986114ac43b8f50b297a86f09a8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Oct 2017 14:07:29 -0700 Subject: locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns to READ_ONCE()/WRITE_ONCE() Please do not apply this to mainline directly, instead please re-run the coccinelle script shown below and apply its output. For several reasons, it is desirable to use {READ,WRITE}_ONCE() in preference to ACCESS_ONCE(), and new code is expected to use one of the former. So far, there's been no reason to change most existing uses of ACCESS_ONCE(), as these aren't harmful, and changing them results in churn. However, for some features, the read/write distinction is critical to correct operation. To distinguish these cases, separate read/write accessors must be used. This patch migrates (most) remaining ACCESS_ONCE() instances to {READ,WRITE}_ONCE(), using the following coccinelle script: ---- // Convert trivial ACCESS_ONCE() uses to equivalent READ_ONCE() and // WRITE_ONCE() // $ make coccicheck COCCI=/home/mark/once.cocci SPFLAGS="--include-headers" MODE=patch virtual patch @ depends on patch @ expression E1, E2; @@ - ACCESS_ONCE(E1) = E2 + WRITE_ONCE(E1, E2) @ depends on patch @ expression E; @@ - ACCESS_ONCE(E) + READ_ONCE(E) ---- Signed-off-by: Mark Rutland Signed-off-by: Paul E. McKenney Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: davem@davemloft.net Cc: linux-arch@vger.kernel.org Cc: mpe@ellerman.id.au Cc: shuah@kernel.org Cc: snitzer@redhat.com Cc: thor.thayer@linux.intel.com Cc: tj@kernel.org Cc: viro@zeniv.linux.org.uk Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1508792849-3115-19-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/arc/kernel/smp.c | 2 +- arch/arm/include/asm/spinlock.h | 2 +- arch/arm/mach-tegra/cpuidle-tegra20.c | 2 +- arch/arm/vdso/vgettimeofday.c | 2 +- arch/ia64/include/asm/spinlock.h | 8 ++--- arch/mips/include/asm/vdso.h | 2 +- arch/mips/kernel/pm-cps.c | 2 +- arch/mn10300/kernel/mn10300-serial.c | 4 +-- arch/parisc/include/asm/atomic.h | 2 +- arch/powerpc/platforms/powernv/opal-msglog.c | 2 +- arch/s390/include/asm/spinlock.h | 6 ++-- arch/s390/lib/spinlock.c | 16 +++++----- arch/sparc/include/asm/atomic_32.h | 2 +- arch/tile/gxio/dma_queue.c | 4 +-- arch/tile/include/gxio/dma_queue.h | 2 +- arch/tile/kernel/ptrace.c | 2 +- arch/x86/entry/common.c | 2 +- arch/x86/entry/vdso/vclock_gettime.c | 2 +- arch/x86/events/core.c | 2 +- arch/x86/include/asm/vgtod.h | 2 +- arch/x86/kernel/espfix_64.c | 6 ++-- arch/x86/kernel/nmi.c | 2 +- arch/x86/kvm/mmu.c | 4 +-- arch/x86/kvm/page_track.c | 2 +- arch/x86/xen/p2m.c | 2 +- arch/xtensa/platforms/xtfpga/lcd.c | 14 ++++----- block/blk-wbt.c | 2 +- drivers/base/core.c | 2 +- drivers/base/power/runtime.c | 4 +-- drivers/char/random.c | 4 +-- drivers/clocksource/bcm2835_timer.c | 2 +- drivers/crypto/caam/jr.c | 4 +-- drivers/crypto/nx/nx-842-powernv.c | 2 +- drivers/firewire/ohci.c | 10 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 2 +- drivers/gpu/drm/radeon/radeon_gem.c | 4 +-- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 2 +- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- drivers/infiniband/hw/hfi1/pio.c | 6 ++-- drivers/infiniband/hw/hfi1/ruc.c | 2 +- drivers/infiniband/hw/hfi1/sdma.c | 8 ++--- drivers/infiniband/hw/hfi1/sdma.h | 2 +- drivers/infiniband/hw/hfi1/uc.c | 4 +-- drivers/infiniband/hw/hfi1/ud.c | 4 +-- drivers/infiniband/hw/hfi1/user_sdma.c | 8 ++--- drivers/infiniband/hw/qib/qib_ruc.c | 2 +- drivers/infiniband/hw/qib/qib_uc.c | 4 +-- drivers/infiniband/hw/qib/qib_ud.c | 4 +-- drivers/infiniband/sw/rdmavt/qp.c | 6 ++-- drivers/input/misc/regulator-haptic.c | 2 +- drivers/md/dm-bufio.c | 10 +++--- drivers/md/dm-kcopyd.c | 4 +-- drivers/md/dm-stats.c | 36 +++++++++++----------- drivers/md/dm-switch.c | 2 +- drivers/md/dm-thin.c | 2 +- drivers/md/dm-verity-target.c | 2 +- drivers/md/dm.c | 4 +-- drivers/md/md.c | 2 +- drivers/md/raid5.c | 2 +- drivers/misc/mic/scif/scif_rb.c | 8 ++--- drivers/misc/mic/scif/scif_rma_list.c | 2 +- drivers/net/bonding/bond_alb.c | 2 +- drivers/net/bonding/bond_main.c | 6 ++-- drivers/net/ethernet/chelsio/cxgb4/sge.c | 4 +-- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- drivers/net/ethernet/hisilicon/hip04_eth.c | 4 +-- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 4 +-- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 +-- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 4 +-- drivers/net/ethernet/intel/igb/e1000_regs.h | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_common.h | 4 +-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 ++--- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 4 +-- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/ethernet/intel/ixgbevf/vf.h | 2 +- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 12 ++++---- drivers/net/ethernet/neterion/vxge/vxge-main.c | 2 +- drivers/net/ethernet/sfc/ef10.c | 10 +++--- drivers/net/ethernet/sfc/efx.c | 4 +-- drivers/net/ethernet/sfc/falcon/efx.c | 4 +-- drivers/net/ethernet/sfc/falcon/falcon.c | 4 +-- drivers/net/ethernet/sfc/falcon/farch.c | 8 ++--- drivers/net/ethernet/sfc/falcon/nic.h | 6 ++-- drivers/net/ethernet/sfc/falcon/tx.c | 6 ++-- drivers/net/ethernet/sfc/farch.c | 8 ++--- drivers/net/ethernet/sfc/nic.h | 6 ++-- drivers/net/ethernet/sfc/ptp.c | 10 +++--- drivers/net/ethernet/sfc/tx.c | 6 ++-- drivers/net/ethernet/sun/niu.c | 4 +-- drivers/net/tap.c | 2 +- drivers/net/tun.c | 4 +-- drivers/net/wireless/ath/ath5k/desc.c | 8 ++--- .../wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 4 +-- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 10 +++--- drivers/net/wireless/mac80211_hwsim.c | 4 +-- drivers/scsi/qla2xxx/qla_target.c | 2 +- drivers/target/target_core_user.c | 2 +- drivers/usb/class/cdc-wdm.c | 2 +- drivers/usb/core/devio.c | 2 +- drivers/usb/core/sysfs.c | 4 +-- drivers/usb/gadget/udc/gr_udc.c | 4 +-- drivers/usb/host/ohci-hcd.c | 2 +- drivers/usb/host/uhci-hcd.h | 4 +-- drivers/vfio/vfio.c | 2 +- drivers/vhost/scsi.c | 2 +- fs/aio.c | 2 +- fs/buffer.c | 3 +- fs/crypto/keyinfo.c | 2 +- fs/direct-io.c | 2 +- fs/exec.c | 2 +- fs/fcntl.c | 2 +- fs/fs_pin.c | 4 +-- fs/fuse/dev.c | 2 +- fs/inode.c | 2 +- fs/namei.c | 4 +-- fs/namespace.c | 2 +- fs/nfs/dir.c | 8 ++--- fs/proc/array.c | 2 +- fs/proc_namespace.c | 2 +- fs/splice.c | 2 +- fs/userfaultfd.c | 8 ++--- fs/xfs/xfs_log_priv.h | 4 +-- include/linux/bitops.h | 4 +-- include/linux/dynamic_queue_limits.h | 2 +- include/linux/huge_mm.h | 2 +- include/linux/if_team.h | 2 +- include/linux/llist.h | 2 +- include/linux/pm_runtime.h | 2 +- include/net/ip_vs.h | 6 ++-- kernel/acct.c | 4 +-- kernel/events/core.c | 6 ++-- kernel/events/ring_buffer.c | 2 +- kernel/exit.c | 2 +- kernel/trace/ring_buffer.c | 2 +- kernel/trace/trace.h | 2 +- kernel/trace/trace_stack.c | 2 +- kernel/user_namespace.c | 2 +- lib/assoc_array.c | 20 ++++++------ lib/dynamic_queue_limits.c | 2 +- lib/llist.c | 2 +- lib/vsprintf.c | 4 +-- mm/huge_memory.c | 2 +- net/core/dev.c | 2 +- net/core/pktgen.c | 2 +- net/ipv4/inet_fragment.c | 2 +- net/ipv4/route.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv4/udp.c | 4 +-- net/ipv6/ip6_tunnel.c | 8 ++--- net/ipv6/udp.c | 4 +-- net/llc/llc_input.c | 4 +-- net/mac80211/sta_info.c | 2 +- net/netlabel/netlabel_calipso.c | 2 +- net/wireless/nl80211.c | 2 +- sound/firewire/amdtp-am824.c | 6 ++-- sound/firewire/amdtp-stream.c | 23 +++++++------- sound/firewire/amdtp-stream.h | 2 +- sound/firewire/digi00x/amdtp-dot.c | 6 ++-- sound/firewire/fireface/amdtp-ff.c | 4 +-- sound/firewire/fireface/ff-midi.c | 10 +++--- sound/firewire/fireface/ff-transaction.c | 8 ++--- sound/firewire/isight.c | 18 +++++------ sound/firewire/motu/amdtp-motu.c | 4 +-- sound/firewire/oxfw/oxfw-scs1x.c | 12 ++++---- sound/firewire/tascam/amdtp-tascam.c | 4 +-- sound/firewire/tascam/tascam-transaction.c | 6 ++-- sound/soc/xtensa/xtfpga-i2s.c | 6 ++-- sound/usb/bcd2000/bcd2000.c | 4 +-- tools/arch/x86/include/asm/atomic.h | 2 +- tools/include/asm-generic/atomic-gcc.h | 2 +- tools/perf/util/auxtrace.h | 4 +-- tools/perf/util/session.h | 2 +- virt/kvm/kvm_main.c | 2 +- 180 files changed, 383 insertions(+), 385 deletions(-) (limited to 'kernel/trace') diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index f46267153ec2..94cabe73664b 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -245,7 +245,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg) * and read back old value */ do { - new = old = ACCESS_ONCE(*ipi_data_ptr); + new = old = READ_ONCE(*ipi_data_ptr); new |= 1U << msg; } while (cmpxchg(ipi_data_ptr, old, new) != old); diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index daa87212c9a1..77f50ae0aeb4 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -71,7 +71,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) while (lockval.tickets.next != lockval.tickets.owner) { wfe(); - lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner); + lockval.tickets.owner = READ_ONCE(lock->tickets.owner); } smp_mb(); diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c index 76e4c83cd5c8..3f24addd7972 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra20.c +++ b/arch/arm/mach-tegra/cpuidle-tegra20.c @@ -179,7 +179,7 @@ static int tegra20_idle_lp2_coupled(struct cpuidle_device *dev, bool entered_lp2 = false; if (tegra_pending_sgi()) - ACCESS_ONCE(abort_flag) = true; + WRITE_ONCE(abort_flag, true); cpuidle_coupled_parallel_barrier(dev, &abort_barrier); diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c index 79214d5ff097..a9dd619c6c29 100644 --- a/arch/arm/vdso/vgettimeofday.c +++ b/arch/arm/vdso/vgettimeofday.c @@ -35,7 +35,7 @@ static notrace u32 __vdso_read_begin(const struct vdso_data *vdata) { u32 seq; repeat: - seq = ACCESS_ONCE(vdata->seq_count); + seq = READ_ONCE(vdata->seq_count); if (seq & 1) { cpu_relax(); goto repeat; diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 35b31884863b..e98775be112d 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -61,7 +61,7 @@ static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { - int tmp = ACCESS_ONCE(lock->lock); + int tmp = READ_ONCE(lock->lock); if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK)) return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp; @@ -73,19 +73,19 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); - ACCESS_ONCE(*p) = (tmp + 2) & ~1; + WRITE_ONCE(*p, (tmp + 2) & ~1); } static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) { - long tmp = ACCESS_ONCE(lock->lock); + long tmp = READ_ONCE(lock->lock); return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK); } static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) { - long tmp = ACCESS_ONCE(lock->lock); + long tmp = READ_ONCE(lock->lock); return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1; } diff --git a/arch/mips/include/asm/vdso.h b/arch/mips/include/asm/vdso.h index b7cd6cf77b83..91bf0c2c265c 100644 --- a/arch/mips/include/asm/vdso.h +++ b/arch/mips/include/asm/vdso.h @@ -99,7 +99,7 @@ static inline u32 vdso_data_read_begin(const union mips_vdso_data *data) u32 seq; while (true) { - seq = ACCESS_ONCE(data->seq_count); + seq = READ_ONCE(data->seq_count); if (likely(!(seq & 1))) { /* Paired with smp_wmb() in vdso_data_write_*(). */ smp_rmb(); diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index 4655017f2377..1d2996cd58da 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -166,7 +166,7 @@ int cps_pm_enter_state(enum cps_pm_state state) nc_core_ready_count = nc_addr; /* Ensure ready_count is zero-initialised before the assembly runs */ - ACCESS_ONCE(*nc_core_ready_count) = 0; + WRITE_ONCE(*nc_core_ready_count, 0); coupled_barrier(&per_cpu(pm_barrier, core), online); /* Run the generated entry code */ diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c index 7ecf69879e2d..d7ef1232a82a 100644 --- a/arch/mn10300/kernel/mn10300-serial.c +++ b/arch/mn10300/kernel/mn10300-serial.c @@ -543,7 +543,7 @@ static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port) try_again: /* pull chars out of the hat */ - ix = ACCESS_ONCE(port->rx_outp); + ix = READ_ONCE(port->rx_outp); if (CIRC_CNT(port->rx_inp, ix, MNSC_BUFFER_SIZE) == 0) { if (push && !tport->low_latency) tty_flip_buffer_push(tport); @@ -1724,7 +1724,7 @@ static int mn10300_serial_poll_get_char(struct uart_port *_port) if (mn10300_serial_int_tbl[port->rx_irq].port != NULL) { do { /* pull chars out of the hat */ - ix = ACCESS_ONCE(port->rx_outp); + ix = READ_ONCE(port->rx_outp); if (CIRC_CNT(port->rx_inp, ix, MNSC_BUFFER_SIZE) == 0) return NO_POLL_CHAR; diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 17b98a87e5e2..c57d4e8307f2 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -260,7 +260,7 @@ atomic64_set(atomic64_t *v, s64 i) static __inline__ s64 atomic64_read(const atomic64_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } #define atomic64_inc(v) (atomic64_add( 1,(v))) diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 7a9cde0cfbd1..acd3206dfae3 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -43,7 +43,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) if (!opal_memcons) return -ENODEV; - out_pos = be32_to_cpu(ACCESS_ONCE(opal_memcons->out_pos)); + out_pos = be32_to_cpu(READ_ONCE(opal_memcons->out_pos)); /* Now we've read out_pos, put a barrier in before reading the new * data it points to in conbuf. */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 9fa855f91e55..66f4160010ef 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -117,14 +117,14 @@ extern int _raw_write_trylock_retry(arch_rwlock_t *lp); static inline int arch_read_trylock_once(arch_rwlock_t *rw) { - int old = ACCESS_ONCE(rw->lock); + int old = READ_ONCE(rw->lock); return likely(old >= 0 && __atomic_cmpxchg_bool(&rw->lock, old, old + 1)); } static inline int arch_write_trylock_once(arch_rwlock_t *rw) { - int old = ACCESS_ONCE(rw->lock); + int old = READ_ONCE(rw->lock); return likely(old == 0 && __atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000)); } @@ -211,7 +211,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) int old; do { - old = ACCESS_ONCE(rw->lock); + old = READ_ONCE(rw->lock); } while (!__atomic_cmpxchg_bool(&rw->lock, old, old - 1)); } diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index b12663d653d8..34e30b9ea234 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -162,8 +162,8 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) smp_yield_cpu(~owner); count = spin_retry; } - old = ACCESS_ONCE(rw->lock); - owner = ACCESS_ONCE(rw->owner); + old = READ_ONCE(rw->lock); + owner = READ_ONCE(rw->owner); if (old < 0) continue; if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1)) @@ -178,7 +178,7 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw) int old; while (count-- > 0) { - old = ACCESS_ONCE(rw->lock); + old = READ_ONCE(rw->lock); if (old < 0) continue; if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1)) @@ -202,8 +202,8 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, int prev) smp_yield_cpu(~owner); count = spin_retry; } - old = ACCESS_ONCE(rw->lock); - owner = ACCESS_ONCE(rw->owner); + old = READ_ONCE(rw->lock); + owner = READ_ONCE(rw->owner); smp_mb(); if (old >= 0) { prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); @@ -230,8 +230,8 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) smp_yield_cpu(~owner); count = spin_retry; } - old = ACCESS_ONCE(rw->lock); - owner = ACCESS_ONCE(rw->owner); + old = READ_ONCE(rw->lock); + owner = READ_ONCE(rw->owner); if (old >= 0 && __atomic_cmpxchg_bool(&rw->lock, old, old | 0x80000000)) prev = old; @@ -251,7 +251,7 @@ int _raw_write_trylock_retry(arch_rwlock_t *rw) int old; while (count-- > 0) { - old = ACCESS_ONCE(rw->lock); + old = READ_ONCE(rw->lock); if (old) continue; if (__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000)) diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 7643e979e333..e2f398e9456c 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -31,7 +31,7 @@ void atomic_set(atomic_t *, int); #define atomic_set_release(v, i) atomic_set((v), (i)) -#define atomic_read(v) ACCESS_ONCE((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) #define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v))) #define atomic_sub(i, v) ((void)atomic_add_return(-(int)(i), (v))) diff --git a/arch/tile/gxio/dma_queue.c b/arch/tile/gxio/dma_queue.c index baa60357f8ba..b7ba577d82ca 100644 --- a/arch/tile/gxio/dma_queue.c +++ b/arch/tile/gxio/dma_queue.c @@ -163,14 +163,14 @@ int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue, int64_t completion_slot, int update) { if (update) { - if (ACCESS_ONCE(dma_queue->hw_complete_count) > + if (READ_ONCE(dma_queue->hw_complete_count) > completion_slot) return 1; __gxio_dma_queue_update_credits(dma_queue); } - return ACCESS_ONCE(dma_queue->hw_complete_count) > completion_slot; + return READ_ONCE(dma_queue->hw_complete_count) > completion_slot; } EXPORT_SYMBOL_GPL(__gxio_dma_queue_is_complete); diff --git a/arch/tile/include/gxio/dma_queue.h b/arch/tile/include/gxio/dma_queue.h index b9e45e37649e..c8fd47edba30 100644 --- a/arch/tile/include/gxio/dma_queue.h +++ b/arch/tile/include/gxio/dma_queue.h @@ -121,7 +121,7 @@ static inline int64_t __gxio_dma_queue_reserve(__gxio_dma_queue_t *dma_queue, * if the result is LESS than "hw_complete_count". */ uint64_t complete; - complete = ACCESS_ONCE(dma_queue->hw_complete_count); + complete = READ_ONCE(dma_queue->hw_complete_count); slot |= (complete & 0xffffffffff000000); if (slot < complete) slot += 0x1000000; diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index e1a078e6828e..d516d61751c2 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -255,7 +255,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, int do_syscall_trace_enter(struct pt_regs *regs) { - u32 work = ACCESS_ONCE(current_thread_info()->flags); + u32 work = READ_ONCE(current_thread_info()->flags); if ((work & _TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs)) { diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 03505ffbe1b6..eaa0ba66cf96 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -75,7 +75,7 @@ static long syscall_trace_enter(struct pt_regs *regs) if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; + work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; if (unlikely(work & _TIF_SYSCALL_EMU)) emulated = true; diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index fa8dbfcf7ed3..11b13c4b43d5 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -318,7 +318,7 @@ int gettimeofday(struct timeval *, struct timezone *) notrace time_t __vdso_time(time_t *t) { /* This is atomic on x86 so we don't need any locks. */ - time_t result = ACCESS_ONCE(gtod->wall_time_sec); + time_t result = READ_ONCE(gtod->wall_time_sec); if (t) *t = result; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 589af1eec7c1..140d33288e78 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2118,7 +2118,7 @@ static int x86_pmu_event_init(struct perf_event *event) event->destroy(event); } - if (ACCESS_ONCE(x86_pmu.attr_rdpmc)) + if (READ_ONCE(x86_pmu.attr_rdpmc)) event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; return err; diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 022e59714562..53dd162576a8 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -48,7 +48,7 @@ static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) unsigned ret; repeat: - ret = ACCESS_ONCE(s->seq); + ret = READ_ONCE(s->seq); if (unlikely(ret & 1)) { cpu_relax(); goto repeat; diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 9c4e7ba6870c..7d7715dde901 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -155,14 +155,14 @@ void init_espfix_ap(int cpu) page = cpu/ESPFIX_STACKS_PER_PAGE; /* Did another CPU already set this up? */ - stack_page = ACCESS_ONCE(espfix_pages[page]); + stack_page = READ_ONCE(espfix_pages[page]); if (likely(stack_page)) goto done; mutex_lock(&espfix_init_mutex); /* Did we race on the lock? */ - stack_page = ACCESS_ONCE(espfix_pages[page]); + stack_page = READ_ONCE(espfix_pages[page]); if (stack_page) goto unlock_done; @@ -200,7 +200,7 @@ void init_espfix_ap(int cpu) set_pte(&pte_p[n*PTE_STRIDE], pte); /* Job is done for this CPU and any CPU which shares this page */ - ACCESS_ONCE(espfix_pages[page]) = stack_page; + WRITE_ONCE(espfix_pages[page], stack_page); unlock_done: mutex_unlock(&espfix_init_mutex); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 35aafc95e4b8..18bc9b51ac9b 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -105,7 +105,7 @@ static void nmi_max_handler(struct irq_work *w) { struct nmiaction *a = container_of(w, struct nmiaction, irq_work); int remainder_ns, decimal_msecs; - u64 whole_msecs = ACCESS_ONCE(a->max_duration); + u64 whole_msecs = READ_ONCE(a->max_duration); remainder_ns = do_div(whole_msecs, (1000 * 1000)); decimal_msecs = remainder_ns / 1000; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7a69cf053711..a119b361b8b7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -443,7 +443,7 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) static u64 __get_spte_lockless(u64 *sptep) { - return ACCESS_ONCE(*sptep); + return READ_ONCE(*sptep); } #else union split_spte { @@ -4819,7 +4819,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, * If we don't have indirect shadow pages, it means no page is * write-protected, so we can exit simply. */ - if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) + if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return; remote_flush = local_flush = false; diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index ea67dc876316..01c1371f39f8 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -157,7 +157,7 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, return false; index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); - return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); + return !!READ_ONCE(slot->arch.gfn_track[mode][index]); } void kvm_page_track_cleanup(struct kvm *kvm) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 6083ba462f35..13b4f19b9131 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -547,7 +547,7 @@ int xen_alloc_p2m_entry(unsigned long pfn) if (p2m_top_mfn && pfn < MAX_P2M_PFN) { topidx = p2m_top_index(pfn); top_mfn_p = &p2m_top_mfn[topidx]; - mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); + mid_mfn = READ_ONCE(p2m_top_mfn_p[topidx]); BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); diff --git a/arch/xtensa/platforms/xtfpga/lcd.c b/arch/xtensa/platforms/xtfpga/lcd.c index 4dc0c1b43f4b..2f7eb66c23ec 100644 --- a/arch/xtensa/platforms/xtfpga/lcd.c +++ b/arch/xtensa/platforms/xtfpga/lcd.c @@ -34,23 +34,23 @@ static void lcd_put_byte(u8 *addr, u8 data) { #ifdef CONFIG_XTFPGA_LCD_8BIT_ACCESS - ACCESS_ONCE(*addr) = data; + WRITE_ONCE(*addr, data); #else - ACCESS_ONCE(*addr) = data & 0xf0; - ACCESS_ONCE(*addr) = (data << 4) & 0xf0; + WRITE_ONCE(*addr, data & 0xf0); + WRITE_ONCE(*addr, (data << 4) & 0xf0); #endif } static int __init lcd_init(void) { - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; + WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE8BIT); mdelay(5); - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; + WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE8BIT); udelay(200); - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT; + WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE8BIT); udelay(50); #ifndef CONFIG_XTFPGA_LCD_8BIT_ACCESS - ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE4BIT; + WRITE_ONCE(*LCD_INSTR_ADDR, LCD_DISPLAY_MODE4BIT); udelay(50); lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_MODE4BIT); udelay(50); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 6a9a0f03a67b..d822530e6aea 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -261,7 +261,7 @@ static inline bool stat_sample_valid(struct blk_rq_stat *stat) static u64 rwb_sync_issue_lat(struct rq_wb *rwb) { - u64 now, issue = ACCESS_ONCE(rwb->sync_issue); + u64 now, issue = READ_ONCE(rwb->sync_issue); if (!issue || !rwb->sync_cookie) return 0; diff --git a/drivers/base/core.c b/drivers/base/core.c index 12ebd055724c..4b8ba2a75a4d 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -668,7 +668,7 @@ const char *dev_driver_string(const struct device *dev) * so be careful about accessing it. dev->bus and dev->class should * never change once they are set, so they don't need special care. */ - drv = ACCESS_ONCE(dev->driver); + drv = READ_ONCE(dev->driver); return drv ? drv->name : (dev->bus ? dev->bus->name : (dev->class ? dev->class->name : "")); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 7bcf80fa9ada..41d7c2b99f69 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -134,11 +134,11 @@ unsigned long pm_runtime_autosuspend_expiration(struct device *dev) if (!dev->power.use_autosuspend) goto out; - autosuspend_delay = ACCESS_ONCE(dev->power.autosuspend_delay); + autosuspend_delay = READ_ONCE(dev->power.autosuspend_delay); if (autosuspend_delay < 0) goto out; - last_busy = ACCESS_ONCE(dev->power.last_busy); + last_busy = READ_ONCE(dev->power.last_busy); elapsed = jiffies - last_busy; if (elapsed < 0) goto out; /* jiffies has wrapped around. */ diff --git a/drivers/char/random.c b/drivers/char/random.c index 8ad92707e45f..6c7ccac2679e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -641,7 +641,7 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits) return; retry: - entropy_count = orig = ACCESS_ONCE(r->entropy_count); + entropy_count = orig = READ_ONCE(r->entropy_count); if (nfrac < 0) { /* Debit */ entropy_count += nfrac; @@ -1265,7 +1265,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, /* Can we pull enough? */ retry: - entropy_count = orig = ACCESS_ONCE(r->entropy_count); + entropy_count = orig = READ_ONCE(r->entropy_count); ibytes = nbytes; /* never pull more than available */ have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 39e489a96ad7..60da2537bef9 100644 --- a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -71,7 +71,7 @@ static irqreturn_t bcm2835_time_interrupt(int irq, void *dev_id) if (readl_relaxed(timer->control) & timer->match_mask) { writel_relaxed(timer->match_mask, timer->control); - event_handler = ACCESS_ONCE(timer->evt.event_handler); + event_handler = READ_ONCE(timer->evt.event_handler); if (event_handler) event_handler(&timer->evt); return IRQ_HANDLED; diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index d258953ff488..f4f258075b89 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -172,7 +172,7 @@ static void caam_jr_dequeue(unsigned long devarg) while (rd_reg32(&jrp->rregs->outring_used)) { - head = ACCESS_ONCE(jrp->head); + head = READ_ONCE(jrp->head); spin_lock(&jrp->outlock); @@ -341,7 +341,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, spin_lock_bh(&jrp->inplock); head = jrp->head; - tail = ACCESS_ONCE(jrp->tail); + tail = READ_ONCE(jrp->tail); if (!rd_reg32(&jrp->rregs->inpring_avail) || CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) { diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c index 874ddf5e9087..0f20f5ec9617 100644 --- a/drivers/crypto/nx/nx-842-powernv.c +++ b/drivers/crypto/nx/nx-842-powernv.c @@ -193,7 +193,7 @@ static int wait_for_csb(struct nx842_workmem *wmem, ktime_t start = wmem->start, now = ktime_get(); ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX); - while (!(ACCESS_ONCE(csb->flags) & CSB_V)) { + while (!(READ_ONCE(csb->flags) & CSB_V)) { cpu_relax(); now = ktime_get(); if (ktime_after(now, timeout)) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 8bf89267dc25..ccf52368a073 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -734,7 +734,7 @@ static unsigned int ar_search_last_active_buffer(struct ar_context *ctx, __le16 res_count, next_res_count; i = ar_first_buffer_index(ctx); - res_count = ACCESS_ONCE(ctx->descriptors[i].res_count); + res_count = READ_ONCE(ctx->descriptors[i].res_count); /* A buffer that is not yet completely filled must be the last one. */ while (i != last && res_count == 0) { @@ -742,8 +742,7 @@ static unsigned int ar_search_last_active_buffer(struct ar_context *ctx, /* Peek at the next descriptor. */ next_i = ar_next_buffer_index(i); rmb(); /* read descriptors in order */ - next_res_count = ACCESS_ONCE( - ctx->descriptors[next_i].res_count); + next_res_count = READ_ONCE(ctx->descriptors[next_i].res_count); /* * If the next descriptor is still empty, we must stop at this * descriptor. @@ -759,8 +758,7 @@ static unsigned int ar_search_last_active_buffer(struct ar_context *ctx, if (MAX_AR_PACKET_SIZE > PAGE_SIZE && i != last) { next_i = ar_next_buffer_index(next_i); rmb(); - next_res_count = ACCESS_ONCE( - ctx->descriptors[next_i].res_count); + next_res_count = READ_ONCE(ctx->descriptors[next_i].res_count); if (next_res_count != cpu_to_le16(PAGE_SIZE)) goto next_buffer_is_active; } @@ -2812,7 +2810,7 @@ static int handle_ir_buffer_fill(struct context *context, u32 buffer_dma; req_count = le16_to_cpu(last->req_count); - res_count = le16_to_cpu(ACCESS_ONCE(last->res_count)); + res_count = le16_to_cpu(READ_ONCE(last->res_count)); completed = req_count - res_count; buffer_dma = le32_to_cpu(last->data_address); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 333bad749067..303b5e099a98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -260,7 +260,7 @@ static void amdgpu_fence_fallback(unsigned long arg) */ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) { - uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq); + uint64_t seq = READ_ONCE(ring->fence_drv.sync_seq); struct dma_fence *fence, **ptr; int r; @@ -300,7 +300,7 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) amdgpu_fence_process(ring); emitted = 0x100000000ull; emitted -= atomic_read(&ring->fence_drv.last_seq); - emitted += ACCESS_ONCE(ring->fence_drv.sync_seq); + emitted += READ_ONCE(ring->fence_drv.sync_seq); return lower_32_bits(emitted); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 7171968f261e..6149a47fe63d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -788,11 +788,11 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) seq_printf(m, "\t0x%08x: %12ld byte %s", id, amdgpu_bo_size(bo), placement); - offset = ACCESS_ONCE(bo->tbo.mem.start); + offset = READ_ONCE(bo->tbo.mem.start); if (offset != AMDGPU_BO_INVALID_OFFSET) seq_printf(m, " @ 0x%010Lx", offset); - pin_count = ACCESS_ONCE(bo->pin_count); + pin_count = READ_ONCE(bo->pin_count); if (pin_count) seq_printf(m, " pin count %d", pin_count); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 38cea6fb25a8..a25f6c72f219 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -187,7 +187,7 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity) if (kfifo_is_empty(&entity->job_queue)) return false; - if (ACCESS_ONCE(entity->dependency)) + if (READ_ONCE(entity->dependency)) return false; return true; diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 3386452bd2f0..cf3deb283da5 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -451,7 +451,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, else r = 0; - cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); + cur_placement = READ_ONCE(robj->tbo.mem.mem_type); args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_put_unlocked(gobj); return r; @@ -481,7 +481,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret; /* Flush HDP cache via MMIO if necessary */ - cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); + cur_placement = READ_ONCE(robj->tbo.mem.mem_type); if (rdev->asic->mmio_hdp_flush && radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM) robj->rdev->asic->mmio_hdp_flush(rdev); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index a552e4ea5440..6ac094ee8983 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -904,7 +904,7 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, if (unlikely(drm_is_render_client(file_priv))) require_exist = true; - if (ACCESS_ONCE(vmw_fpriv(file_priv)->locked_master)) { + if (READ_ONCE(vmw_fpriv(file_priv)->locked_master)) { DRM_ERROR("Locked master refused legacy " "surface reference.\n"); return -EACCES; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index d9a1e9893136..97bea2e1aa6a 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -380,7 +380,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, if (sc->flags & SCF_FROZEN) { wait_event_interruptible_timeout( dd->event_queue, - !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN), + !(READ_ONCE(dd->flags) & HFI1_FROZEN), msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); if (dd->flags & HFI1_FROZEN) return -ENOLCK; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 7108a4b5e94c..75e740780285 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1423,14 +1423,14 @@ retry: goto done; } /* copy from receiver cache line and recalculate */ - sc->alloc_free = ACCESS_ONCE(sc->free); + sc->alloc_free = READ_ONCE(sc->free); avail = (unsigned long)sc->credits - (sc->fill - sc->alloc_free); if (blocks > avail) { /* still no room, actively update */ sc_release_update(sc); - sc->alloc_free = ACCESS_ONCE(sc->free); + sc->alloc_free = READ_ONCE(sc->free); trycount++; goto retry; } @@ -1667,7 +1667,7 @@ void sc_release_update(struct send_context *sc) /* call sent buffer callbacks */ code = -1; /* code not yet set */ - head = ACCESS_ONCE(sc->sr_head); /* snapshot the head */ + head = READ_ONCE(sc->sr_head); /* snapshot the head */ tail = sc->sr_tail; while (head != tail) { pbuf = &sc->sr[tail].pbuf; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index b3291f0fde9a..a7fc664f0d4e 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -363,7 +363,7 @@ static void ruc_loopback(struct rvt_qp *sqp) again: smp_read_barrier_depends(); /* see post_one_send() */ - if (sqp->s_last == ACCESS_ONCE(sqp->s_head)) + if (sqp->s_last == READ_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 6781bcdb10b3..08346d25441c 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1725,7 +1725,7 @@ retry: swhead = sde->descq_head & sde->sdma_mask; /* this code is really bad for cache line trading */ - swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask; + swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask; cnt = sde->descq_cnt; if (swhead < swtail) @@ -1872,7 +1872,7 @@ retry: if ((status & sde->idle_mask) && !idle_check_done) { u16 swtail; - swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask; + swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask; if (swtail != hwhead) { hwhead = (u16)read_sde_csr(sde, SD(HEAD)); idle_check_done = 1; @@ -2222,7 +2222,7 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde) u16 len; head = sde->descq_head & sde->sdma_mask; - tail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask; + tail = READ_ONCE(sde->descq_tail) & sde->sdma_mask; seq_printf(s, SDE_FMT, sde->this_idx, sde->cpu, sdma_state_name(sde->state.current_state), @@ -3305,7 +3305,7 @@ int sdma_ahg_alloc(struct sdma_engine *sde) return -EINVAL; } while (1) { - nr = ffz(ACCESS_ONCE(sde->ahg_bits)); + nr = ffz(READ_ONCE(sde->ahg_bits)); if (nr > 31) { trace_hfi1_ahg_allocate(sde, -ENOSPC); return -ENOSPC; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 107011d8613b..374c59784950 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -445,7 +445,7 @@ static inline u16 sdma_descq_freecnt(struct sdma_engine *sde) { return sde->descq_cnt - (sde->descq_tail - - ACCESS_ONCE(sde->descq_head)) - 1; + READ_ONCE(sde->descq_head)) - 1; } static inline u16 sdma_descq_inprocess(struct sdma_engine *sde) diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 0b646173ca22..9a31c585427f 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -80,7 +80,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto bail; /* We are in the error state, flush the work request. */ smp_read_barrier_depends(); /* see post_one_send() */ - if (qp->s_last == ACCESS_ONCE(qp->s_head)) + if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (iowait_sdma_pending(&priv->s_iowait)) { @@ -121,7 +121,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto bail; /* Check if send work queue is empty. */ smp_read_barrier_depends(); /* see post_one_send() */ - if (qp->s_cur == ACCESS_ONCE(qp->s_head)) { + if (qp->s_cur == READ_ONCE(qp->s_head)) { clear_ahg(qp); goto bail; } diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 2ba74fdd6f15..7fec6b984e3e 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -487,7 +487,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto bail; /* We are in the error state, flush the work request. */ smp_read_barrier_depends(); /* see post_one_send */ - if (qp->s_last == ACCESS_ONCE(qp->s_head)) + if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (iowait_sdma_pending(&priv->s_iowait)) { @@ -501,7 +501,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /* see post_one_send() */ smp_read_barrier_depends(); - if (qp->s_cur == ACCESS_ONCE(qp->s_head)) + if (qp->s_cur == READ_ONCE(qp->s_head)) goto bail; wqe = rvt_get_swqe_ptr(qp, qp->s_cur); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index c0c0e0445cbf..8ec6e8a8d6f7 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -276,7 +276,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, /* Wait until all requests have been freed. */ wait_event_interruptible( pq->wait, - (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); + (READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); kfree(pq->reqs); kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); @@ -591,7 +591,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, if (ret != -EBUSY) { req->status = ret; WRITE_ONCE(req->has_error, 1); - if (ACCESS_ONCE(req->seqcomp) == + if (READ_ONCE(req->seqcomp) == req->seqsubmitted - 1) goto free_req; return ret; @@ -825,7 +825,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) */ if (req->data_len) { iovec = &req->iovs[req->iov_idx]; - if (ACCESS_ONCE(iovec->offset) == iovec->iov.iov_len) { + if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) { if (++req->iov_idx == req->data_iovs) { ret = -EFAULT; goto free_txreq; @@ -1390,7 +1390,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) } else { if (status != SDMA_TXREQ_S_OK) req->status = status; - if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && + if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) && (READ_ONCE(req->done) || READ_ONCE(req->has_error))) { user_sdma_free_request(req, false); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 53efbb0b40c4..9a37e844d4c8 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -368,7 +368,7 @@ static void qib_ruc_loopback(struct rvt_qp *sqp) again: smp_read_barrier_depends(); /* see post_one_send() */ - if (sqp->s_last == ACCESS_ONCE(sqp->s_head)) + if (sqp->s_last == READ_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 498e2202e72c..bddcc37ace44 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -61,7 +61,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) goto bail; /* We are in the error state, flush the work request. */ smp_read_barrier_depends(); /* see post_one_send() */ - if (qp->s_last == ACCESS_ONCE(qp->s_head)) + if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { @@ -91,7 +91,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) goto bail; /* Check if send work queue is empty. */ smp_read_barrier_depends(); /* see post_one_send() */ - if (qp->s_cur == ACCESS_ONCE(qp->s_head)) + if (qp->s_cur == READ_ONCE(qp->s_head)) goto bail; /* * Start a new request. diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index be4907453ac4..15962ed193ce 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -253,7 +253,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) goto bail; /* We are in the error state, flush the work request. */ smp_read_barrier_depends(); /* see post_one_send */ - if (qp->s_last == ACCESS_ONCE(qp->s_head)) + if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { @@ -267,7 +267,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) /* see post_one_send() */ smp_read_barrier_depends(); - if (qp->s_cur == ACCESS_ONCE(qp->s_head)) + if (qp->s_cur == READ_ONCE(qp->s_head)) goto bail; wqe = rvt_get_swqe_ptr(qp, qp->s_cur); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 22df09ae809e..b670cb9d2006 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1073,7 +1073,7 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) rdi->driver_f.notify_error_qp(qp); /* Schedule the sending tasklet to drain the send work queue. */ - if (ACCESS_ONCE(qp->s_last) != qp->s_head) + if (READ_ONCE(qp->s_last) != qp->s_head) rdi->driver_f.schedule_send(qp); rvt_clear_mr_refs(qp, 0); @@ -1686,7 +1686,7 @@ static inline int rvt_qp_is_avail( if (likely(qp->s_avail)) return 0; smp_read_barrier_depends(); /* see rc.c */ - slast = ACCESS_ONCE(qp->s_last); + slast = READ_ONCE(qp->s_last); if (qp->s_head >= slast) avail = qp->s_size - (qp->s_head - slast); else @@ -1917,7 +1917,7 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, * ahead and kick the send engine into gear. Otherwise we will always * just schedule the send to happen later. */ - call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next; + call_send = qp->s_head == READ_ONCE(qp->s_last) && !wr->next; for (; wr; wr = wr->next) { err = rvt_post_one_wr(qp, wr, &call_send); diff --git a/drivers/input/misc/regulator-haptic.c b/drivers/input/misc/regulator-haptic.c index 2e8f801932be..a1db1e5040dc 100644 --- a/drivers/input/misc/regulator-haptic.c +++ b/drivers/input/misc/regulator-haptic.c @@ -233,7 +233,7 @@ static int __maybe_unused regulator_haptic_resume(struct device *dev) haptic->suspended = false; - magnitude = ACCESS_ONCE(haptic->magnitude); + magnitude = READ_ONCE(haptic->magnitude); if (magnitude) regulator_haptic_set_voltage(haptic, magnitude); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index d216a8f7bc22..33bb074d6941 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -347,7 +347,7 @@ static void __cache_size_refresh(void) BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock)); BUG_ON(dm_bufio_client_count < 0); - dm_bufio_cache_size_latch = ACCESS_ONCE(dm_bufio_cache_size); + dm_bufio_cache_size_latch = READ_ONCE(dm_bufio_cache_size); /* * Use default if set to 0 and report the actual cache size used. @@ -960,7 +960,7 @@ static void __get_memory_limit(struct dm_bufio_client *c, { unsigned long buffers; - if (unlikely(ACCESS_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch)) { + if (unlikely(READ_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch)) { if (mutex_trylock(&dm_bufio_clients_lock)) { __cache_size_refresh(); mutex_unlock(&dm_bufio_clients_lock); @@ -1600,7 +1600,7 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp) static unsigned long get_retain_buffers(struct dm_bufio_client *c) { - unsigned long retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes); + unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes); return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT); } @@ -1647,7 +1647,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); - return ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) + ACCESS_ONCE(c->n_buffers[LIST_DIRTY]); + return READ_ONCE(c->n_buffers[LIST_CLEAN]) + READ_ONCE(c->n_buffers[LIST_DIRTY]); } /* @@ -1818,7 +1818,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset); static unsigned get_max_age_hz(void) { - unsigned max_age = ACCESS_ONCE(dm_bufio_max_age); + unsigned max_age = READ_ONCE(dm_bufio_max_age); if (max_age > UINT_MAX / HZ) max_age = UINT_MAX / HZ; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index cf2c67e35eaf..eb45cc3df31d 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -107,7 +107,7 @@ static void io_job_start(struct dm_kcopyd_throttle *t) try_again: spin_lock_irq(&throttle_spinlock); - throttle = ACCESS_ONCE(t->throttle); + throttle = READ_ONCE(t->throttle); if (likely(throttle >= 100)) goto skip_limit; @@ -157,7 +157,7 @@ static void io_job_finish(struct dm_kcopyd_throttle *t) t->num_io_jobs--; - if (likely(ACCESS_ONCE(t->throttle) >= 100)) + if (likely(READ_ONCE(t->throttle) >= 100)) goto skip_limit; if (!t->num_io_jobs) { diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 6028d8247f58..a1a5eec783cc 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -431,7 +431,7 @@ do_sync_free: synchronize_rcu_expedited(); dm_stat_free(&s->rcu_head); } else { - ACCESS_ONCE(dm_stat_need_rcu_barrier) = 1; + WRITE_ONCE(dm_stat_need_rcu_barrier, 1); call_rcu(&s->rcu_head, dm_stat_free); } return 0; @@ -639,12 +639,12 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, */ last = raw_cpu_ptr(stats->last); stats_aux->merged = - (bi_sector == (ACCESS_ONCE(last->last_sector) && + (bi_sector == (READ_ONCE(last->last_sector) && ((bi_rw == WRITE) == - (ACCESS_ONCE(last->last_rw) == WRITE)) + (READ_ONCE(last->last_rw) == WRITE)) )); - ACCESS_ONCE(last->last_sector) = end_sector; - ACCESS_ONCE(last->last_rw) = bi_rw; + WRITE_ONCE(last->last_sector, end_sector); + WRITE_ONCE(last->last_rw, bi_rw); } rcu_read_lock(); @@ -693,22 +693,22 @@ static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared for_each_possible_cpu(cpu) { p = &s->stat_percpu[cpu][x]; - shared->tmp.sectors[READ] += ACCESS_ONCE(p->sectors[READ]); - shared->tmp.sectors[WRITE] += ACCESS_ONCE(p->sectors[WRITE]); - shared->tmp.ios[READ] += ACCESS_ONCE(p->ios[READ]); - shared->tmp.ios[WRITE] += ACCESS_ONCE(p->ios[WRITE]); - shared->tmp.merges[READ] += ACCESS_ONCE(p->merges[READ]); - shared->tmp.merges[WRITE] += ACCESS_ONCE(p->merges[WRITE]); - shared->tmp.ticks[READ] += ACCESS_ONCE(p->ticks[READ]); - shared->tmp.ticks[WRITE] += ACCESS_ONCE(p->ticks[WRITE]); - shared->tmp.io_ticks[READ] += ACCESS_ONCE(p->io_ticks[READ]); - shared->tmp.io_ticks[WRITE] += ACCESS_ONCE(p->io_ticks[WRITE]); - shared->tmp.io_ticks_total += ACCESS_ONCE(p->io_ticks_total); - shared->tmp.time_in_queue += ACCESS_ONCE(p->time_in_queue); + shared->tmp.sectors[READ] += READ_ONCE(p->sectors[READ]); + shared->tmp.sectors[WRITE] += READ_ONCE(p->sectors[WRITE]); + shared->tmp.ios[READ] += READ_ONCE(p->ios[READ]); + shared->tmp.ios[WRITE] += READ_ONCE(p->ios[WRITE]); + shared->tmp.merges[READ] += READ_ONCE(p->merges[READ]); + shared->tmp.merges[WRITE] += READ_ONCE(p->merges[WRITE]); + shared->tmp.ticks[READ] += READ_ONCE(p->ticks[READ]); + shared->tmp.ticks[WRITE] += READ_ONCE(p->ticks[WRITE]); + shared->tmp.io_ticks[READ] += READ_ONCE(p->io_ticks[READ]); + shared->tmp.io_ticks[WRITE] += READ_ONCE(p->io_ticks[WRITE]); + shared->tmp.io_ticks_total += READ_ONCE(p->io_ticks_total); + shared->tmp.time_in_queue += READ_ONCE(p->time_in_queue); if (s->n_histogram_entries) { unsigned i; for (i = 0; i < s->n_histogram_entries + 1; i++) - shared->tmp.histogram[i] += ACCESS_ONCE(p->histogram[i]); + shared->tmp.histogram[i] += READ_ONCE(p->histogram[i]); } } } diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index 4c8de1ff78ca..8d0ba879777e 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -144,7 +144,7 @@ static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long switch_get_position(sctx, region_nr, ®ion_index, &bit); - return (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) & + return (READ_ONCE(sctx->region_table[region_index]) >> bit) & ((1 << sctx->region_table_entry_bits) - 1); } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 1e25705209c2..89e5dff9b4cf 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2431,7 +2431,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) struct pool_c *pt = pool->ti->private; bool needs_check = dm_pool_metadata_needs_check(pool->pmd); enum pool_mode old_mode = get_pool_mode(pool); - unsigned long no_space_timeout = ACCESS_ONCE(no_space_timeout_secs) * HZ; + unsigned long no_space_timeout = READ_ONCE(no_space_timeout_secs) * HZ; /* * Never allow the pool to transition to PM_WRITE mode if user diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index bda3caca23ca..fba93237a780 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -589,7 +589,7 @@ static void verity_prefetch_io(struct work_struct *work) verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL); verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL); if (!i) { - unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster); + unsigned cluster = READ_ONCE(dm_verity_prefetch_cluster); cluster >>= v->data_dev_block_bits; if (unlikely(!cluster)) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4be85324f44d..8aaffa19b29a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -114,7 +114,7 @@ static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS; static int __dm_get_module_param_int(int *module_param, int min, int max) { - int param = ACCESS_ONCE(*module_param); + int param = READ_ONCE(*module_param); int modified_param = 0; bool modified = true; @@ -136,7 +136,7 @@ static int __dm_get_module_param_int(int *module_param, int min, int max) unsigned __dm_get_module_param(unsigned *module_param, unsigned def, unsigned max) { - unsigned param = ACCESS_ONCE(*module_param); + unsigned param = READ_ONCE(*module_param); unsigned modified_param = 0; if (!param) diff --git a/drivers/md/md.c b/drivers/md/md.c index 0ff1bbf6c90e..447ddcbc9566 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2651,7 +2651,7 @@ state_show(struct md_rdev *rdev, char *page) { char *sep = ","; size_t len = 0; - unsigned long flags = ACCESS_ONCE(rdev->flags); + unsigned long flags = READ_ONCE(rdev->flags); if (test_bit(Faulty, &flags) || (!test_bit(ExternalBbl, &flags) && diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 928e24a07133..7d9a50eed9db 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6072,7 +6072,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n */ rcu_read_lock(); for (i = 0; i < conf->raid_disks; i++) { - struct md_rdev *rdev = ACCESS_ONCE(conf->disks[i].rdev); + struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev); if (rdev == NULL || test_bit(Faulty, &rdev->flags)) still_degraded = 1; diff --git a/drivers/misc/mic/scif/scif_rb.c b/drivers/misc/mic/scif/scif_rb.c index 637cc4686742..b665757ca89a 100644 --- a/drivers/misc/mic/scif/scif_rb.c +++ b/drivers/misc/mic/scif/scif_rb.c @@ -138,7 +138,7 @@ void scif_rb_commit(struct scif_rb *rb) * the read barrier in scif_rb_count(..) */ wmb(); - ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset; + WRITE_ONCE(*rb->write_ptr, rb->current_write_offset); #ifdef CONFIG_INTEL_MIC_CARD /* * X100 Si bug: For the case where a Core is performing an EXT_WR @@ -147,7 +147,7 @@ void scif_rb_commit(struct scif_rb *rb) * This way, if ordering is violated for the Interrupt Message, it will * fall just behind the first Posted associated with the first EXT_WR. */ - ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset; + WRITE_ONCE(*rb->write_ptr, rb->current_write_offset); #endif } @@ -210,7 +210,7 @@ void scif_rb_update_read_ptr(struct scif_rb *rb) * scif_rb_space(..) */ mb(); - ACCESS_ONCE(*rb->read_ptr) = new_offset; + WRITE_ONCE(*rb->read_ptr, new_offset); #ifdef CONFIG_INTEL_MIC_CARD /* * X100 Si Bug: For the case where a Core is performing an EXT_WR @@ -219,7 +219,7 @@ void scif_rb_update_read_ptr(struct scif_rb *rb) * This way, if ordering is violated for the Interrupt Message, it will * fall just behind the first Posted associated with the first EXT_WR. */ - ACCESS_ONCE(*rb->read_ptr) = new_offset; + WRITE_ONCE(*rb->read_ptr, new_offset); #endif } diff --git a/drivers/misc/mic/scif/scif_rma_list.c b/drivers/misc/mic/scif/scif_rma_list.c index e1ef8daedd5a..a036dbb4101e 100644 --- a/drivers/misc/mic/scif/scif_rma_list.c +++ b/drivers/misc/mic/scif/scif_rma_list.c @@ -277,7 +277,7 @@ retry: * Need to restart list traversal if there has been * an asynchronous list entry deletion. */ - if (ACCESS_ONCE(ep->rma_info.async_list_del)) + if (READ_ONCE(ep->rma_info.async_list_del)) goto retry; } mutex_unlock(&ep->rma_info.rma_lock); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c02cc817a490..1ed9529e7bd1 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1378,7 +1378,7 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) unsigned int count; slaves = rcu_dereference(bond->slave_arr); - count = slaves ? ACCESS_ONCE(slaves->count) : 0; + count = slaves ? READ_ONCE(slaves->count) : 0; if (likely(count)) tx_slave = slaves->arr[hash_index % count]; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c99dc59d729b..af51b90cecbb 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1167,7 +1167,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) slave = bond_slave_get_rcu(skb->dev); bond = slave->bond; - recv_probe = ACCESS_ONCE(bond->recv_probe); + recv_probe = READ_ONCE(bond->recv_probe); if (recv_probe) { ret = recv_probe(skb, bond, slave); if (ret == RX_HANDLER_CONSUMED) { @@ -3810,7 +3810,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev else bond_xmit_slave_id(bond, skb, 0); } else { - int slave_cnt = ACCESS_ONCE(bond->slave_cnt); + int slave_cnt = READ_ONCE(bond->slave_cnt); if (likely(slave_cnt)) { slave_id = bond_rr_gen_slave_id(bond); @@ -3972,7 +3972,7 @@ static int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int count; slaves = rcu_dereference(bond->slave_arr); - count = slaves ? ACCESS_ONCE(slaves->count) : 0; + count = slaves ? READ_ONCE(slaves->count) : 0; if (likely(count)) { slave = slaves->arr[bond_xmit_hash(bond, skb) % count]; bond_dev_queue_xmit(bond, skb, slave->dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 4ef68f69b58c..43f52a8fe708 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -405,7 +405,7 @@ void free_tx_desc(struct adapter *adap, struct sge_txq *q, */ static inline int reclaimable(const struct sge_txq *q) { - int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx)); + int hw_cidx = ntohs(READ_ONCE(q->stat->cidx)); hw_cidx -= q->cidx; return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx; } @@ -1375,7 +1375,7 @@ out_free: dev_kfree_skb_any(skb); */ static inline void reclaim_completed_tx_imm(struct sge_txq *q) { - int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx)); + int hw_cidx = ntohs(READ_ONCE(q->stat->cidx)); int reclaim = hw_cidx - q->cidx; if (reclaim < 0) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 0e3d9f39a807..c6e859a27ee6 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -605,7 +605,7 @@ static void accumulate_16bit_val(u32 *acc, u16 val) if (wrapped) newacc += 65536; - ACCESS_ONCE(*acc) = newacc; + WRITE_ONCE(*acc, newacc); } static void populate_erx_stats(struct be_adapter *adapter, diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 0cec06bec63e..340e28211135 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -373,7 +373,7 @@ static int hip04_tx_reclaim(struct net_device *ndev, bool force) unsigned int count; smp_rmb(); - count = tx_count(ACCESS_ONCE(priv->tx_head), tx_tail); + count = tx_count(READ_ONCE(priv->tx_head), tx_tail); if (count == 0) goto out; @@ -431,7 +431,7 @@ static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) dma_addr_t phys; smp_rmb(); - count = tx_count(tx_head, ACCESS_ONCE(priv->tx_tail)); + count = tx_count(tx_head, READ_ONCE(priv->tx_tail)); if (count == (TX_DESC_NUM - 1)) { netif_stop_queue(ndev); return NETDEV_TX_BUSY; diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 8f326f87a815..2cb9539c931e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -264,7 +264,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) vsi->rx_buf_failed, vsi->rx_page_failed); rcu_read_lock(); for (i = 0; i < vsi->num_queue_pairs; i++) { - struct i40e_ring *rx_ring = ACCESS_ONCE(vsi->rx_rings[i]); + struct i40e_ring *rx_ring = READ_ONCE(vsi->rx_rings[i]); if (!rx_ring) continue; @@ -320,7 +320,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed"); } for (i = 0; i < vsi->num_queue_pairs; i++) { - struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); + struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]); if (!tx_ring) continue; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 05e89864f781..e9e04a485e0a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1570,7 +1570,7 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, } rcu_read_lock(); for (j = 0; j < vsi->num_queue_pairs; j++) { - tx_ring = ACCESS_ONCE(vsi->tx_rings[j]); + tx_ring = READ_ONCE(vsi->tx_rings[j]); if (!tx_ring) continue; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6498da8806cb..de1fcac7834d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -455,7 +455,7 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, u64 bytes, packets; unsigned int start; - tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); + tx_ring = READ_ONCE(vsi->tx_rings[i]); if (!tx_ring) continue; i40e_get_netdev_stats_struct_tx(tx_ring, stats); @@ -791,7 +791,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rcu_read_lock(); for (q = 0; q < vsi->num_queue_pairs; q++) { /* locate Tx ring */ - p = ACCESS_ONCE(vsi->tx_rings[q]); + p = READ_ONCE(vsi->tx_rings[q]); do { start = u64_stats_fetch_begin_irq(&p->syncp); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index d8456c381c99..97381238eb7c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -130,7 +130,7 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) } smp_mb(); /* Force any pending update before accessing. */ - adj = ACCESS_ONCE(pf->ptp_base_adj); + adj = READ_ONCE(pf->ptp_base_adj); freq = adj; freq *= ppb; @@ -499,7 +499,7 @@ void i40e_ptp_set_increment(struct i40e_pf *pf) wr32(hw, I40E_PRTTSYN_INC_H, incval >> 32); /* Update the base adjustement value. */ - ACCESS_ONCE(pf->ptp_base_adj) = incval; + WRITE_ONCE(pf->ptp_base_adj, incval); smp_mb(); /* Force the above update. */ } diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 58adbf234e07..31a3f09df9f7 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -375,7 +375,7 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg); /* write operations, indexed using DWORDS */ #define wr32(reg, val) \ do { \ - u8 __iomem *hw_addr = ACCESS_ONCE((hw)->hw_addr); \ + u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ if (!E1000_REMOVED(hw_addr)) \ writel((val), &hw_addr[(reg)]); \ } while (0) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index fd4a46b03cc8..6bccc2be2b91 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -750,7 +750,7 @@ static void igb_cache_ring_register(struct igb_adapter *adapter) u32 igb_rd32(struct e1000_hw *hw, u32 reg) { struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw); - u8 __iomem *hw_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; if (E1000_REMOVED(hw_addr)) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index e083732adf64..a01409e2e06c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -161,7 +161,7 @@ static inline bool ixgbe_removed(void __iomem *addr) static inline void ixgbe_write_reg(struct ixgbe_hw *hw, u32 reg, u32 value) { - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr); if (ixgbe_removed(reg_addr)) return; @@ -180,7 +180,7 @@ static inline void writeq(u64 val, void __iomem *addr) static inline void ixgbe_write_reg64(struct ixgbe_hw *hw, u32 reg, u64 value) { - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr); if (ixgbe_removed(reg_addr)) return; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4d76afd13868..2224e691ee07 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -380,7 +380,7 @@ static void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg) */ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg) { - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr); u32 value; if (ixgbe_removed(reg_addr)) @@ -8630,7 +8630,7 @@ static void ixgbe_get_stats64(struct net_device *netdev, rcu_read_lock(); for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbe_ring *ring = ACCESS_ONCE(adapter->rx_ring[i]); + struct ixgbe_ring *ring = READ_ONCE(adapter->rx_ring[i]); u64 bytes, packets; unsigned int start; @@ -8646,12 +8646,12 @@ static void ixgbe_get_stats64(struct net_device *netdev, } for (i = 0; i < adapter->num_tx_queues; i++) { - struct ixgbe_ring *ring = ACCESS_ONCE(adapter->tx_ring[i]); + struct ixgbe_ring *ring = READ_ONCE(adapter->tx_ring[i]); ixgbe_get_ring_stats64(stats, ring); } for (i = 0; i < adapter->num_xdp_queues; i++) { - struct ixgbe_ring *ring = ACCESS_ONCE(adapter->xdp_ring[i]); + struct ixgbe_ring *ring = READ_ONCE(adapter->xdp_ring[i]); ixgbe_get_ring_stats64(stats, ring); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 86d6924a2b71..ae312c45696a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -378,7 +378,7 @@ static int ixgbe_ptp_adjfreq_82599(struct ptp_clock_info *ptp, s32 ppb) } smp_mb(); - incval = ACCESS_ONCE(adapter->base_incval); + incval = READ_ONCE(adapter->base_incval); freq = incval; freq *= ppb; @@ -1159,7 +1159,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) } /* update the base incval used to calculate frequency adjustment */ - ACCESS_ONCE(adapter->base_incval) = incval; + WRITE_ONCE(adapter->base_incval, incval); smp_mb(); /* need lock to prevent incorrect read while modifying cyclecounter */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 032f8ac06357..cacb30682434 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -164,7 +164,7 @@ static void ixgbevf_check_remove(struct ixgbe_hw *hw, u32 reg) u32 ixgbevf_read_reg(struct ixgbe_hw *hw, u32 reg) { - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr); u32 value; if (IXGBE_REMOVED(reg_addr)) diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index 04d8d4ee4f04..c651fefcc3d2 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -182,7 +182,7 @@ struct ixgbevf_info { static inline void ixgbe_write_reg(struct ixgbe_hw *hw, u32 reg, u32 value) { - u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr); if (IXGBE_REMOVED(reg_addr)) return; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 8a32a8f7f9c0..3541a7f9d12e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -414,8 +414,8 @@ bool mlx4_en_process_tx_cq(struct net_device *dev, index = cons_index & size_mask; cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor; - last_nr_txbb = ACCESS_ONCE(ring->last_nr_txbb); - ring_cons = ACCESS_ONCE(ring->cons); + last_nr_txbb = READ_ONCE(ring->last_nr_txbb); + ring_cons = READ_ONCE(ring->cons); ring_index = ring_cons & size_mask; stamp_index = ring_index; @@ -479,8 +479,8 @@ bool mlx4_en_process_tx_cq(struct net_device *dev, wmb(); /* we want to dirty this cache line once */ - ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb; - ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped; + WRITE_ONCE(ring->last_nr_txbb, last_nr_txbb); + WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped); if (cq->type == TX_XDP) return done < budget; @@ -858,7 +858,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; /* fetch ring->cons far ahead before needing it to avoid stall */ - ring_cons = ACCESS_ONCE(ring->cons); + ring_cons = READ_ONCE(ring->cons); real_size = get_real_size(skb, shinfo, dev, &lso_header_size, &inline_ok, &fragptr); @@ -1066,7 +1066,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) */ smp_rmb(); - ring_cons = ACCESS_ONCE(ring->cons); + ring_cons = READ_ONCE(ring->cons); if (unlikely(!mlx4_en_is_tx_ring_full(ring))) { netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 50ea69d88480..5dd5f61e1114 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -2629,7 +2629,7 @@ static void vxge_poll_vp_lockup(unsigned long data) ring = &vdev->vpaths[i].ring; /* Truncated to machine word size number of frames */ - rx_frms = ACCESS_ONCE(ring->stats.rx_frms); + rx_frms = READ_ONCE(ring->stats.rx_frms); /* Did this vpath received any packets */ if (ring->stats.prev_rx_frms == rx_frms) { diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 13f72f5b18d2..a95a46bcd339 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2073,7 +2073,7 @@ static irqreturn_t efx_ef10_msi_interrupt(int irq, void *dev_id) netif_vdbg(efx, intr, efx->net_dev, "IRQ %d on CPU %d\n", irq, raw_smp_processor_id()); - if (likely(ACCESS_ONCE(efx->irq_soft_enabled))) { + if (likely(READ_ONCE(efx->irq_soft_enabled))) { /* Note test interrupts */ if (context->index == efx->irq_level) efx->last_irq_cpu = raw_smp_processor_id(); @@ -2088,7 +2088,7 @@ static irqreturn_t efx_ef10_msi_interrupt(int irq, void *dev_id) static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id) { struct efx_nic *efx = dev_id; - bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled); + bool soft_enabled = READ_ONCE(efx->irq_soft_enabled); struct efx_channel *channel; efx_dword_t reg; u32 queues; @@ -3291,7 +3291,7 @@ static int efx_ef10_handle_rx_event(struct efx_channel *channel, bool rx_cont; u16 flags = 0; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return 0; /* Basic packet information */ @@ -3428,7 +3428,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) unsigned int tx_ev_q_label; int tx_descs = 0; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return 0; if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT))) @@ -5316,7 +5316,7 @@ static void efx_ef10_filter_remove_old(struct efx_nic *efx) int i; for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) { - if (ACCESS_ONCE(table->entry[i].spec) & + if (READ_ONCE(table->entry[i].spec) & EFX_EF10_FILTER_FLAG_AUTO_OLD) { rc = efx_ef10_filter_remove_internal(efx, 1U << EFX_FILTER_PRI_AUTO, i, true); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b9cb697b2818..016616a63880 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2809,7 +2809,7 @@ static void efx_reset_work(struct work_struct *data) unsigned long pending; enum reset_type method; - pending = ACCESS_ONCE(efx->reset_pending); + pending = READ_ONCE(efx->reset_pending); method = fls(pending) - 1; if (method == RESET_TYPE_MC_BIST) @@ -2874,7 +2874,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) /* If we're not READY then just leave the flags set as the cue * to abort probing or reschedule the reset later. */ - if (ACCESS_ONCE(efx->state) != STATE_READY) + if (READ_ONCE(efx->state) != STATE_READY) return; /* efx_process_channel() will no longer read events once a diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index 29614da91cbf..7263275fde4a 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2545,7 +2545,7 @@ static void ef4_reset_work(struct work_struct *data) unsigned long pending; enum reset_type method; - pending = ACCESS_ONCE(efx->reset_pending); + pending = READ_ONCE(efx->reset_pending); method = fls(pending) - 1; if ((method == RESET_TYPE_RECOVER_OR_DISABLE || @@ -2605,7 +2605,7 @@ void ef4_schedule_reset(struct ef4_nic *efx, enum reset_type type) /* If we're not READY then just leave the flags set as the cue * to abort probing or reschedule the reset later. */ - if (ACCESS_ONCE(efx->state) != STATE_READY) + if (READ_ONCE(efx->state) != STATE_READY) return; queue_work(reset_workqueue, &efx->reset_work); diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c index 93c713c1f627..cd8bb472d758 100644 --- a/drivers/net/ethernet/sfc/falcon/falcon.c +++ b/drivers/net/ethernet/sfc/falcon/falcon.c @@ -452,7 +452,7 @@ static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id) "IRQ %d on CPU %d status " EF4_OWORD_FMT "\n", irq, raw_smp_processor_id(), EF4_OWORD_VAL(*int_ker)); - if (!likely(ACCESS_ONCE(efx->irq_soft_enabled))) + if (!likely(READ_ONCE(efx->irq_soft_enabled))) return IRQ_HANDLED; /* Check to see if we have a serious error condition */ @@ -1372,7 +1372,7 @@ static void falcon_reconfigure_mac_wrapper(struct ef4_nic *efx) ef4_oword_t reg; int link_speed, isolate; - isolate = !!ACCESS_ONCE(efx->reset_pending); + isolate = !!READ_ONCE(efx->reset_pending); switch (link_state->speed) { case 10000: link_speed = 3; break; diff --git a/drivers/net/ethernet/sfc/falcon/farch.c b/drivers/net/ethernet/sfc/falcon/farch.c index 05916c710d8c..494884f6af4a 100644 --- a/drivers/net/ethernet/sfc/falcon/farch.c +++ b/drivers/net/ethernet/sfc/falcon/farch.c @@ -834,7 +834,7 @@ ef4_farch_handle_tx_event(struct ef4_channel *channel, ef4_qword_t *event) struct ef4_nic *efx = channel->efx; int tx_packets = 0; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return 0; if (likely(EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) { @@ -990,7 +990,7 @@ ef4_farch_handle_rx_event(struct ef4_channel *channel, const ef4_qword_t *event) struct ef4_rx_queue *rx_queue; struct ef4_nic *efx = channel->efx; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return; rx_ev_cont = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT); @@ -1504,7 +1504,7 @@ irqreturn_t ef4_farch_fatal_interrupt(struct ef4_nic *efx) irqreturn_t ef4_farch_legacy_interrupt(int irq, void *dev_id) { struct ef4_nic *efx = dev_id; - bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled); + bool soft_enabled = READ_ONCE(efx->irq_soft_enabled); ef4_oword_t *int_ker = efx->irq_status.addr; irqreturn_t result = IRQ_NONE; struct ef4_channel *channel; @@ -1596,7 +1596,7 @@ irqreturn_t ef4_farch_msi_interrupt(int irq, void *dev_id) "IRQ %d on CPU %d status " EF4_OWORD_FMT "\n", irq, raw_smp_processor_id(), EF4_OWORD_VAL(*int_ker)); - if (!likely(ACCESS_ONCE(efx->irq_soft_enabled))) + if (!likely(READ_ONCE(efx->irq_soft_enabled))) return IRQ_HANDLED; /* Handle non-event-queue sources */ diff --git a/drivers/net/ethernet/sfc/falcon/nic.h b/drivers/net/ethernet/sfc/falcon/nic.h index a4c4592f6023..54ca457cdb15 100644 --- a/drivers/net/ethernet/sfc/falcon/nic.h +++ b/drivers/net/ethernet/sfc/falcon/nic.h @@ -83,7 +83,7 @@ static inline struct ef4_tx_queue *ef4_tx_queue_partner(struct ef4_tx_queue *tx_ static inline bool __ef4_nic_tx_is_empty(struct ef4_tx_queue *tx_queue, unsigned int write_count) { - unsigned int empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count); + unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count); if (empty_read_count == 0) return false; @@ -464,11 +464,11 @@ irqreturn_t ef4_farch_fatal_interrupt(struct ef4_nic *efx); static inline int ef4_nic_event_test_irq_cpu(struct ef4_channel *channel) { - return ACCESS_ONCE(channel->event_test_cpu); + return READ_ONCE(channel->event_test_cpu); } static inline int ef4_nic_irq_test_irq_cpu(struct ef4_nic *efx) { - return ACCESS_ONCE(efx->last_irq_cpu); + return READ_ONCE(efx->last_irq_cpu); } /* Global Resources */ diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c index 6a75f4140a4b..6486814e97dc 100644 --- a/drivers/net/ethernet/sfc/falcon/tx.c +++ b/drivers/net/ethernet/sfc/falcon/tx.c @@ -134,8 +134,8 @@ static void ef4_tx_maybe_stop_queue(struct ef4_tx_queue *txq1) */ netif_tx_stop_queue(txq1->core_txq); smp_mb(); - txq1->old_read_count = ACCESS_ONCE(txq1->read_count); - txq2->old_read_count = ACCESS_ONCE(txq2->read_count); + txq1->old_read_count = READ_ONCE(txq1->read_count); + txq2->old_read_count = READ_ONCE(txq2->read_count); fill_level = max(txq1->insert_count - txq1->old_read_count, txq2->insert_count - txq2->old_read_count); @@ -524,7 +524,7 @@ void ef4_xmit_done(struct ef4_tx_queue *tx_queue, unsigned int index) /* Check whether the hardware queue is now empty */ if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { - tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count); + tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); if (tx_queue->read_count == tx_queue->old_write_count) { smp_mb(); tx_queue->empty_read_count = diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index ba45150f53c7..86454d25a405 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -827,7 +827,7 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) struct efx_nic *efx = channel->efx; int tx_packets = 0; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return 0; if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) { @@ -979,7 +979,7 @@ efx_farch_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event) struct efx_rx_queue *rx_queue; struct efx_nic *efx = channel->efx; - if (unlikely(ACCESS_ONCE(efx->reset_pending))) + if (unlikely(READ_ONCE(efx->reset_pending))) return; rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT); @@ -1520,7 +1520,7 @@ irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx) irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id) { struct efx_nic *efx = dev_id; - bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled); + bool soft_enabled = READ_ONCE(efx->irq_soft_enabled); efx_oword_t *int_ker = efx->irq_status.addr; irqreturn_t result = IRQ_NONE; struct efx_channel *channel; @@ -1612,7 +1612,7 @@ irqreturn_t efx_farch_msi_interrupt(int irq, void *dev_id) "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n", irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker)); - if (!likely(ACCESS_ONCE(efx->irq_soft_enabled))) + if (!likely(READ_ONCE(efx->irq_soft_enabled))) return IRQ_HANDLED; /* Handle non-event-queue sources */ diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 4d7fb8af880d..7b51b6371724 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -81,7 +81,7 @@ static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue) static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, unsigned int write_count) { - unsigned int empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count); + unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count); if (empty_read_count == 0) return false; @@ -617,11 +617,11 @@ irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx); static inline int efx_nic_event_test_irq_cpu(struct efx_channel *channel) { - return ACCESS_ONCE(channel->event_test_cpu); + return READ_ONCE(channel->event_test_cpu); } static inline int efx_nic_irq_test_irq_cpu(struct efx_nic *efx) { - return ACCESS_ONCE(efx->last_irq_cpu); + return READ_ONCE(efx->last_irq_cpu); } /* Global Resources */ diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 60cdb97f58e2..56c2db398def 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -658,7 +658,7 @@ static void efx_ptp_send_times(struct efx_nic *efx, /* Write host time for specified period or until MC is done */ while ((timespec64_compare(&now.ts_real, &limit) < 0) && - ACCESS_ONCE(*mc_running)) { + READ_ONCE(*mc_running)) { struct timespec64 update_time; unsigned int host_time; @@ -668,7 +668,7 @@ static void efx_ptp_send_times(struct efx_nic *efx, do { pps_get_ts(&now); } while ((timespec64_compare(&now.ts_real, &update_time) < 0) && - ACCESS_ONCE(*mc_running)); + READ_ONCE(*mc_running)); /* Synchronise NIC with single word of time only */ host_time = (now.ts_real.tv_sec << MC_NANOSECOND_BITS | @@ -832,14 +832,14 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings) ptp->start.dma_addr); /* Clear flag that signals MC ready */ - ACCESS_ONCE(*start) = 0; + WRITE_ONCE(*start, 0); rc = efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf, MC_CMD_PTP_IN_SYNCHRONIZE_LEN); EFX_WARN_ON_ONCE_PARANOID(rc); /* Wait for start from MCDI (or timeout) */ timeout = jiffies + msecs_to_jiffies(MAX_SYNCHRONISE_WAIT_MS); - while (!ACCESS_ONCE(*start) && (time_before(jiffies, timeout))) { + while (!READ_ONCE(*start) && (time_before(jiffies, timeout))) { udelay(20); /* Usually start MCDI execution quickly */ loops++; } @@ -849,7 +849,7 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings) if (!time_before(jiffies, timeout)) ++ptp->sync_timeouts; - if (ACCESS_ONCE(*start)) + if (READ_ONCE(*start)) efx_ptp_send_times(efx, &last_time); /* Collect results */ diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 32bf1fecf864..efb66ea21f27 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -136,8 +136,8 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) */ netif_tx_stop_queue(txq1->core_txq); smp_mb(); - txq1->old_read_count = ACCESS_ONCE(txq1->read_count); - txq2->old_read_count = ACCESS_ONCE(txq2->read_count); + txq1->old_read_count = READ_ONCE(txq1->read_count); + txq2->old_read_count = READ_ONCE(txq2->read_count); fill_level = max(txq1->insert_count - txq1->old_read_count, txq2->insert_count - txq2->old_read_count); @@ -752,7 +752,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) /* Check whether the hardware queue is now empty */ if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { - tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count); + tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); if (tx_queue->read_count == tx_queue->old_write_count) { smp_mb(); tx_queue->empty_read_count = diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 6a4e8e1bbd90..8ab0fb6892d5 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6245,7 +6245,7 @@ static void niu_get_rx_stats(struct niu *np, pkts = dropped = errors = bytes = 0; - rx_rings = ACCESS_ONCE(np->rx_rings); + rx_rings = READ_ONCE(np->rx_rings); if (!rx_rings) goto no_rings; @@ -6276,7 +6276,7 @@ static void niu_get_tx_stats(struct niu *np, pkts = errors = bytes = 0; - tx_rings = ACCESS_ONCE(np->tx_rings); + tx_rings = READ_ONCE(np->tx_rings); if (!tx_rings) goto no_rings; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 21b71ae947fd..b55b29b90b88 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -257,7 +257,7 @@ static struct tap_queue *tap_get_queue(struct tap_dev *tap, * and validate that the result isn't NULL - in case we are * racing against queue removal. */ - int numvtaps = ACCESS_ONCE(tap->numvtaps); + int numvtaps = READ_ONCE(tap->numvtaps); __u32 rxq; if (!numvtaps) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e21bf90b819f..27cd50c5bc9e 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -469,7 +469,7 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, u32 numqueues = 0; rcu_read_lock(); - numqueues = ACCESS_ONCE(tun->numqueues); + numqueues = READ_ONCE(tun->numqueues); txq = __skb_get_hash_symmetric(skb); if (txq) { @@ -864,7 +864,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); tfile = rcu_dereference(tun->tfiles[txq]); - numqueues = ACCESS_ONCE(tun->numqueues); + numqueues = READ_ONCE(tun->numqueues); /* Drop packet if interface is not attached */ if (txq >= numqueues) diff --git a/drivers/net/wireless/ath/ath5k/desc.c b/drivers/net/wireless/ath/ath5k/desc.c index bd8d4392d68b..80f75139495f 100644 --- a/drivers/net/wireless/ath/ath5k/desc.c +++ b/drivers/net/wireless/ath/ath5k/desc.c @@ -500,13 +500,13 @@ ath5k_hw_proc_4word_tx_status(struct ath5k_hw *ah, tx_status = &desc->ud.ds_tx5212.tx_stat; - txstat1 = ACCESS_ONCE(tx_status->tx_status_1); + txstat1 = READ_ONCE(tx_status->tx_status_1); /* No frame has been send or error */ if (unlikely(!(txstat1 & AR5K_DESC_TX_STATUS1_DONE))) return -EINPROGRESS; - txstat0 = ACCESS_ONCE(tx_status->tx_status_0); + txstat0 = READ_ONCE(tx_status->tx_status_0); /* * Get descriptor status @@ -700,14 +700,14 @@ ath5k_hw_proc_5212_rx_status(struct ath5k_hw *ah, u32 rxstat0, rxstat1; rx_status = &desc->ud.ds_rx.rx_stat; - rxstat1 = ACCESS_ONCE(rx_status->rx_status_1); + rxstat1 = READ_ONCE(rx_status->rx_status_1); /* No frame received / not ready */ if (unlikely(!(rxstat1 & AR5K_5212_RX_DESC_STATUS1_DONE))) return -EINPROGRESS; memset(rs, 0, sizeof(struct ath5k_rx_status)); - rxstat0 = ACCESS_ONCE(rx_status->rx_status_0); + rxstat0 = READ_ONCE(rx_status->rx_status_0); /* * Frame receive status diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 613caca7dc02..785a0f33b7e6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -3628,7 +3628,7 @@ static void brcmf_sdio_dataworker(struct work_struct *work) bus->dpc_running = true; wmb(); - while (ACCESS_ONCE(bus->dpc_triggered)) { + while (READ_ONCE(bus->dpc_triggered)) { bus->dpc_triggered = false; brcmf_sdio_dpc(bus); bus->idlecount = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 231878969332..0f45f34e39d3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1118,7 +1118,7 @@ void iwl_mvm_set_hw_ctkill_state(struct iwl_mvm *mvm, bool state) static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state) { struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); - bool calibrating = ACCESS_ONCE(mvm->calibrating); + bool calibrating = READ_ONCE(mvm->calibrating); if (state) set_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 6f2e2af23219..6e9d3289b9d0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -652,7 +652,7 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) return -1; } else if (info.control.vif->type == NL80211_IFTYPE_STATION && is_multicast_ether_addr(hdr->addr1)) { - u8 ap_sta_id = ACCESS_ONCE(mvmvif->ap_sta_id); + u8 ap_sta_id = READ_ONCE(mvmvif->ap_sta_id); if (ap_sta_id != IWL_MVM_INVALID_STA) sta_id = ap_sta_id; @@ -700,7 +700,7 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, snap_ip_tcp = 8 + skb_transport_header(skb) - skb_network_header(skb) + tcp_hdrlen(skb); - dbg_max_amsdu_len = ACCESS_ONCE(mvm->max_amsdu_len); + dbg_max_amsdu_len = READ_ONCE(mvm->max_amsdu_len); if (!sta->max_amsdu_len || !ieee80211_is_data_qos(hdr->frame_control) || diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index a06b6612b658..f25ce3a1ea50 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1247,7 +1247,7 @@ restart: spin_lock(&rxq->lock); /* uCode's read index (stored in shared DRAM) indicates the last Rx * buffer that the driver may process (last buffer filled by ucode). */ - r = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; + r = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; i = rxq->read; /* W/A 9000 device step A0 wrap-around bug */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 2e3e013ec95a..9ad3f4fe5894 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2076,12 +2076,12 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, int txq_idx) IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", txq_idx); txq = trans_pcie->txq[txq_idx]; - wr_ptr = ACCESS_ONCE(txq->write_ptr); + wr_ptr = READ_ONCE(txq->write_ptr); - while (txq->read_ptr != ACCESS_ONCE(txq->write_ptr) && + while (txq->read_ptr != READ_ONCE(txq->write_ptr) && !time_after(jiffies, now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) { - u8 write_ptr = ACCESS_ONCE(txq->write_ptr); + u8 write_ptr = READ_ONCE(txq->write_ptr); if (WARN_ONCE(wr_ptr != write_ptr, "WR pointer moved while flushing %d -> %d\n", @@ -2553,7 +2553,7 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans, spin_lock(&rxq->lock); - r = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; + r = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; for (i = rxq->read, j = 0; i != r && j < allocated_rb_nums; @@ -2814,7 +2814,7 @@ static struct iwl_trans_dump_data /* Dump RBs is supported only for pre-9000 devices (1 queue) */ struct iwl_rxq *rxq = &trans_pcie->rxq[0]; /* RBs */ - num_rbs = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) + num_rbs = le16_to_cpu(READ_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; num_rbs = (num_rbs - rxq->read) & RX_QUEUE_MASK; len += num_rbs * (sizeof(*data) + diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 6467ffac9811..d2b3d6177a55 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1380,7 +1380,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, mac80211_hwsim_monitor_rx(hw, skb, channel); /* wmediumd mode check */ - _portid = ACCESS_ONCE(data->wmediumd); + _portid = READ_ONCE(data->wmediumd); if (_portid) return mac80211_hwsim_tx_frame_nl(hw, skb, _portid); @@ -1477,7 +1477,7 @@ static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw, struct ieee80211_channel *chan) { struct mac80211_hwsim_data *data = hw->priv; - u32 _pid = ACCESS_ONCE(data->wmediumd); + u32 _pid = READ_ONCE(data->wmediumd); if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) { struct ieee80211_tx_info *txi = IEEE80211_SKB_CB(skb); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index f05cfc83c9c8..f946bf889015 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -996,7 +996,7 @@ static void qlt_free_session_done(struct work_struct *work) if (logout_started) { bool traced = false; - while (!ACCESS_ONCE(sess->logout_completed)) { + while (!READ_ONCE(sess->logout_completed)) { if (!traced) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf086, "%s: waiting for sess %p logout\n", diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 942d094269fb..9469695f5871 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -985,7 +985,7 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) mb = udev->mb_addr; tcmu_flush_dcache_range(mb, sizeof(*mb)); - while (udev->cmdr_last_cleaned != ACCESS_ONCE(mb->cmd_tail)) { + while (udev->cmdr_last_cleaned != READ_ONCE(mb->cmd_tail)) { struct tcmu_cmd_entry *entry = (void *) mb + CMDR_OFF + udev->cmdr_last_cleaned; struct tcmu_cmd *cmd; diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 3e865dbf878c..fbaa2a90d25d 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -483,7 +483,7 @@ static ssize_t wdm_read if (rv < 0) return -ERESTARTSYS; - cntr = ACCESS_ONCE(desc->length); + cntr = READ_ONCE(desc->length); if (cntr == 0) { desc->read = 0; retry: diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index e9326f31db8d..4ae667d8c238 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -150,7 +150,7 @@ static int usbfs_increase_memory_usage(u64 amount) { u64 lim; - lim = ACCESS_ONCE(usbfs_memory_mb); + lim = READ_ONCE(usbfs_memory_mb); lim <<= 20; atomic64_add(amount, &usbfs_memory_usage); diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index d930bfda4010..58d59c5f8592 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -973,7 +973,7 @@ static ssize_t interface_show(struct device *dev, struct device_attribute *attr, char *string; intf = to_usb_interface(dev); - string = ACCESS_ONCE(intf->cur_altsetting->string); + string = READ_ONCE(intf->cur_altsetting->string); if (!string) return 0; return sprintf(buf, "%s\n", string); @@ -989,7 +989,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, intf = to_usb_interface(dev); udev = interface_to_usbdev(intf); - alt = ACCESS_ONCE(intf->cur_altsetting); + alt = READ_ONCE(intf->cur_altsetting); return sprintf(buf, "usb:v%04Xp%04Xd%04Xdc%02Xdsc%02Xdp%02X" "ic%02Xisc%02Xip%02Xin%02X\n", diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c index 1f9941145746..0b59fa50aa30 100644 --- a/drivers/usb/gadget/udc/gr_udc.c +++ b/drivers/usb/gadget/udc/gr_udc.c @@ -1261,7 +1261,7 @@ static int gr_handle_in_ep(struct gr_ep *ep) if (!req->last_desc) return 0; - if (ACCESS_ONCE(req->last_desc->ctrl) & GR_DESC_IN_CTRL_EN) + if (READ_ONCE(req->last_desc->ctrl) & GR_DESC_IN_CTRL_EN) return 0; /* Not put in hardware buffers yet */ if (gr_read32(&ep->regs->epstat) & (GR_EPSTAT_B1 | GR_EPSTAT_B0)) @@ -1290,7 +1290,7 @@ static int gr_handle_out_ep(struct gr_ep *ep) if (!req->curr_desc) return 0; - ctrl = ACCESS_ONCE(req->curr_desc->ctrl); + ctrl = READ_ONCE(req->curr_desc->ctrl); if (ctrl & GR_DESC_OUT_CTRL_EN) return 0; /* Not received yet */ diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 44924824fa41..c86f89babd57 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -785,7 +785,7 @@ static void io_watchdog_func(unsigned long _ohci) } /* find the last TD processed by the controller. */ - head = hc32_to_cpu(ohci, ACCESS_ONCE(ed->hwHeadP)) & TD_MASK; + head = hc32_to_cpu(ohci, READ_ONCE(ed->hwHeadP)) & TD_MASK; td_start = td; td_next = list_prepare_entry(td, &ed->td_list, td_list); list_for_each_entry_continue(td_next, &ed->td_list, td_list) { diff --git a/drivers/usb/host/uhci-hcd.h b/drivers/usb/host/uhci-hcd.h index 91b22b2ea3aa..09a2a259941b 100644 --- a/drivers/usb/host/uhci-hcd.h +++ b/drivers/usb/host/uhci-hcd.h @@ -186,7 +186,7 @@ struct uhci_qh { * We need a special accessor for the element pointer because it is * subject to asynchronous updates by the controller. */ -#define qh_element(qh) ACCESS_ONCE((qh)->element) +#define qh_element(qh) READ_ONCE((qh)->element) #define LINK_TO_QH(uhci, qh) (UHCI_PTR_QH((uhci)) | \ cpu_to_hc32((uhci), (qh)->dma_handle)) @@ -274,7 +274,7 @@ struct uhci_td { * subject to asynchronous updates by the controller. */ #define td_status(uhci, td) hc32_to_cpu((uhci), \ - ACCESS_ONCE((td)->status)) + READ_ONCE((td)->status)) #define LINK_TO_TD(uhci, td) (cpu_to_hc32((uhci), (td)->dma_handle)) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index f5a86f651f38..2bc3705a99bd 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -665,7 +665,7 @@ static int vfio_dev_viable(struct device *dev, void *data) { struct vfio_group *group = data; struct vfio_device *device; - struct device_driver *drv = ACCESS_ONCE(dev->driver); + struct device_driver *drv = READ_ONCE(dev->driver); struct vfio_unbound_dev *unbound; int ret = -EINVAL; diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 046f6d280af5..35e929f132e8 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -929,7 +929,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) continue; } - tpg = ACCESS_ONCE(vs_tpg[*target]); + tpg = READ_ONCE(vs_tpg[*target]); if (unlikely(!tpg)) { /* Target does not exist, fail the request */ vhost_scsi_send_bad_target(vs, vq, head, out); diff --git a/fs/aio.c b/fs/aio.c index 5a2487217072..e6de7715228c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -576,7 +576,7 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) * actually has a cancel function, hence the cmpxchg() */ - cancel = ACCESS_ONCE(kiocb->ki_cancel); + cancel = READ_ONCE(kiocb->ki_cancel); do { if (!cancel || cancel == KIOCB_CANCELLED) return -EINVAL; diff --git a/fs/buffer.c b/fs/buffer.c index 170df856bdb9..32ce01f0f95f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1692,7 +1692,8 @@ static struct buffer_head *create_page_buffers(struct page *page, struct inode * BUG_ON(!PageLocked(page)); if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state); + create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits), + b_state); return page_buffers(page); } diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 8e704d12a1cf..0083bd4fcaa5 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -373,7 +373,7 @@ void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci) struct fscrypt_info *prev; if (ci == NULL) - ci = ACCESS_ONCE(inode->i_crypt_info); + ci = READ_ONCE(inode->i_crypt_info); if (ci == NULL) return; diff --git a/fs/direct-io.c b/fs/direct-io.c index b53e66d9abd7..98fe1325da9d 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1152,7 +1152,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) { - unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); + unsigned i_blkbits = READ_ONCE(inode->i_blkbits); unsigned blkbits = i_blkbits; unsigned blocksize_mask = (1 << blkbits) - 1; ssize_t retval = -EINVAL; diff --git a/fs/exec.c b/fs/exec.c index 3e14ba25f678..1d6243d9f2b6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1911,7 +1911,7 @@ void set_dumpable(struct mm_struct *mm, int value) return; do { - old = ACCESS_ONCE(mm->flags); + old = READ_ONCE(mm->flags); new = (old & ~MMF_DUMPABLE_MASK) | value; } while (cmpxchg(&mm->flags, old, new) != old); } diff --git a/fs/fcntl.c b/fs/fcntl.c index 448a1119f0be..57bf2964bb83 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -724,7 +724,7 @@ static void send_sigio_to_task(struct task_struct *p, * F_SETSIG can change ->signum lockless in parallel, make * sure we read it once and use the same value throughout. */ - int signum = ACCESS_ONCE(fown->signum); + int signum = READ_ONCE(fown->signum); if (!sigio_perm(p, fown, signum)) return; diff --git a/fs/fs_pin.c b/fs/fs_pin.c index e747b3d720ee..2d07f292b625 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -78,7 +78,7 @@ void mnt_pin_kill(struct mount *m) while (1) { struct hlist_node *p; rcu_read_lock(); - p = ACCESS_ONCE(m->mnt_pins.first); + p = READ_ONCE(m->mnt_pins.first); if (!p) { rcu_read_unlock(); break; @@ -92,7 +92,7 @@ void group_pin_kill(struct hlist_head *p) while (1) { struct hlist_node *q; rcu_read_lock(); - q = ACCESS_ONCE(p->first); + q = READ_ONCE(p->first); if (!q) { rcu_read_unlock(); break; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 13c65dd2d37d..a42d89371748 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -33,7 +33,7 @@ static struct fuse_dev *fuse_get_dev(struct file *file) * Lockless access is OK, because file->private data is set * once during mount and is valid until the file is released. */ - return ACCESS_ONCE(file->private_data); + return READ_ONCE(file->private_data); } static void fuse_request_init(struct fuse_req *req, struct page **pages, diff --git a/fs/inode.c b/fs/inode.c index d1e35b53bb23..fd401028a309 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2090,7 +2090,7 @@ void inode_set_flags(struct inode *inode, unsigned int flags, WARN_ON_ONCE(flags & ~mask); do { - old_flags = ACCESS_ONCE(inode->i_flags); + old_flags = READ_ONCE(inode->i_flags); new_flags = (old_flags & ~mask) | flags; } while (unlikely(cmpxchg(&inode->i_flags, old_flags, new_flags) != old_flags)); diff --git a/fs/namei.c b/fs/namei.c index c75ea03ca147..40a0f34bf990 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1209,7 +1209,7 @@ static int follow_managed(struct path *path, struct nameidata *nd) /* Given that we're not holding a lock here, we retain the value in a * local variable for each dentry as we look at it so that we don't see * the components of that value change under us */ - while (managed = ACCESS_ONCE(path->dentry->d_flags), + while (managed = READ_ONCE(path->dentry->d_flags), managed &= DCACHE_MANAGED_DENTRY, unlikely(managed != 0)) { /* Allow the filesystem to manage the transit without i_mutex @@ -1394,7 +1394,7 @@ int follow_down(struct path *path) unsigned managed; int ret; - while (managed = ACCESS_ONCE(path->dentry->d_flags), + while (managed = READ_ONCE(path->dentry->d_flags), unlikely(managed & DCACHE_MANAGED_DENTRY)) { /* Allow the filesystem to manage the transit without i_mutex * being held. diff --git a/fs/namespace.c b/fs/namespace.c index d18deb4c410b..e158ec6b527b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -353,7 +353,7 @@ int __mnt_want_write(struct vfsmount *m) * incremented count after it has set MNT_WRITE_HOLD. */ smp_mb(); - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) + while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) cpu_relax(); /* * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5ceaeb1f6fb6..f439f1c45008 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1081,7 +1081,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) int error; if (flags & LOOKUP_RCU) { - parent = ACCESS_ONCE(dentry->d_parent); + parent = READ_ONCE(dentry->d_parent); dir = d_inode_rcu(parent); if (!dir) return -ECHILD; @@ -1168,7 +1168,7 @@ out_set_verifier: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_valid: if (flags & LOOKUP_RCU) { - if (parent != ACCESS_ONCE(dentry->d_parent)) + if (parent != READ_ONCE(dentry->d_parent)) return -ECHILD; } else dput(parent); @@ -1582,7 +1582,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) struct inode *dir; if (flags & LOOKUP_RCU) { - parent = ACCESS_ONCE(dentry->d_parent); + parent = READ_ONCE(dentry->d_parent); dir = d_inode_rcu(parent); if (!dir) return -ECHILD; @@ -1596,7 +1596,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) ret = -ECHILD; if (!(flags & LOOKUP_RCU)) dput(parent); - else if (parent != ACCESS_ONCE(dentry->d_parent)) + else if (parent != READ_ONCE(dentry->d_parent)) return -ECHILD; goto out; } diff --git a/fs/proc/array.c b/fs/proc/array.c index 77a8eacbe032..375e8bf0dd24 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -453,7 +453,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, cutime = sig->cutime; cstime = sig->cstime; cgtime = sig->cgtime; - rsslim = ACCESS_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); + rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); /* add up live thread stats at the group level */ if (whole) { diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 99dff222fe67..03afd5150916 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -27,7 +27,7 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) poll_wait(file, &p->ns->poll, wait); - event = ACCESS_ONCE(ns->event); + event = READ_ONCE(ns->event); if (m->poll_event != event) { m->poll_event = event; res |= POLLERR | POLLPRI; diff --git a/fs/splice.c b/fs/splice.c index f3084cce0ea6..39e2dc01ac12 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -253,7 +253,7 @@ EXPORT_SYMBOL(add_to_pipe); */ int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { - unsigned int buffers = ACCESS_ONCE(pipe->buffers); + unsigned int buffers = READ_ONCE(pipe->buffers); spd->nr_pages_max = buffers; if (buffers <= PIPE_DEF_BUFFERS) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 1c713fd5b3e6..f46d133c0949 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -381,7 +381,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * in __get_user_pages if userfaultfd_release waits on the * caller of handle_userfault to release the mmap_sem. */ - if (unlikely(ACCESS_ONCE(ctx->released))) { + if (unlikely(READ_ONCE(ctx->released))) { /* * Don't return VM_FAULT_SIGBUS in this case, so a non * cooperative manager can close the uffd after the @@ -477,7 +477,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) vmf->flags, reason); up_read(&mm->mmap_sem); - if (likely(must_wait && !ACCESS_ONCE(ctx->released) && + if (likely(must_wait && !READ_ONCE(ctx->released) && (return_to_userland ? !signal_pending(current) : !fatal_signal_pending(current)))) { wake_up_poll(&ctx->fd_wqh, POLLIN); @@ -586,7 +586,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, set_current_state(TASK_KILLABLE); if (ewq->msg.event == 0) break; - if (ACCESS_ONCE(ctx->released) || + if (READ_ONCE(ctx->released) || fatal_signal_pending(current)) { /* * &ewq->wq may be queued in fork_event, but @@ -833,7 +833,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) struct userfaultfd_wake_range range = { .len = 0, }; unsigned long new_flags; - ACCESS_ONCE(ctx->released) = true; + WRITE_ONCE(ctx->released, true); if (!mmget_not_zero(mm)) goto wakeup; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 51bf7b827387..129975970d99 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -592,9 +592,9 @@ xlog_valid_lsn( * a transiently forward state. Instead, we can see the LSN in a * transiently behind state if we happen to race with a cycle wrap. */ - cur_cycle = ACCESS_ONCE(log->l_curr_cycle); + cur_cycle = READ_ONCE(log->l_curr_cycle); smp_rmb(); - cur_block = ACCESS_ONCE(log->l_curr_block); + cur_block = READ_ONCE(log->l_curr_block); if ((CYCLE_LSN(lsn) > cur_cycle) || (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) { diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 8fbe259b197c..0a7ce668f8e0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -236,7 +236,7 @@ static inline unsigned long __ffs64(u64 word) typeof(*ptr) old, new; \ \ do { \ - old = ACCESS_ONCE(*ptr); \ + old = READ_ONCE(*ptr); \ new = (old & ~mask) | bits; \ } while (cmpxchg(ptr, old, new) != old); \ \ @@ -251,7 +251,7 @@ static inline unsigned long __ffs64(u64 word) typeof(*ptr) old, new; \ \ do { \ - old = ACCESS_ONCE(*ptr); \ + old = READ_ONCE(*ptr); \ new = old & ~clear; \ } while (!(old & test) && \ cmpxchg(ptr, old, new) != old); \ diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index a4be70398ce1..36dd4ffb5715 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -88,7 +88,7 @@ static inline void dql_queued(struct dql *dql, unsigned int count) /* Returns how many objects can be queued, < 0 indicates over limit. */ static inline int dql_avail(const struct dql *dql) { - return ACCESS_ONCE(dql->adj_limit) - ACCESS_ONCE(dql->num_queued); + return READ_ONCE(dql->adj_limit) - READ_ONCE(dql->num_queued); } /* Record number of completed objects and recalculate the limit. */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 14bc21c2ee7f..785a00ca4628 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -221,7 +221,7 @@ extern struct page *huge_zero_page; static inline bool is_huge_zero_page(struct page *page) { - return ACCESS_ONCE(huge_zero_page) == page; + return READ_ONCE(huge_zero_page) == page; } static inline bool is_huge_zero_pmd(pmd_t pmd) diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 30294603526f..d95cae09dea0 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -247,7 +247,7 @@ static inline struct team_port *team_get_port_by_index(struct team *team, static inline int team_num_to_port_index(struct team *team, unsigned int num) { - int en_port_count = ACCESS_ONCE(team->en_port_count); + int en_port_count = READ_ONCE(team->en_port_count); if (unlikely(!en_port_count)) return 0; diff --git a/include/linux/llist.h b/include/linux/llist.h index 1957635e6d5f..85abc2915e8d 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -198,7 +198,7 @@ static inline void init_llist_head(struct llist_head *list) */ static inline bool llist_empty(const struct llist_head *head) { - return ACCESS_ONCE(head->first) == NULL; + return READ_ONCE(head->first) == NULL; } static inline struct llist_node *llist_next(struct llist_node *node) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 2efb08a60e63..f0fc4700b6ff 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -105,7 +105,7 @@ static inline bool pm_runtime_callbacks_present(struct device *dev) static inline void pm_runtime_mark_last_busy(struct device *dev) { - ACCESS_ONCE(dev->power.last_busy) = jiffies; + WRITE_ONCE(dev->power.last_busy, jiffies); } static inline bool pm_runtime_is_irq_safe(struct device *dev) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4f4f786255ef..3fadb6f9982b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -983,12 +983,12 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) static inline int sysctl_sync_period(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]); + return READ_ONCE(ipvs->sysctl_sync_threshold[1]); } static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period); + return READ_ONCE(ipvs->sysctl_sync_refresh_period); } static inline int sysctl_sync_retries(struct netns_ipvs *ipvs) @@ -1013,7 +1013,7 @@ static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs) static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) { - return ACCESS_ONCE(ipvs->sysctl_sync_ports); + return READ_ONCE(ipvs->sysctl_sync_ports); } static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs) diff --git a/kernel/acct.c b/kernel/acct.c index 5e72af29ab73..21eedd0dd81a 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -146,7 +146,7 @@ static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) again: smp_rmb(); rcu_read_lock(); - res = to_acct(ACCESS_ONCE(ns->bacct)); + res = to_acct(READ_ONCE(ns->bacct)); if (!res) { rcu_read_unlock(); return NULL; @@ -158,7 +158,7 @@ again: } rcu_read_unlock(); mutex_lock(&res->lock); - if (res != to_acct(ACCESS_ONCE(ns->bacct))) { + if (res != to_acct(READ_ONCE(ns->bacct))) { mutex_unlock(&res->lock); acct_put(res); goto again; diff --git a/kernel/events/core.c b/kernel/events/core.c index 824a583079a1..8fd2f2d1358a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1200,7 +1200,7 @@ perf_event_ctx_lock_nested(struct perf_event *event, int nesting) again: rcu_read_lock(); - ctx = ACCESS_ONCE(event->ctx); + ctx = READ_ONCE(event->ctx); if (!atomic_inc_not_zero(&ctx->refcount)) { rcu_read_unlock(); goto again; @@ -5302,8 +5302,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!rb) goto aux_unlock; - aux_offset = ACCESS_ONCE(rb->user_page->aux_offset); - aux_size = ACCESS_ONCE(rb->user_page->aux_size); + aux_offset = READ_ONCE(rb->user_page->aux_offset); + aux_size = READ_ONCE(rb->user_page->aux_size); if (aux_offset < perf_data_size(rb) + PAGE_SIZE) goto aux_unlock; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index f684d8e5fa2b..f3e37971c842 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -381,7 +381,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * (B) <-> (C) ordering is still observed by the pmu driver. */ if (!rb->aux_overwrite) { - aux_tail = ACCESS_ONCE(rb->user_page->aux_tail); + aux_tail = READ_ONCE(rb->user_page->aux_tail); handle->wakeup = rb->aux_wakeup + rb->aux_watermark; if (aux_head - aux_tail < perf_aux_size(rb)) handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb)); diff --git a/kernel/exit.c b/kernel/exit.c index f6cad39f35df..6b4298a41167 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1339,7 +1339,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition * can't confuse the checks below. */ - int exit_state = ACCESS_ONCE(p->exit_state); + int exit_state = READ_ONCE(p->exit_state); int ret; if (unlikely(exit_state == EXIT_DEAD)) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 81279c6602ff..845f3805c73d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2724,7 +2724,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, * if it happened, we have to fail the write. */ barrier(); - if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) { + if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) { local_dec(&cpu_buffer->committing); local_dec(&cpu_buffer->commits); return NULL; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 652c682707cd..9050c8b3ccde 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1459,7 +1459,7 @@ extern struct trace_event_file *find_event_file(struct trace_array *tr, static inline void *event_file_data(struct file *filp) { - return ACCESS_ONCE(file_inode(filp)->i_private); + return READ_ONCE(file_inode(filp)->i_private); } extern struct mutex event_mutex; diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 49cb41412eec..780262210c9a 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -77,7 +77,7 @@ check_stack(unsigned long ip, unsigned long *stack) { unsigned long this_size, flags; unsigned long *p, *top, *start; static int tracer_frame; - int frame_size = ACCESS_ONCE(tracer_frame); + int frame_size = READ_ONCE(tracer_frame); int i, x; this_size = ((unsigned long)stack) & (THREAD_SIZE-1); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index c490f1e4313b..d32b45662fb6 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -894,7 +894,7 @@ static bool new_idmap_permitted(const struct file *file, int proc_setgroups_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; - unsigned long userns_flags = ACCESS_ONCE(ns->flags); + unsigned long userns_flags = READ_ONCE(ns->flags); seq_printf(seq, "%s\n", (userns_flags & USERNS_SETGROUPS_ALLOWED) ? diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 155c55d8db5f..fe7953aead82 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -39,7 +39,7 @@ begin_node: /* Descend through a shortcut */ shortcut = assoc_array_ptr_to_shortcut(cursor); smp_read_barrier_depends(); - cursor = ACCESS_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); } node = assoc_array_ptr_to_node(cursor); @@ -55,7 +55,7 @@ begin_node: */ has_meta = 0; for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = ACCESS_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); has_meta |= (unsigned long)ptr; if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer @@ -89,7 +89,7 @@ continue_node: smp_read_barrier_depends(); for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = ACCESS_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); if (assoc_array_ptr_is_meta(ptr)) { cursor = ptr; goto begin_node; @@ -98,7 +98,7 @@ continue_node: finished_node: /* Move up to the parent (may need to skip back over a shortcut) */ - parent = ACCESS_ONCE(node->back_pointer); + parent = READ_ONCE(node->back_pointer); slot = node->parent_slot; if (parent == stop) return 0; @@ -107,7 +107,7 @@ finished_node: shortcut = assoc_array_ptr_to_shortcut(parent); smp_read_barrier_depends(); cursor = parent; - parent = ACCESS_ONCE(shortcut->back_pointer); + parent = READ_ONCE(shortcut->back_pointer); slot = shortcut->parent_slot; if (parent == stop) return 0; @@ -147,7 +147,7 @@ int assoc_array_iterate(const struct assoc_array *array, void *iterator_data), void *iterator_data) { - struct assoc_array_ptr *root = ACCESS_ONCE(array->root); + struct assoc_array_ptr *root = READ_ONCE(array->root); if (!root) return 0; @@ -194,7 +194,7 @@ assoc_array_walk(const struct assoc_array *array, pr_devel("-->%s()\n", __func__); - cursor = ACCESS_ONCE(array->root); + cursor = READ_ONCE(array->root); if (!cursor) return assoc_array_walk_tree_empty; @@ -220,7 +220,7 @@ consider_node: slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); slot &= ASSOC_ARRAY_FAN_MASK; - ptr = ACCESS_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); pr_devel("consider slot %x [ix=%d type=%lu]\n", slot, level, (unsigned long)ptr & 3); @@ -294,7 +294,7 @@ follow_shortcut: } while (sc_level < shortcut->skip_to_level); /* The shortcut matches the leaf's index to this point. */ - cursor = ACCESS_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { level = sc_level; goto jumped; @@ -337,7 +337,7 @@ void *assoc_array_find(const struct assoc_array *array, * the terminal node. */ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = ACCESS_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer * and dereferencing the pointer - but only if we are diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index f346715e2255..81770a55cb16 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -20,7 +20,7 @@ void dql_completed(struct dql *dql, unsigned int count) unsigned int ovlimit, completed, num_queued; bool all_prev_completed; - num_queued = ACCESS_ONCE(dql->num_queued); + num_queued = READ_ONCE(dql->num_queued); /* Can't complete more than what's in queue */ BUG_ON(count > num_queued - dql->num_completed); diff --git a/lib/llist.c b/lib/llist.c index ae5872b1df0c..7062e931a7bb 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -41,7 +41,7 @@ bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_node *first; do { - new_last->next = first = ACCESS_ONCE(head->first); + new_last->next = first = READ_ONCE(head->first); } while (cmpxchg(&head->first, first, new_first) != first); return !first; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 86c3385b9eb3..1746bae94d41 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -620,8 +620,8 @@ char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_sp rcu_read_lock(); for (i = 0; i < depth; i++, d = p) { - p = ACCESS_ONCE(d->d_parent); - array[i] = ACCESS_ONCE(d->d_name.name); + p = READ_ONCE(d->d_parent); + array[i] = READ_ONCE(d->d_name.name); if (p == d) { if (i) array[i] = ""; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 269b5df58543..c3bf907a03ee 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2715,7 +2715,7 @@ static unsigned long deferred_split_count(struct shrinker *shrink, struct shrink_control *sc) { struct pglist_data *pgdata = NODE_DATA(sc->nid); - return ACCESS_ONCE(pgdata->split_queue_len); + return READ_ONCE(pgdata->split_queue_len); } static unsigned long deferred_split_scan(struct shrinker *shrink, diff --git a/net/core/dev.c b/net/core/dev.c index 11596a302a26..61559ca3980b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3725,7 +3725,7 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id <= flow_table->mask) { rflow = &flow_table->flows[flow_id]; - cpu = ACCESS_ONCE(rflow->cpu); + cpu = READ_ONCE(rflow->cpu); if (rflow->filter == filter_id && cpu < nr_cpu_ids && ((int)(per_cpu(softnet_data, cpu).input_queue_head - rflow->last_qtail) < diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6e1e10ff433a..3b2034f6d49d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3377,7 +3377,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev *pkt_dev) { - unsigned int burst = ACCESS_ONCE(pkt_dev->burst); + unsigned int burst = READ_ONCE(pkt_dev->burst); struct net_device *odev = pkt_dev->odev; struct netdev_queue *txq; struct sk_buff *skb; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index af74d0433453..f9597ba26599 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -164,7 +164,7 @@ static void inet_frag_worker(struct work_struct *work) local_bh_disable(); - for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { + for (i = READ_ONCE(f->next_bucket); budget; --budget) { evicted += inet_evict_bucket(f, &f->hash[i]); i = (i + 1) & (INETFRAGS_HASHSZ - 1); if (evicted > INETFRAGS_EVICT_MAX) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3d9f1c2f81c5..c0864562083b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -495,7 +495,7 @@ u32 ip_idents_reserve(u32 hash, int segs) { u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ; atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; - u32 old = ACCESS_ONCE(*p_tstamp); + u32 old = READ_ONCE(*p_tstamp); u32 now = (u32)jiffies; u32 new, delta = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0bc9e46a5369..48531da1aba6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1908,7 +1908,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) goto send_now; - win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); + win_divisor = READ_ONCE(sysctl_tcp_tso_win_divisor); if (win_divisor) { u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ebfbccae62fd..02ec9a349303 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1853,7 +1853,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ /* if we're overly short, let UDP handle it */ - encap_rcv = ACCESS_ONCE(up->encap_rcv); + encap_rcv = READ_ONCE(up->encap_rcv); if (encap_rcv) { int ret; @@ -2298,7 +2298,7 @@ void udp_destroy_sock(struct sock *sk) unlock_sock_fast(sk, slow); if (static_key_false(&udp_encap_needed) && up->encap_type) { void (*encap_destroy)(struct sock *sk); - encap_destroy = ACCESS_ONCE(up->encap_destroy); + encap_destroy = READ_ONCE(up->encap_destroy); if (encap_destroy) encap_destroy(sk); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a1c24443cd9e..dab946554157 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -490,7 +490,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, if (!t) goto out; - tproto = ACCESS_ONCE(t->parms.proto); + tproto = READ_ONCE(t->parms.proto); if (tproto != ipproto && tproto != 0) goto out; @@ -899,7 +899,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); if (t) { - u8 tproto = ACCESS_ONCE(t->parms.proto); + u8 tproto = READ_ONCE(t->parms.proto); if (tproto != ipproto && tproto != 0) goto drop; @@ -1233,7 +1233,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - tproto = ACCESS_ONCE(t->parms.proto); + tproto = READ_ONCE(t->parms.proto); if (tproto != IPPROTO_IPIP && tproto != 0) return -1; @@ -1303,7 +1303,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; - tproto = ACCESS_ONCE(t->parms.proto); + tproto = READ_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || ip6_tnl_addr_conflict(t, ipv6h)) return -1; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 40d7234c27b9..3f30fa313bf2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -606,7 +606,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ /* if we're overly short, let UDP handle it */ - encap_rcv = ACCESS_ONCE(up->encap_rcv); + encap_rcv = READ_ONCE(up->encap_rcv); if (encap_rcv) { int ret; @@ -1432,7 +1432,7 @@ void udpv6_destroy_sock(struct sock *sk) if (static_key_false(&udpv6_encap_needed) && up->encap_type) { void (*encap_destroy)(struct sock *sk); - encap_destroy = ACCESS_ONCE(up->encap_destroy); + encap_destroy = READ_ONCE(up->encap_destroy); if (encap_destroy) encap_destroy(sk); } diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index dd3e83328ad5..82cb93f66b9b 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -193,7 +193,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, */ rcv = rcu_dereference(sap->rcv_func); dest = llc_pdu_type(skb); - sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL; + sap_handler = dest ? READ_ONCE(llc_type_handlers[dest - 1]) : NULL; if (unlikely(!sap_handler)) { if (rcv) rcv(skb, dev, pt, orig_dev); @@ -214,7 +214,7 @@ drop: kfree_skb(skb); goto out; handle_station: - sta_handler = ACCESS_ONCE(llc_station_handler); + sta_handler = READ_ONCE(llc_station_handler); if (!sta_handler) goto drop; sta_handler(skb); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 69615016d5bf..214d2ba02877 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2008,7 +2008,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate, static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) { - u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate); + u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate); if (rate == STA_STATS_RATE_INVALID) return -EINVAL; diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index d177dd066504..4d748975117d 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -393,7 +393,7 @@ EXPORT_SYMBOL(netlbl_calipso_ops_register); static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void) { - return ACCESS_ONCE(calipso_ops); + return READ_ONCE(calipso_ops); } /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d396cb61a280..eb866647a27a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -14201,7 +14201,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; - u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); + u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid); if (!nlportid) return false; diff --git a/sound/firewire/amdtp-am824.c b/sound/firewire/amdtp-am824.c index 23ccddb20de1..4210e5c6262e 100644 --- a/sound/firewire/amdtp-am824.c +++ b/sound/firewire/amdtp-am824.c @@ -247,7 +247,7 @@ void amdtp_am824_midi_trigger(struct amdtp_stream *s, unsigned int port, struct amdtp_am824 *p = s->protocol; if (port < p->midi_ports) - ACCESS_ONCE(p->midi[port]) = midi; + WRITE_ONCE(p->midi[port], midi); } EXPORT_SYMBOL_GPL(amdtp_am824_midi_trigger); @@ -336,7 +336,7 @@ static unsigned int process_rx_data_blocks(struct amdtp_stream *s, __be32 *buffe unsigned int data_blocks, unsigned int *syt) { struct amdtp_am824 *p = s->protocol; - struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); unsigned int pcm_frames; if (pcm) { @@ -357,7 +357,7 @@ static unsigned int process_tx_data_blocks(struct amdtp_stream *s, __be32 *buffe unsigned int data_blocks, unsigned int *syt) { struct amdtp_am824 *p = s->protocol; - struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); unsigned int pcm_frames; if (pcm) { diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 3fc581a5ad62..4a1dc145327b 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -376,7 +376,7 @@ static void update_pcm_pointers(struct amdtp_stream *s, ptr = s->pcm_buffer_pointer + frames; if (ptr >= pcm->runtime->buffer_size) ptr -= pcm->runtime->buffer_size; - ACCESS_ONCE(s->pcm_buffer_pointer) = ptr; + WRITE_ONCE(s->pcm_buffer_pointer, ptr); s->pcm_period_pointer += frames; if (s->pcm_period_pointer >= pcm->runtime->period_size) { @@ -388,7 +388,7 @@ static void update_pcm_pointers(struct amdtp_stream *s, static void pcm_period_tasklet(unsigned long data) { struct amdtp_stream *s = (void *)data; - struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); if (pcm) snd_pcm_period_elapsed(pcm); @@ -453,7 +453,7 @@ static int handle_out_packet(struct amdtp_stream *s, s->data_block_counter = (s->data_block_counter + data_blocks) & 0xff; - buffer[0] = cpu_to_be32(ACCESS_ONCE(s->source_node_id_field) | + buffer[0] = cpu_to_be32(READ_ONCE(s->source_node_id_field) | (s->data_block_quadlets << CIP_DBS_SHIFT) | ((s->sph << CIP_SPH_SHIFT) & CIP_SPH_MASK) | s->data_block_counter); @@ -472,7 +472,7 @@ static int handle_out_packet(struct amdtp_stream *s, if (queue_out_packet(s, payload_length) < 0) return -EIO; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm && pcm_frames > 0) update_pcm_pointers(s, pcm, pcm_frames); @@ -504,7 +504,7 @@ static int handle_out_packet_without_header(struct amdtp_stream *s, if (queue_out_packet(s, payload_length) < 0) return -EIO; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm && pcm_frames > 0) update_pcm_pointers(s, pcm, pcm_frames); @@ -621,7 +621,7 @@ end: if (queue_in_packet(s) < 0) return -EIO; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm && pcm_frames > 0) update_pcm_pointers(s, pcm, pcm_frames); @@ -649,7 +649,7 @@ static int handle_in_packet_without_header(struct amdtp_stream *s, if (queue_in_packet(s) < 0) return -EIO; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm && pcm_frames > 0) update_pcm_pointers(s, pcm, pcm_frames); @@ -947,7 +947,7 @@ unsigned long amdtp_stream_pcm_pointer(struct amdtp_stream *s) if (!in_interrupt() && amdtp_stream_running(s)) fw_iso_context_flush_completions(s->context); - return ACCESS_ONCE(s->pcm_buffer_pointer); + return READ_ONCE(s->pcm_buffer_pointer); } EXPORT_SYMBOL(amdtp_stream_pcm_pointer); @@ -977,9 +977,8 @@ EXPORT_SYMBOL(amdtp_stream_pcm_ack); void amdtp_stream_update(struct amdtp_stream *s) { /* Precomputing. */ - ACCESS_ONCE(s->source_node_id_field) = - (fw_parent_device(s->unit)->card->node_id << CIP_SID_SHIFT) & - CIP_SID_MASK; + WRITE_ONCE(s->source_node_id_field, + (fw_parent_device(s->unit)->card->node_id << CIP_SID_SHIFT) & CIP_SID_MASK); } EXPORT_SYMBOL(amdtp_stream_update); @@ -1022,7 +1021,7 @@ void amdtp_stream_pcm_abort(struct amdtp_stream *s) { struct snd_pcm_substream *pcm; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm) snd_pcm_stop_xrun(pcm); } diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index ed6eafd10992..f9abd8b07ce6 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -220,7 +220,7 @@ static inline bool amdtp_stream_pcm_running(struct amdtp_stream *s) static inline void amdtp_stream_pcm_trigger(struct amdtp_stream *s, struct snd_pcm_substream *pcm) { - ACCESS_ONCE(s->pcm) = pcm; + WRITE_ONCE(s->pcm, pcm); } static inline bool cip_sfc_is_base_44100(enum cip_sfc sfc) diff --git a/sound/firewire/digi00x/amdtp-dot.c b/sound/firewire/digi00x/amdtp-dot.c index 1453c34ce99f..4a884a335248 100644 --- a/sound/firewire/digi00x/amdtp-dot.c +++ b/sound/firewire/digi00x/amdtp-dot.c @@ -327,7 +327,7 @@ void amdtp_dot_midi_trigger(struct amdtp_stream *s, unsigned int port, struct amdtp_dot *p = s->protocol; if (port < MAX_MIDI_PORTS) - ACCESS_ONCE(p->midi[port]) = midi; + WRITE_ONCE(p->midi[port], midi); } static unsigned int process_tx_data_blocks(struct amdtp_stream *s, @@ -338,7 +338,7 @@ static unsigned int process_tx_data_blocks(struct amdtp_stream *s, struct snd_pcm_substream *pcm; unsigned int pcm_frames; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm) { read_pcm_s32(s, pcm, buffer, data_blocks); pcm_frames = data_blocks; @@ -359,7 +359,7 @@ static unsigned int process_rx_data_blocks(struct amdtp_stream *s, struct snd_pcm_substream *pcm; unsigned int pcm_frames; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm) { write_pcm_s32(s, pcm, buffer, data_blocks); pcm_frames = data_blocks; diff --git a/sound/firewire/fireface/amdtp-ff.c b/sound/firewire/fireface/amdtp-ff.c index 780da9deb2f0..77c7598b61ab 100644 --- a/sound/firewire/fireface/amdtp-ff.c +++ b/sound/firewire/fireface/amdtp-ff.c @@ -108,7 +108,7 @@ static unsigned int process_rx_data_blocks(struct amdtp_stream *s, unsigned int data_blocks, unsigned int *syt) { - struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); unsigned int pcm_frames; if (pcm) { @@ -127,7 +127,7 @@ static unsigned int process_tx_data_blocks(struct amdtp_stream *s, unsigned int data_blocks, unsigned int *syt) { - struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); unsigned int pcm_frames; if (pcm) { diff --git a/sound/firewire/fireface/ff-midi.c b/sound/firewire/fireface/ff-midi.c index 949ee56b4e0e..6a49611ee462 100644 --- a/sound/firewire/fireface/ff-midi.c +++ b/sound/firewire/fireface/ff-midi.c @@ -22,7 +22,7 @@ static int midi_playback_open(struct snd_rawmidi_substream *substream) ff->running_status[substream->number] = 0; ff->rx_midi_error[substream->number] = false; - ACCESS_ONCE(ff->rx_midi_substreams[substream->number]) = substream; + WRITE_ONCE(ff->rx_midi_substreams[substream->number], substream); return 0; } @@ -38,7 +38,7 @@ static int midi_playback_close(struct snd_rawmidi_substream *substream) struct snd_ff *ff = substream->rmidi->private_data; cancel_work_sync(&ff->rx_midi_work[substream->number]); - ACCESS_ONCE(ff->rx_midi_substreams[substream->number]) = NULL; + WRITE_ONCE(ff->rx_midi_substreams[substream->number], NULL); return 0; } @@ -52,10 +52,10 @@ static void midi_capture_trigger(struct snd_rawmidi_substream *substream, spin_lock_irqsave(&ff->lock, flags); if (up) - ACCESS_ONCE(ff->tx_midi_substreams[substream->number]) = - substream; + WRITE_ONCE(ff->tx_midi_substreams[substream->number], + substream); else - ACCESS_ONCE(ff->tx_midi_substreams[substream->number]) = NULL; + WRITE_ONCE(ff->tx_midi_substreams[substream->number], NULL); spin_unlock_irqrestore(&ff->lock, flags); } diff --git a/sound/firewire/fireface/ff-transaction.c b/sound/firewire/fireface/ff-transaction.c index dd6c8e839647..332b29f8ed75 100644 --- a/sound/firewire/fireface/ff-transaction.c +++ b/sound/firewire/fireface/ff-transaction.c @@ -12,7 +12,7 @@ static void finish_transmit_midi_msg(struct snd_ff *ff, unsigned int port, int rcode) { struct snd_rawmidi_substream *substream = - ACCESS_ONCE(ff->rx_midi_substreams[port]); + READ_ONCE(ff->rx_midi_substreams[port]); if (rcode_is_permanent_error(rcode)) { ff->rx_midi_error[port] = true; @@ -60,7 +60,7 @@ static inline void fill_midi_buf(struct snd_ff *ff, unsigned int port, static void transmit_midi_msg(struct snd_ff *ff, unsigned int port) { struct snd_rawmidi_substream *substream = - ACCESS_ONCE(ff->rx_midi_substreams[port]); + READ_ONCE(ff->rx_midi_substreams[port]); u8 *buf = (u8 *)ff->msg_buf[port]; int i, len; @@ -159,7 +159,7 @@ static void handle_midi_msg(struct fw_card *card, struct fw_request *request, */ index = (quad >> 8) & 0xff; if (index > 0) { - substream = ACCESS_ONCE(ff->tx_midi_substreams[0]); + substream = READ_ONCE(ff->tx_midi_substreams[0]); if (substream != NULL) { byte = quad & 0xff; snd_rawmidi_receive(substream, &byte, 1); @@ -169,7 +169,7 @@ static void handle_midi_msg(struct fw_card *card, struct fw_request *request, /* Message in second port. */ index = (quad >> 24) & 0xff; if (index > 0) { - substream = ACCESS_ONCE(ff->tx_midi_substreams[1]); + substream = READ_ONCE(ff->tx_midi_substreams[1]); if (substream != NULL) { byte = (quad >> 16) & 0xff; snd_rawmidi_receive(substream, &byte, 1); diff --git a/sound/firewire/isight.c b/sound/firewire/isight.c index 5826aa8362f1..46092fa3ff9b 100644 --- a/sound/firewire/isight.c +++ b/sound/firewire/isight.c @@ -96,7 +96,7 @@ static void isight_update_pointers(struct isight *isight, unsigned int count) ptr += count; if (ptr >= runtime->buffer_size) ptr -= runtime->buffer_size; - ACCESS_ONCE(isight->buffer_pointer) = ptr; + WRITE_ONCE(isight->buffer_pointer, ptr); isight->period_counter += count; if (isight->period_counter >= runtime->period_size) { @@ -111,7 +111,7 @@ static void isight_samples(struct isight *isight, struct snd_pcm_runtime *runtime; unsigned int count1; - if (!ACCESS_ONCE(isight->pcm_running)) + if (!READ_ONCE(isight->pcm_running)) return; runtime = isight->pcm->runtime; @@ -131,7 +131,7 @@ static void isight_samples(struct isight *isight, static void isight_pcm_abort(struct isight *isight) { - if (ACCESS_ONCE(isight->pcm_active)) + if (READ_ONCE(isight->pcm_active)) snd_pcm_stop_xrun(isight->pcm); } @@ -141,7 +141,7 @@ static void isight_dropped_samples(struct isight *isight, unsigned int total) u32 dropped; unsigned int count1; - if (!ACCESS_ONCE(isight->pcm_running)) + if (!READ_ONCE(isight->pcm_running)) return; runtime = isight->pcm->runtime; @@ -293,7 +293,7 @@ static int isight_hw_params(struct snd_pcm_substream *substream, if (err < 0) return err; - ACCESS_ONCE(isight->pcm_active) = true; + WRITE_ONCE(isight->pcm_active, true); return 0; } @@ -331,7 +331,7 @@ static int isight_hw_free(struct snd_pcm_substream *substream) { struct isight *isight = substream->private_data; - ACCESS_ONCE(isight->pcm_active) = false; + WRITE_ONCE(isight->pcm_active, false); mutex_lock(&isight->mutex); isight_stop_streaming(isight); @@ -424,10 +424,10 @@ static int isight_trigger(struct snd_pcm_substream *substream, int cmd) switch (cmd) { case SNDRV_PCM_TRIGGER_START: - ACCESS_ONCE(isight->pcm_running) = true; + WRITE_ONCE(isight->pcm_running, true); break; case SNDRV_PCM_TRIGGER_STOP: - ACCESS_ONCE(isight->pcm_running) = false; + WRITE_ONCE(isight->pcm_running, false); break; default: return -EINVAL; @@ -439,7 +439,7 @@ static snd_pcm_uframes_t isight_pointer(struct snd_pcm_substream *substream) { struct isight *isight = substream->private_data; - return ACCESS_ONCE(isight->buffer_pointer); + return READ_ONCE(isight->buffer_pointer); } static int isight_create_pcm(struct isight *isight) diff --git a/sound/firewire/motu/amdtp-motu.c b/sound/firewire/motu/amdtp-motu.c index 96f0091144bb..f0555a24d90e 100644 --- a/sound/firewire/motu/amdtp-motu.c +++ b/sound/firewire/motu/amdtp-motu.c @@ -310,7 +310,7 @@ static unsigned int process_tx_data_blocks(struct amdtp_stream *s, if (p->midi_ports) read_midi_messages(s, buffer, data_blocks); - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (data_blocks > 0 && pcm) read_pcm_s32(s, pcm->runtime, buffer, data_blocks); @@ -374,7 +374,7 @@ static unsigned int process_rx_data_blocks(struct amdtp_stream *s, if (p->midi_ports) write_midi_messages(s, buffer, data_blocks); - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm) write_pcm_s32(s, pcm->runtime, buffer, data_blocks); else diff --git a/sound/firewire/oxfw/oxfw-scs1x.c b/sound/firewire/oxfw/oxfw-scs1x.c index 02d595665898..f33497cdc706 100644 --- a/sound/firewire/oxfw/oxfw-scs1x.c +++ b/sound/firewire/oxfw/oxfw-scs1x.c @@ -112,7 +112,7 @@ static void handle_hss(struct fw_card *card, struct fw_request *request, } if (length >= 1) { - stream = ACCESS_ONCE(scs->input); + stream = READ_ONCE(scs->input); if (stream) midi_input_packet(scs, stream, data, length); } @@ -183,7 +183,7 @@ static void scs_output_work(struct work_struct *work) if (scs->transaction_running) return; - stream = ACCESS_ONCE(scs->output); + stream = READ_ONCE(scs->output); if (!stream || scs->error) { scs->output_idle = true; wake_up(&scs->idle_wait); @@ -291,9 +291,9 @@ static void midi_capture_trigger(struct snd_rawmidi_substream *stream, int up) if (up) { scs->input_escape_count = 0; - ACCESS_ONCE(scs->input) = stream; + WRITE_ONCE(scs->input, stream); } else { - ACCESS_ONCE(scs->input) = NULL; + WRITE_ONCE(scs->input, NULL); } } @@ -319,10 +319,10 @@ static void midi_playback_trigger(struct snd_rawmidi_substream *stream, int up) scs->transaction_bytes = 0; scs->error = false; - ACCESS_ONCE(scs->output) = stream; + WRITE_ONCE(scs->output, stream); schedule_work(&scs->work); } else { - ACCESS_ONCE(scs->output) = NULL; + WRITE_ONCE(scs->output, NULL); } } static void midi_playback_drain(struct snd_rawmidi_substream *stream) diff --git a/sound/firewire/tascam/amdtp-tascam.c b/sound/firewire/tascam/amdtp-tascam.c index 6aff1fc1c72d..ab482423c165 100644 --- a/sound/firewire/tascam/amdtp-tascam.c +++ b/sound/firewire/tascam/amdtp-tascam.c @@ -124,7 +124,7 @@ static unsigned int process_tx_data_blocks(struct amdtp_stream *s, { struct snd_pcm_substream *pcm; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (data_blocks > 0 && pcm) read_pcm_s32(s, pcm, buffer, data_blocks); @@ -143,7 +143,7 @@ static unsigned int process_rx_data_blocks(struct amdtp_stream *s, /* This field is not used. */ *syt = 0x0000; - pcm = ACCESS_ONCE(s->pcm); + pcm = READ_ONCE(s->pcm); if (pcm) write_pcm_s32(s, pcm, buffer, data_blocks); else diff --git a/sound/firewire/tascam/tascam-transaction.c b/sound/firewire/tascam/tascam-transaction.c index 8967c52f5032..2ad692dd4b13 100644 --- a/sound/firewire/tascam/tascam-transaction.c +++ b/sound/firewire/tascam/tascam-transaction.c @@ -148,7 +148,7 @@ static void async_midi_port_callback(struct fw_card *card, int rcode, void *callback_data) { struct snd_fw_async_midi_port *port = callback_data; - struct snd_rawmidi_substream *substream = ACCESS_ONCE(port->substream); + struct snd_rawmidi_substream *substream = READ_ONCE(port->substream); /* This port is closed. */ if (substream == NULL) @@ -173,7 +173,7 @@ static void midi_port_work(struct work_struct *work) { struct snd_fw_async_midi_port *port = container_of(work, struct snd_fw_async_midi_port, work); - struct snd_rawmidi_substream *substream = ACCESS_ONCE(port->substream); + struct snd_rawmidi_substream *substream = READ_ONCE(port->substream); int generation; /* Under transacting or error state. */ @@ -282,7 +282,7 @@ static void handle_midi_tx(struct fw_card *card, struct fw_request *request, bytes = 3; } - substream = ACCESS_ONCE(tscm->tx_midi_substreams[port]); + substream = READ_ONCE(tscm->tx_midi_substreams[port]); if (substream != NULL) snd_rawmidi_receive(substream, b + 1, bytes); } diff --git a/sound/soc/xtensa/xtfpga-i2s.c b/sound/soc/xtensa/xtfpga-i2s.c index 8382ffa3bcaf..2472144b329e 100644 --- a/sound/soc/xtensa/xtfpga-i2s.c +++ b/sound/soc/xtensa/xtfpga-i2s.c @@ -165,7 +165,7 @@ static bool xtfpga_pcm_push_tx(struct xtfpga_i2s *i2s) tx_substream = rcu_dereference(i2s->tx_substream); tx_active = tx_substream && snd_pcm_running(tx_substream); if (tx_active) { - unsigned tx_ptr = ACCESS_ONCE(i2s->tx_ptr); + unsigned tx_ptr = READ_ONCE(i2s->tx_ptr); unsigned new_tx_ptr = i2s->tx_fn(i2s, tx_substream->runtime, tx_ptr); @@ -437,7 +437,7 @@ static int xtfpga_pcm_trigger(struct snd_pcm_substream *substream, int cmd) case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - ACCESS_ONCE(i2s->tx_ptr) = 0; + WRITE_ONCE(i2s->tx_ptr, 0); rcu_assign_pointer(i2s->tx_substream, substream); xtfpga_pcm_refill_fifo(i2s); break; @@ -459,7 +459,7 @@ static snd_pcm_uframes_t xtfpga_pcm_pointer(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct xtfpga_i2s *i2s = runtime->private_data; - snd_pcm_uframes_t pos = ACCESS_ONCE(i2s->tx_ptr); + snd_pcm_uframes_t pos = READ_ONCE(i2s->tx_ptr); return pos < runtime->buffer_size ? pos : 0; } diff --git a/sound/usb/bcd2000/bcd2000.c b/sound/usb/bcd2000/bcd2000.c index 7371e5b06035..fc579f330601 100644 --- a/sound/usb/bcd2000/bcd2000.c +++ b/sound/usb/bcd2000/bcd2000.c @@ -108,7 +108,7 @@ static void bcd2000_midi_handle_input(struct bcd2000 *bcd2k, unsigned int payload_length, tocopy; struct snd_rawmidi_substream *midi_receive_substream; - midi_receive_substream = ACCESS_ONCE(bcd2k->midi_receive_substream); + midi_receive_substream = READ_ONCE(bcd2k->midi_receive_substream); if (!midi_receive_substream) return; @@ -139,7 +139,7 @@ static void bcd2000_midi_send(struct bcd2000 *bcd2k) BUILD_BUG_ON(sizeof(device_cmd_prefix) >= BUFSIZE); - midi_out_substream = ACCESS_ONCE(bcd2k->midi_out_substream); + midi_out_substream = READ_ONCE(bcd2k->midi_out_substream); if (!midi_out_substream) return; diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h index 328eeceec709..96e2d06cb031 100644 --- a/tools/arch/x86/include/asm/atomic.h +++ b/tools/arch/x86/include/asm/atomic.h @@ -24,7 +24,7 @@ */ static inline int atomic_read(const atomic_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } /** diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h index 5e9738f97bf3..97427e700e3b 100644 --- a/tools/include/asm-generic/atomic-gcc.h +++ b/tools/include/asm-generic/atomic-gcc.h @@ -21,7 +21,7 @@ */ static inline int atomic_read(const atomic_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } /** diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 33b5e6cdf38c..d19e11b68de7 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -378,7 +378,7 @@ struct addr_filters { static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm) { struct perf_event_mmap_page *pc = mm->userpg; - u64 head = ACCESS_ONCE(pc->aux_head); + u64 head = READ_ONCE(pc->aux_head); /* Ensure all reads are done after we read the head */ rmb(); @@ -389,7 +389,7 @@ static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm) { struct perf_event_mmap_page *pc = mm->userpg; #if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) - u64 head = ACCESS_ONCE(pc->aux_head); + u64 head = READ_ONCE(pc->aux_head); #else u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0); #endif diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 47b5e7dbcb18..aae9645c7122 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -113,7 +113,7 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, extern volatile int session_done; -#define session_done() ACCESS_ONCE(session_done) +#define session_done() READ_ONCE(session_done) int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9deb5a245b83..ce507ae1d4f5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2302,7 +2302,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) continue; } else if (pass && i > last_boosted_vcpu) break; - if (!ACCESS_ONCE(vcpu->preempted)) + if (!READ_ONCE(vcpu->preempted)) continue; if (vcpu == me) continue; -- cgit v1.3-14-g43fede From 7d9285e82db5defca4d9674ba089429eeca0c697 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:19 -0700 Subject: perf/bpf: Extend the perf_event_read_local() interface, a.k.a. "bpf: perf event change needed for subsequent bpf helpers" eBPF programs would like access to the (perf) event enabled and running times along with the event value, such that they can deal with event multiplexing (among other things). This patch extends the interface; a future eBPF patch will utilize the new functionality. [ Note, there's a same-content commit with a poor changelog and a meaningless title in the networking tree as well - but we need this change for subsequent perf work, so apply it here as well, with a proper changelog. Hopefully Git will be able to sort out this somewhat messy workflow, if there are no other, conflicting changes to these files. ] Signed-off-by: Yonghong Song [ Rewrote the changelog. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: David S. Miller Link: http://lkml.kernel.org/r/20171005161923.332790-2-yhs@fb.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 7 +++++-- kernel/bpf/arraymap.c | 2 +- kernel/events/core.c | 15 +++++++++++++-- kernel/trace/bpf_trace.c | 2 +- 4 files changed, 20 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8e22f24ded6a..79b18a20cf5d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -806,6 +806,7 @@ struct perf_output_handle { struct bpf_perf_event_data_kern { struct pt_regs *regs; struct perf_sample_data *data; + struct perf_event *event; }; #ifdef CONFIG_CGROUP_PERF @@ -884,7 +885,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, void *context); extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); -int perf_event_read_local(struct perf_event *event, u64 *value); +int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -1286,7 +1288,8 @@ static inline const struct perf_event_attr *perf_event_attrs(struct perf_event * { return ERR_PTR(-EINVAL); } -static inline int perf_event_read_local(struct perf_event *event, u64 *value) +static inline int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running) { return -EINVAL; } diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index e2636737b69b..c4b9ab01bba5 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -492,7 +492,7 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, ee = ERR_PTR(-EOPNOTSUPP); event = perf_file->private_data; - if (perf_event_read_local(event, &value) == -EOPNOTSUPP) + if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP) goto err_out; ee = bpf_event_entry_gen(perf_file, map_file); diff --git a/kernel/events/core.c b/kernel/events/core.c index 04989fb769f0..74671e101b92 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3684,10 +3684,12 @@ static inline u64 perf_event_count(struct perf_event *event) * will not be local and we cannot read them atomically * - must not have a pmu::count method */ -int perf_event_read_local(struct perf_event *event, u64 *value) +int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running) { unsigned long flags; int ret = 0; + u64 now; /* * Disabling interrupts avoids all counter scheduling (context @@ -3718,13 +3720,21 @@ int perf_event_read_local(struct perf_event *event, u64 *value) goto out; } + now = event->shadow_ctx_time + perf_clock(); + if (enabled) + *enabled = now - event->tstamp_enabled; /* * If the event is currently on this CPU, its either a per-task event, * or local to this CPU. Furthermore it means its ACTIVE (otherwise * oncpu == -1). */ - if (event->oncpu == smp_processor_id()) + if (event->oncpu == smp_processor_id()) { event->pmu->read(event); + if (running) + *running = now - event->tstamp_running; + } else if (running) { + *running = event->total_time_running; + } *value = local64_read(&event->count); out: @@ -8072,6 +8082,7 @@ static void bpf_overflow_handler(struct perf_event *event, struct bpf_perf_event_data_kern ctx = { .data = data, .regs = regs, + .event = event, }; int ret = 0; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index dc498b605d5d..95888ae6c263 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -275,7 +275,7 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) if (!ee) return -ENOENT; - err = perf_event_read_local(ee->event, &value); + err = perf_event_read_local(ee->event, &value, NULL, NULL); /* * this api is ugly since we miss [-22..-2] range of valid * counter values, but that's uapi -- cgit v1.3-14-g43fede From 035226b964c820f65e201cdf123705a8f1d7c670 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Thu, 26 Oct 2017 01:47:42 +0000 Subject: bpf: remove tail_call and get_stackid helper declarations from bpf.h commit afdb09c720b6 ("security: bpf: Add LSM hooks for bpf object related syscall") included linux/bpf.h in linux/security.h. As a result, bpf programs including bpf_helpers.h and some other header that ends up pulling in also security.h, such as several examples under samples/bpf, fail to compile because bpf_tail_call and bpf_get_stackid are now "redefined as different kind of symbol". >From bpf.h: u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); Whereas in bpf_helpers.h they are: static void (*bpf_tail_call)(void *ctx, void *map, int index); static int (*bpf_get_stackid)(void *ctx, void *map, int flags); Fix this by removing the unused declaration of bpf_tail_call and moving the declaration of bpf_get_stackid in bpf_trace.c, which is the only place where it's needed. Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 --- kernel/trace/bpf_trace.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 172be7faf7ba..520aeebe0d93 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -231,9 +231,6 @@ struct bpf_event_entry { struct rcu_head rcu; }; -u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); -u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); - bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b65011d320e3..136aa6bb0422 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -15,6 +15,8 @@ #include #include "trace.h" +u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); + /** * trace_call_bpf - invoke BPF program * @call: tracepoint event -- cgit v1.3-14-g43fede From 07c41a295c5f25928a7cb689fdec816bd0089fe8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 30 Oct 2017 13:50:22 -0700 Subject: bpf: avoid rcu_dereference inside bpf_event_mutex lock region During perf event attaching/detaching bpf programs, the tp_event->prog_array change is protected by the bpf_event_mutex lock in both attaching and deteching functions. Although tp_event->prog_array is a rcu pointer, rcu_derefrence is not needed to access it since mutex lock will guarantee ordering. Verified through "make C=2" that sparse locking check still happy with the new change. Also change the label name in perf_event_{attach,detach}_bpf_prog from "out" to "unlock" to reflect the code action after the label. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 136aa6bb0422..506efe6e8ed9 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -769,20 +769,19 @@ int perf_event_attach_bpf_prog(struct perf_event *event, mutex_lock(&bpf_event_mutex); if (event->prog) - goto out; + goto unlock; - old_array = rcu_dereference_protected(event->tp_event->prog_array, - lockdep_is_held(&bpf_event_mutex)); + old_array = event->tp_event->prog_array; ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); if (ret < 0) - goto out; + goto unlock; /* set the new array to event->tp_event and set event->prog */ event->prog = prog; rcu_assign_pointer(event->tp_event->prog_array, new_array); bpf_prog_array_free(old_array); -out: +unlock: mutex_unlock(&bpf_event_mutex); return ret; } @@ -796,11 +795,9 @@ void perf_event_detach_bpf_prog(struct perf_event *event) mutex_lock(&bpf_event_mutex); if (!event->prog) - goto out; - - old_array = rcu_dereference_protected(event->tp_event->prog_array, - lockdep_is_held(&bpf_event_mutex)); + goto unlock; + old_array = event->tp_event->prog_array; ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); if (ret < 0) { bpf_prog_array_delete_safe(old_array, event->prog); @@ -812,6 +809,6 @@ void perf_event_detach_bpf_prog(struct perf_event *event) bpf_prog_put(event->prog); event->prog = NULL; -out: +unlock: mutex_unlock(&bpf_event_mutex); } -- cgit v1.3-14-g43fede From 1f2cac107c591c24b60b115d6050adc213d10fc0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 5 Nov 2017 09:13:48 -0700 Subject: blktrace: fix unlocked access to init/start-stop/teardown sg.c calls into the blktrace functions without holding the proper queue mutex for doing setup, start/stop, or teardown. Add internal unlocked variants, and export the ones that do the proper locking. Fixes: 6da127ad0918 ("blktrace: Add blktrace ioctls to SCSI generic devices") Tested-by: Dmitry Vyukov Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 58 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 10 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 45a3928544ce..ea57dd94b2b2 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -336,7 +336,7 @@ static void blk_trace_cleanup(struct blk_trace *bt) blk_unregister_tracepoints(); } -int blk_trace_remove(struct request_queue *q) +static int __blk_trace_remove(struct request_queue *q) { struct blk_trace *bt; @@ -349,6 +349,17 @@ int blk_trace_remove(struct request_queue *q) return 0; } + +int blk_trace_remove(struct request_queue *q) +{ + int ret; + + mutex_lock(&q->blk_trace_mutex); + ret = __blk_trace_remove(q); + mutex_unlock(&q->blk_trace_mutex); + + return ret; +} EXPORT_SYMBOL_GPL(blk_trace_remove); static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, @@ -550,9 +561,8 @@ err: return ret; } -int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, - struct block_device *bdev, - char __user *arg) +static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg) { struct blk_user_trace_setup buts; int ret; @@ -571,6 +581,19 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, } return 0; } + +int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, + char __user *arg) +{ + int ret; + + mutex_lock(&q->blk_trace_mutex); + ret = __blk_trace_setup(q, name, dev, bdev, arg); + mutex_unlock(&q->blk_trace_mutex); + + return ret; +} EXPORT_SYMBOL_GPL(blk_trace_setup); #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) @@ -607,7 +630,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, } #endif -int blk_trace_startstop(struct request_queue *q, int start) +static int __blk_trace_startstop(struct request_queue *q, int start) { int ret; struct blk_trace *bt = q->blk_trace; @@ -646,6 +669,17 @@ int blk_trace_startstop(struct request_queue *q, int start) return ret; } + +int blk_trace_startstop(struct request_queue *q, int start) +{ + int ret; + + mutex_lock(&q->blk_trace_mutex); + ret = __blk_trace_startstop(q, start); + mutex_unlock(&q->blk_trace_mutex); + + return ret; +} EXPORT_SYMBOL_GPL(blk_trace_startstop); /* @@ -676,7 +710,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) switch (cmd) { case BLKTRACESETUP: bdevname(bdev, b); - ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); + ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) case BLKTRACESETUP32: @@ -687,10 +721,10 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) case BLKTRACESTART: start = 1; case BLKTRACESTOP: - ret = blk_trace_startstop(q, start); + ret = __blk_trace_startstop(q, start); break; case BLKTRACETEARDOWN: - ret = blk_trace_remove(q); + ret = __blk_trace_remove(q); break; default: ret = -ENOTTY; @@ -708,10 +742,14 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { + mutex_lock(&q->blk_trace_mutex); + if (q->blk_trace) { - blk_trace_startstop(q, 0); - blk_trace_remove(q); + __blk_trace_startstop(q, 0); + __blk_trace_remove(q); } + + mutex_unlock(&q->blk_trace_mutex); } #ifdef CONFIG_BLK_CGROUP -- cgit v1.3-14-g43fede From a6da0024ffc19e0d47712bb5ca4fd083f76b07df Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 5 Nov 2017 09:16:09 -0700 Subject: blktrace: fix unlocked registration of tracepoints We need to ensure that tracepoints are registered and unregistered with the users of them. The existing atomic count isn't enough for that. Add a lock around the tracepoints, so we serialize access to them. This fixes cases where we have multiple users setting up and tearing down tracepoints, like this: CPU: 0 PID: 2995 Comm: syzkaller857118 Not tainted 4.14.0-rc5-next-20171018+ #36 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 panic+0x1e4/0x41c kernel/panic.c:183 __warn+0x1c4/0x1e0 kernel/panic.c:546 report_bug+0x211/0x2d0 lib/bug.c:183 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:177 do_trap_no_signal arch/x86/kernel/traps.c:211 [inline] do_trap+0x260/0x390 arch/x86/kernel/traps.c:260 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:297 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:310 invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905 RIP: 0010:tracepoint_add_func kernel/tracepoint.c:210 [inline] RIP: 0010:tracepoint_probe_register_prio+0x397/0x9a0 kernel/tracepoint.c:283 RSP: 0018:ffff8801d1d1f6c0 EFLAGS: 00010293 RAX: ffff8801d22e8540 RBX: 00000000ffffffef RCX: ffffffff81710f07 RDX: 0000000000000000 RSI: ffffffff85b679c0 RDI: ffff8801d5f19818 RBP: ffff8801d1d1f7c8 R08: ffffffff81710c10 R09: 0000000000000004 R10: ffff8801d1d1f6b0 R11: 0000000000000003 R12: ffffffff817597f0 R13: 0000000000000000 R14: 00000000ffffffff R15: ffff8801d1d1f7a0 tracepoint_probe_register+0x2a/0x40 kernel/tracepoint.c:304 register_trace_block_rq_insert include/trace/events/block.h:191 [inline] blk_register_tracepoints+0x1e/0x2f0 kernel/trace/blktrace.c:1043 do_blk_trace_setup+0xa10/0xcf0 kernel/trace/blktrace.c:542 blk_trace_setup+0xbd/0x180 kernel/trace/blktrace.c:564 sg_ioctl+0xc71/0x2d90 drivers/scsi/sg.c:1089 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x444339 RSP: 002b:00007ffe05bb5b18 EFLAGS: 00000206 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000006d66c0 RCX: 0000000000444339 RDX: 000000002084cf90 RSI: 00000000c0481273 RDI: 0000000000000009 RBP: 0000000000000082 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: ffffffffffffffff R13: 00000000c0481273 R14: 0000000000000000 R15: 0000000000000000 since we can now run these in parallel. Ensure that the exported helpers for doing this are grabbing the queue trace mutex. Reported-by: Steven Rostedt Tested-by: Dmitry Vyukov Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index ea57dd94b2b2..206e0e2ace53 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -66,7 +66,8 @@ static struct tracer_flags blk_tracer_flags = { }; /* Global reference count of probes */ -static atomic_t blk_probes_ref = ATOMIC_INIT(0); +static DEFINE_MUTEX(blk_probe_mutex); +static int blk_probes_ref; static void blk_register_tracepoints(void); static void blk_unregister_tracepoints(void); @@ -329,11 +330,26 @@ static void blk_trace_free(struct blk_trace *bt) kfree(bt); } +static void get_probe_ref(void) +{ + mutex_lock(&blk_probe_mutex); + if (++blk_probes_ref == 1) + blk_register_tracepoints(); + mutex_unlock(&blk_probe_mutex); +} + +static void put_probe_ref(void) +{ + mutex_lock(&blk_probe_mutex); + if (!--blk_probes_ref) + blk_unregister_tracepoints(); + mutex_unlock(&blk_probe_mutex); +} + static void blk_trace_cleanup(struct blk_trace *bt) { blk_trace_free(bt); - if (atomic_dec_and_test(&blk_probes_ref)) - blk_unregister_tracepoints(); + put_probe_ref(); } static int __blk_trace_remove(struct request_queue *q) @@ -549,8 +565,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (cmpxchg(&q->blk_trace, NULL, bt)) goto err; - if (atomic_inc_return(&blk_probes_ref) == 1) - blk_register_tracepoints(); + get_probe_ref(); ret = 0; err: @@ -1596,9 +1611,7 @@ static int blk_trace_remove_queue(struct request_queue *q) if (bt == NULL) return -EINVAL; - if (atomic_dec_and_test(&blk_probes_ref)) - blk_unregister_tracepoints(); - + put_probe_ref(); blk_trace_free(bt); return 0; } @@ -1629,8 +1642,7 @@ static int blk_trace_setup_queue(struct request_queue *q, if (cmpxchg(&q->blk_trace, NULL, bt)) goto free_bt; - if (atomic_inc_return(&blk_probes_ref) == 1) - blk_register_tracepoints(); + get_probe_ref(); return 0; free_bt: -- cgit v1.3-14-g43fede From dd0bb688eaa241b5655d396d45366cba9225aed9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 Nov 2017 15:28:42 -0500 Subject: bpf: add a bpf_override_function helper Error injection is sloppy and very ad-hoc. BPF could fill this niche perfectly with it's kprobe functionality. We could make sure errors are only triggered in specific call chains that we care about with very specific situations. Accomplish this with the bpf_override_funciton helper. This will modify the probe'd callers return value to the specified value and set the PC to an override function that simply returns, bypassing the originally probed function. This gives us a nice clean way to implement systematic error injection for all of our code paths. Acked-by: Alexei Starovoitov Signed-off-by: Josef Bacik Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- arch/Kconfig | 3 +++ arch/x86/Kconfig | 1 + arch/x86/include/asm/kprobes.h | 4 ++++ arch/x86/include/asm/ptrace.h | 5 +++++ arch/x86/kernel/kprobes/ftrace.c | 14 ++++++++++++++ include/linux/filter.h | 3 ++- include/linux/trace_events.h | 1 + include/uapi/linux/bpf.h | 7 ++++++- kernel/bpf/core.c | 3 +++ kernel/bpf/verifier.c | 2 ++ kernel/events/core.c | 7 +++++++ kernel/trace/Kconfig | 11 +++++++++++ kernel/trace/bpf_trace.c | 35 +++++++++++++++++++++++++++++++++++ kernel/trace/trace_kprobe.c | 40 +++++++++++++++++++++++++++++++++------- kernel/trace/trace_probe.h | 6 ++++++ 15 files changed, 133 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/arch/Kconfig b/arch/Kconfig index 057370a0ac4e..6e8520f09bc1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -196,6 +196,9 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool +config HAVE_KPROBE_OVERRIDE + bool + config HAVE_NMI bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fdb23313dd5..51458c1a0b4a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -153,6 +153,7 @@ config X86 select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE + select HAVE_KPROBE_OVERRIDE select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 6cf65437b5e5..c6c3b1f4306a 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -67,6 +67,10 @@ extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); asmlinkage void kretprobe_trampoline(void); +#ifdef CONFIG_KPROBES_ON_FTRACE +extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs); +#endif + /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index c0e3c45cf6ab..2370bb0149cc 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -109,6 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->ax; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->ax = rc; +} + /* * user_mode(regs) determines whether a register set came from user * mode. On x86_32, this is true if V8086 mode was enabled OR if the diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 041f7b6dfa0f..3c455bf490cb 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p) p->ainsn.boostable = false; return 0; } + +asmlinkage void override_func(void); +asm( + ".type override_func, @function\n" + "override_func:\n" + " ret\n" + ".size override_func, .-override_func\n" +); + +void arch_ftrace_kprobe_override_function(struct pt_regs *regs) +{ + regs->ip = (unsigned long)&override_func; +} +NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function); diff --git a/include/linux/filter.h b/include/linux/filter.h index 0cd02ff4ae30..eaec066f99e8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -459,7 +459,8 @@ struct bpf_prog { locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1; /* Do we need dst entry? */ + dst_needed:1, /* Do we need dst entry? */ + kprobe_override:1; /* Do we override a kprobe? */ kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 84014ecfa67f..17e5e820a84c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -523,6 +523,7 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); +DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e880ae6434ee..adb66f78b674 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -677,6 +677,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +740,8 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8a6c37762330..271daad31f37 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1326,6 +1326,9 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { + if (fp->kprobe_override) + return false; + if (!array->owner_prog_type) { /* There's no owner yet where we could check for * compatibility. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4a942e2e753d..bc464b8ec91e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4357,6 +4357,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_override_return) + prog->kprobe_override = 1; if (insn->imm == BPF_FUNC_tail_call) { /* If we tail call into other programs, we * cannot make any assumptions since they can diff --git a/kernel/events/core.c b/kernel/events/core.c index 42d24bd64ea4..ac240d31b5bf 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8171,6 +8171,13 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EINVAL; } + /* Kprobe override only works for kprobes, not uprobes. */ + if (prog->kprobe_override && + !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) { + bpf_prog_put(prog); + return -EINVAL; + } + if (is_tracepoint || is_syscall_tp) { int off = trace_event_get_offsets(event->tp_event); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 434c840e2d82..9dc0deeaad2b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -518,6 +518,17 @@ config FUNCTION_PROFILER If in doubt, say N. +config BPF_KPROBE_OVERRIDE + bool "Enable BPF programs to override a kprobed function" + depends on BPF_EVENTS + depends on KPROBES_ON_FTRACE + depends on HAVE_KPROBE_OVERRIDE + depends on DYNAMIC_FTRACE_WITH_REGS + default n + help + Allows BPF to override the execution of a probed function and + set a different return value. This is used for error injection. + config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 506efe6e8ed9..1865b0d4cdeb 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,6 +13,10 @@ #include #include #include +#include +#include + +#include "trace_probe.h" #include "trace.h" u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -76,6 +80,29 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); +#ifdef CONFIG_BPF_KPROBE_OVERRIDE +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) +{ + __this_cpu_write(bpf_kprobe_override, 1); + regs_set_return_value(regs, rc); + arch_ftrace_kprobe_override_function(regs); + return 0; +} +#else +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) +{ + return -EINVAL; +} +#endif + +static const struct bpf_func_proto bpf_override_return_proto = { + .func = bpf_override_return, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { int ret; @@ -551,6 +578,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_stackid_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; + case BPF_FUNC_override_return: + pr_warn_ratelimited("%s[%d] is installing a program with bpf_override_return helper that may cause unexpected behavior!", + current->comm, task_pid_nr(current)); + return &bpf_override_return_proto; default: return tracing_func_proto(func_id); } @@ -766,6 +797,10 @@ int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog_array *new_array; int ret = -EEXIST; + /* Kprobe override only works for ftrace based kprobes. */ + if (prog->kprobe_override && !trace_kprobe_ftrace(event->tp_event)) + return -EINVAL; + mutex_lock(&bpf_event_mutex); if (event->prog) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index abf92e478cfb..8e3c9ec1faf7 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,6 +42,7 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) +DEFINE_PER_CPU(int, bpf_kprobe_override); static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { @@ -87,6 +88,12 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } +int trace_kprobe_ftrace(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + return kprobe_ftrace(&tk->rp.kp); +} + static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk); @@ -1170,7 +1177,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static void +static int kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; @@ -1179,12 +1186,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int size, __size, dsize; int rctx; - if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) - return; + if (bpf_prog_array_valid(call)) { + int ret; + + ret = trace_call_bpf(call, regs); + + /* + * We need to check and see if we modified the pc of the + * pt_regs, and if so clear the kprobe and return 1 so that we + * don't do the instruction skipping. Also reset our state so + * we are clean the next pass through. + */ + if (__this_cpu_read(bpf_kprobe_override)) { + __this_cpu_write(bpf_kprobe_override, 0); + reset_current_kprobe(); + return 1; + } + if (!ret) + return 0; + } head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) - return; + return 0; dsize = __get_data_size(&tk->tp, regs); __size = sizeof(*entry) + tk->tp.size + dsize; @@ -1193,13 +1217,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) - return; + return 0; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL, NULL); + return 0; } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1275,6 +1300,7 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); + int ret = 0; raw_cpu_inc(*tk->nhit); @@ -1282,9 +1308,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS if (tk->tp.flags & TP_FLAG_PROFILE) - kprobe_perf_func(tk, regs); + ret = kprobe_perf_func(tk, regs); #endif - return 0; /* We don't tweek kernel, so just return 0 */ + return ret; } NOKPROBE_SYMBOL(kprobe_dispatcher); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 903273c93e61..adbb3f7d1fb5 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -253,6 +253,7 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); +int trace_kprobe_ftrace(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods */ #define fetch_symbol_u8 NULL @@ -278,6 +279,11 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } + +static inline int trace_kprobe_ftrace(struct trace_event_call *call) +{ + return 0; +} #endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { -- cgit v1.3-14-g43fede From f3edacbd697f94a743fff1a3d26910ab99948ba7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Nov 2017 18:24:55 +0900 Subject: bpf: Revert bpf_overrid_function() helper changes. NACK'd by x86 maintainer. Signed-off-by: David S. Miller --- arch/Kconfig | 3 --- arch/x86/Kconfig | 1 - arch/x86/include/asm/kprobes.h | 4 ---- arch/x86/include/asm/ptrace.h | 5 ---- arch/x86/kernel/kprobes/ftrace.c | 14 ----------- include/linux/filter.h | 3 +-- include/linux/trace_events.h | 1 - include/uapi/linux/bpf.h | 7 +----- kernel/bpf/core.c | 3 --- kernel/bpf/verifier.c | 2 -- kernel/events/core.c | 7 ------ kernel/trace/Kconfig | 11 --------- kernel/trace/bpf_trace.c | 35 --------------------------- kernel/trace/trace_kprobe.c | 40 ++++++------------------------- kernel/trace/trace_probe.h | 6 ----- samples/bpf/Makefile | 4 ---- samples/bpf/test_override_return.sh | 15 ------------ samples/bpf/tracex7_kern.c | 16 ------------- samples/bpf/tracex7_user.c | 28 ---------------------- tools/include/uapi/linux/bpf.h | 7 +----- tools/testing/selftests/bpf/bpf_helpers.h | 3 +-- 21 files changed, 11 insertions(+), 204 deletions(-) delete mode 100755 samples/bpf/test_override_return.sh delete mode 100644 samples/bpf/tracex7_kern.c delete mode 100644 samples/bpf/tracex7_user.c (limited to 'kernel/trace') diff --git a/arch/Kconfig b/arch/Kconfig index 6e8520f09bc1..057370a0ac4e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -196,9 +196,6 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool -config HAVE_KPROBE_OVERRIDE - bool - config HAVE_NMI bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 51458c1a0b4a..2fdb23313dd5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -153,7 +153,6 @@ config X86 select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE - select HAVE_KPROBE_OVERRIDE select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index c6c3b1f4306a..6cf65437b5e5 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -67,10 +67,6 @@ extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); asmlinkage void kretprobe_trampoline(void); -#ifdef CONFIG_KPROBES_ON_FTRACE -extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs); -#endif - /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 2370bb0149cc..c0e3c45cf6ab 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -109,11 +109,6 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->ax; } -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) -{ - regs->ax = rc; -} - /* * user_mode(regs) determines whether a register set came from user * mode. On x86_32, this is true if V8086 mode was enabled OR if the diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 3c455bf490cb..041f7b6dfa0f 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -97,17 +97,3 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p) p->ainsn.boostable = false; return 0; } - -asmlinkage void override_func(void); -asm( - ".type override_func, @function\n" - "override_func:\n" - " ret\n" - ".size override_func, .-override_func\n" -); - -void arch_ftrace_kprobe_override_function(struct pt_regs *regs) -{ - regs->ip = (unsigned long)&override_func; -} -NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function); diff --git a/include/linux/filter.h b/include/linux/filter.h index eaec066f99e8..0cd02ff4ae30 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -459,8 +459,7 @@ struct bpf_prog { locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1, /* Do we need dst entry? */ - kprobe_override:1; /* Do we override a kprobe? */ + dst_needed:1; /* Do we need dst entry? */ kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 17e5e820a84c..84014ecfa67f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -523,7 +523,6 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index adb66f78b674..e880ae6434ee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -677,10 +677,6 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code - * - * int bpf_override_return(pt_regs, rc) - * @pt_regs: pointer to struct pt_regs - * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -740,8 +736,7 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), + FN(getsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 271daad31f37..8a6c37762330 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1326,9 +1326,6 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { - if (fp->kprobe_override) - return false; - if (!array->owner_prog_type) { /* There's no owner yet where we could check for * compatibility. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bc464b8ec91e..4a942e2e753d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4357,8 +4357,6 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) bpf_user_rnd_init_once(); - if (insn->imm == BPF_FUNC_override_return) - prog->kprobe_override = 1; if (insn->imm == BPF_FUNC_tail_call) { /* If we tail call into other programs, we * cannot make any assumptions since they can diff --git a/kernel/events/core.c b/kernel/events/core.c index ac240d31b5bf..42d24bd64ea4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8171,13 +8171,6 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EINVAL; } - /* Kprobe override only works for kprobes, not uprobes. */ - if (prog->kprobe_override && - !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) { - bpf_prog_put(prog); - return -EINVAL; - } - if (is_tracepoint || is_syscall_tp) { int off = trace_event_get_offsets(event->tp_event); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 9dc0deeaad2b..434c840e2d82 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -518,17 +518,6 @@ config FUNCTION_PROFILER If in doubt, say N. -config BPF_KPROBE_OVERRIDE - bool "Enable BPF programs to override a kprobed function" - depends on BPF_EVENTS - depends on KPROBES_ON_FTRACE - depends on HAVE_KPROBE_OVERRIDE - depends on DYNAMIC_FTRACE_WITH_REGS - default n - help - Allows BPF to override the execution of a probed function and - set a different return value. This is used for error injection. - config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 1865b0d4cdeb..506efe6e8ed9 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,10 +13,6 @@ #include #include #include -#include -#include - -#include "trace_probe.h" #include "trace.h" u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -80,29 +76,6 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); -#ifdef CONFIG_BPF_KPROBE_OVERRIDE -BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) -{ - __this_cpu_write(bpf_kprobe_override, 1); - regs_set_return_value(regs, rc); - arch_ftrace_kprobe_override_function(regs); - return 0; -} -#else -BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) -{ - return -EINVAL; -} -#endif - -static const struct bpf_func_proto bpf_override_return_proto = { - .func = bpf_override_return, - .gpl_only = true, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_ANYTHING, -}; - BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { int ret; @@ -578,10 +551,6 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_stackid_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; - case BPF_FUNC_override_return: - pr_warn_ratelimited("%s[%d] is installing a program with bpf_override_return helper that may cause unexpected behavior!", - current->comm, task_pid_nr(current)); - return &bpf_override_return_proto; default: return tracing_func_proto(func_id); } @@ -797,10 +766,6 @@ int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog_array *new_array; int ret = -EEXIST; - /* Kprobe override only works for ftrace based kprobes. */ - if (prog->kprobe_override && !trace_kprobe_ftrace(event->tp_event)) - return -EINVAL; - mutex_lock(&bpf_event_mutex); if (event->prog) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8e3c9ec1faf7..abf92e478cfb 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,7 +42,6 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) -DEFINE_PER_CPU(int, bpf_kprobe_override); static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { @@ -88,12 +87,6 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } -int trace_kprobe_ftrace(struct trace_event_call *call) -{ - struct trace_kprobe *tk = (struct trace_kprobe *)call->data; - return kprobe_ftrace(&tk->rp.kp); -} - static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk); @@ -1177,7 +1170,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static int +static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; @@ -1186,29 +1179,12 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int size, __size, dsize; int rctx; - if (bpf_prog_array_valid(call)) { - int ret; - - ret = trace_call_bpf(call, regs); - - /* - * We need to check and see if we modified the pc of the - * pt_regs, and if so clear the kprobe and return 1 so that we - * don't do the instruction skipping. Also reset our state so - * we are clean the next pass through. - */ - if (__this_cpu_read(bpf_kprobe_override)) { - __this_cpu_write(bpf_kprobe_override, 0); - reset_current_kprobe(); - return 1; - } - if (!ret) - return 0; - } + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) + return; head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) - return 0; + return; dsize = __get_data_size(&tk->tp, regs); __size = sizeof(*entry) + tk->tp.size + dsize; @@ -1217,14 +1193,13 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) - return 0; + return; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL, NULL); - return 0; } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1300,7 +1275,6 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); - int ret = 0; raw_cpu_inc(*tk->nhit); @@ -1308,9 +1282,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS if (tk->tp.flags & TP_FLAG_PROFILE) - ret = kprobe_perf_func(tk, regs); + kprobe_perf_func(tk, regs); #endif - return ret; + return 0; /* We don't tweek kernel, so just return 0 */ } NOKPROBE_SYMBOL(kprobe_dispatcher); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index adbb3f7d1fb5..903273c93e61 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -253,7 +253,6 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); -int trace_kprobe_ftrace(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods */ #define fetch_symbol_u8 NULL @@ -279,11 +278,6 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } - -static inline int trace_kprobe_ftrace(struct trace_event_call *call) -{ - return 0; -} #endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 87db0f9a4c15..3b4945c1eab0 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -15,7 +15,6 @@ hostprogs-y += tracex3 hostprogs-y += tracex4 hostprogs-y += tracex5 hostprogs-y += tracex6 -hostprogs-y += tracex7 hostprogs-y += test_probe_write_user hostprogs-y += trace_output hostprogs-y += lathist @@ -62,7 +61,6 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o -tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o @@ -106,7 +104,6 @@ always += tracex3_kern.o always += tracex4_kern.o always += tracex5_kern.o always += tracex6_kern.o -always += tracex7_kern.o always += sock_flags_kern.o always += test_probe_write_user_kern.o always += trace_output_kern.o @@ -161,7 +158,6 @@ HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf -HOSTLOADLIBES_tracex7 += -lelf HOSTLOADLIBES_test_cgrp2_sock2 += -lelf HOSTLOADLIBES_load_sock_ops += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh deleted file mode 100755 index e68b9ee6814b..000000000000 --- a/samples/bpf/test_override_return.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -rm -f testfile.img -dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 -DEVICE=$(losetup --show -f testfile.img) -mkfs.btrfs -f $DEVICE -mkdir tmpmnt -./tracex7 $DEVICE -if [ $? -eq 0 ] -then - echo "SUCCESS!" -else - echo "FAILED!" -fi -losetup -d $DEVICE diff --git a/samples/bpf/tracex7_kern.c b/samples/bpf/tracex7_kern.c deleted file mode 100644 index 1ab308a43e0f..000000000000 --- a/samples/bpf/tracex7_kern.c +++ /dev/null @@ -1,16 +0,0 @@ -#include -#include -#include -#include "bpf_helpers.h" - -SEC("kprobe/open_ctree") -int bpf_prog1(struct pt_regs *ctx) -{ - unsigned long rc = -12; - - bpf_override_return(ctx, rc); - return 0; -} - -char _license[] SEC("license") = "GPL"; -u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c deleted file mode 100644 index 8a52ac492e8b..000000000000 --- a/samples/bpf/tracex7_user.c +++ /dev/null @@ -1,28 +0,0 @@ -#define _GNU_SOURCE - -#include -#include -#include -#include "libbpf.h" -#include "bpf_load.h" - -int main(int argc, char **argv) -{ - FILE *f; - char filename[256]; - char command[256]; - int ret; - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; - } - - snprintf(command, 256, "mount %s tmpmnt/", argv[1]); - f = popen(command, "r"); - ret = pclose(f); - - return ret ? 0 : 1; -} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index adb66f78b674..e880ae6434ee 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -677,10 +677,6 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code - * - * int bpf_override_return(pt_regs, rc) - * @pt_regs: pointer to struct pt_regs - * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -740,8 +736,7 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), + FN(getsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 33cb00e46c49..fd9a17fa8a8b 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -82,8 +82,7 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_prog_read_value; -static int (*bpf_override_return)(void *ctx, unsigned long rc) = - (void *) BPF_FUNC_override_return; + /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- cgit v1.3-14-g43fede From 9c019e2bc4b2bd8223c8c0d4b6962478b479834d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 12 Nov 2017 14:49:10 -0800 Subject: bpf: change helper bpf_probe_read arg2 type to ARG_CONST_SIZE_OR_ZERO The helper bpf_probe_read arg2 type is changed from ARG_CONST_SIZE to ARG_CONST_SIZE_OR_ZERO to permit size-0 buffer. Together with newer ARG_CONST_SIZE_OR_ZERO semantics which allows non-NULL buffer with size 0, this allows simpler bpf programs with verifier acceptance. The previous commit which changes ARG_CONST_SIZE_OR_ZERO semantics has details on examples. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 506efe6e8ed9..a5580c670866 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -78,12 +78,16 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { - int ret; + int ret = 0; + + if (unlikely(size == 0)) + goto out; ret = probe_kernel_read(dst, unsafe_ptr, size); if (unlikely(ret < 0)) memset(dst, 0, size); + out: return ret; } @@ -92,7 +96,7 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, - .arg2_type = ARG_CONST_SIZE, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, }; -- cgit v1.3-14-g43fede From 4950276672fce5c241857540f8561c440663673d Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Wed, 15 Nov 2017 17:35:51 -0800 Subject: kmemcheck: remove annotations Patch series "kmemcheck: kill kmemcheck", v2. As discussed at LSF/MM, kill kmemcheck. KASan is a replacement that is able to work without the limitation of kmemcheck (single CPU, slow). KASan is already upstream. We are also not aware of any users of kmemcheck (or users who don't consider KASan as a suitable replacement). The only objection was that since KASAN wasn't supported by all GCC versions provided by distros at that time we should hold off for 2 years, and try again. Now that 2 years have passed, and all distros provide gcc that supports KASAN, kill kmemcheck again for the very same reasons. This patch (of 4): Remove kmemcheck annotations, and calls to kmemcheck from the kernel. [alexander.levin@verizon.com: correctly remove kmemcheck call from dma_map_sg_attrs] Link: http://lkml.kernel.org/r/20171012192151.26531-1-alexander.levin@verizon.com Link: http://lkml.kernel.org/r/20171007030159.22241-2-alexander.levin@verizon.com Signed-off-by: Sasha Levin Cc: Alexander Potapenko Cc: Eric W. Biederman Cc: Michal Hocko Cc: Pekka Enberg Cc: Steven Rostedt Cc: Tim Hansen Cc: Vegard Nossum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/dma-iommu.h | 1 - arch/openrisc/include/asm/dma-mapping.h | 1 - arch/x86/Makefile | 5 ----- arch/x86/include/asm/dma-mapping.h | 1 - arch/x86/include/asm/xor.h | 5 +---- arch/x86/kernel/traps.c | 5 ----- arch/x86/mm/fault.c | 6 ------ drivers/char/random.c | 1 - drivers/misc/c2port/core.c | 2 -- fs/dcache.c | 2 -- include/linux/c2port.h | 4 ---- include/linux/dma-mapping.h | 8 +------- include/linux/filter.h | 2 -- include/linux/mm_types.h | 8 -------- include/linux/net.h | 3 --- include/linux/ring_buffer.h | 3 --- include/linux/skbuff.h | 3 --- include/net/inet_sock.h | 3 --- include/net/inet_timewait_sock.h | 4 ---- include/net/sock.h | 3 --- init/main.c | 1 - kernel/bpf/core.c | 6 ------ kernel/locking/lockdep.c | 3 --- kernel/trace/ring_buffer.c | 3 --- mm/kmemleak.c | 9 --------- mm/page_alloc.c | 14 -------------- mm/slab.c | 14 -------------- mm/slab.h | 2 -- mm/slub.c | 20 -------------------- net/core/skbuff.c | 5 ----- net/core/sock.c | 2 -- net/ipv4/inet_timewait_sock.c | 3 --- net/ipv4/tcp_input.c | 1 - net/socket.c | 1 - 34 files changed, 2 insertions(+), 152 deletions(-) (limited to 'kernel/trace') diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index 0722ec6be692..6821f1249300 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #define ARM_MAPPING_ERROR (~(dma_addr_t)0x0) diff --git a/arch/openrisc/include/asm/dma-mapping.h b/arch/openrisc/include/asm/dma-mapping.h index f41bd3cb76d9..e212a1f0b6d2 100644 --- a/arch/openrisc/include/asm/dma-mapping.h +++ b/arch/openrisc/include/asm/dma-mapping.h @@ -23,7 +23,6 @@ */ #include -#include #include extern const struct dma_map_ops or1k_dma_map_ops; diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a20eacd9c7e9..3e73bc255e4e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -158,11 +158,6 @@ ifdef CONFIG_X86_X32 endif export CONFIG_X86_X32_ABI -# Don't unroll struct assignments with kmemcheck enabled -ifeq ($(CONFIG_KMEMCHECK),y) - KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) -endif - # # If the function graph tracer is used with mcount instead of fentry, # '-maccumulate-outgoing-args' is needed to prevent a GCC bug diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 43cbe843de8d..0350d99bb8fd 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -7,7 +7,6 @@ * Documentation/DMA-API.txt for documentation. */ -#include #include #include #include diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 1f5c5161ead6..45c8605467f1 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -1,7 +1,4 @@ -#ifdef CONFIG_KMEMCHECK -/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ -# include -#elif !defined(_ASM_X86_XOR_H) +#ifndef _ASM_X86_XOR_H #define _ASM_X86_XOR_H /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b7b0f74a2150..989514c94a55 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -42,7 +42,6 @@ #include #endif -#include #include #include #include @@ -749,10 +748,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) if (!dr6 && user_mode(regs)) user_icebp = 1; - /* Catch kmemcheck conditions! */ - if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) - goto exit; - /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 3109ba6c6ede..78ca9a8ee454 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -20,7 +20,6 @@ #include /* boot_cpu_has, ... */ #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ -#include /* kmemcheck_*(), ... */ #include /* VSYSCALL_ADDR */ #include /* emulate_vsyscall */ #include /* struct vm86 */ @@ -1256,8 +1255,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, * Detect and handle instructions that would cause a page fault for * both a tracked kernel page and a userspace page. */ - if (kmemcheck_active(regs)) - kmemcheck_hide(regs); prefetchw(&mm->mmap_sem); if (unlikely(kmmio_fault(regs, address))) @@ -1280,9 +1277,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { if (vmalloc_fault(address) >= 0) return; - - if (kmemcheck_fault(regs, address, error_code)) - return; } /* Can handle a stale RO->RW TLB: */ diff --git a/drivers/char/random.c b/drivers/char/random.c index 6c7ccac2679e..ec42c8bb9b0d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -259,7 +259,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c index 1922cb8f6b88..1c5b7aec13d4 100644 --- a/drivers/misc/c2port/core.c +++ b/drivers/misc/c2port/core.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -904,7 +903,6 @@ struct c2port_device *c2port_device_register(char *name, return ERR_PTR(-EINVAL); c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL); - kmemcheck_annotate_bitfield(c2dev, flags); if (unlikely(!c2dev)) return ERR_PTR(-ENOMEM); diff --git a/fs/dcache.c b/fs/dcache.c index bcc9f6981569..5c7df1df81ff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2705,8 +2705,6 @@ static void swap_names(struct dentry *dentry, struct dentry *target) */ unsigned int i; BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); - kmemcheck_mark_initialized(dentry->d_iname, DNAME_INLINE_LEN); - kmemcheck_mark_initialized(target->d_iname, DNAME_INLINE_LEN); for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { swap(((long *) &dentry->d_iname)[i], ((long *) &target->d_iname)[i]); diff --git a/include/linux/c2port.h b/include/linux/c2port.h index 4efabcb51347..f2736348ca26 100644 --- a/include/linux/c2port.h +++ b/include/linux/c2port.h @@ -9,8 +9,6 @@ * the Free Software Foundation */ -#include - #define C2PORT_NAME_LEN 32 struct device; @@ -22,10 +20,8 @@ struct device; /* Main struct */ struct c2port_ops; struct c2port_device { - kmemcheck_bitfield_begin(flags); unsigned int access:1; unsigned int flash_access:1; - kmemcheck_bitfield_end(flags); int id; char name[C2PORT_NAME_LEN]; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index eee1499db396..e8f8e8fb244d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -232,7 +231,6 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; - kmemcheck_mark_initialized(ptr, size); BUG_ON(!valid_dma_direction(dir)); addr = ops->map_page(dev, virt_to_page(ptr), offset_in_page(ptr), size, @@ -265,11 +263,8 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); - int i, ents; - struct scatterlist *s; + int ents; - for_each_sg(sg, s, nents, i) - kmemcheck_mark_initialized(sg_virt(s), s->length); BUG_ON(!valid_dma_direction(dir)); ents = ops->map_sg(dev, sg, nents, dir, attrs); BUG_ON(ents < 0); @@ -299,7 +294,6 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev, const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; - kmemcheck_mark_initialized(page_address(page) + offset, size); BUG_ON(!valid_dma_direction(dir)); addr = ops->map_page(dev, page, offset, size, dir, attrs); debug_dma_map_page(dev, page, offset, size, dir, addr, false); diff --git a/include/linux/filter.h b/include/linux/filter.h index 48ec57e70f9f..42197b16dd78 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -454,13 +454,11 @@ struct bpf_binary_header { struct bpf_prog { u16 pages; /* Number of allocated pages */ - kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1; /* Do we need dst entry? */ - kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 09643e0472fc..cfd0ac4e5e0e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -209,14 +209,6 @@ struct page { not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ -#ifdef CONFIG_KMEMCHECK - /* - * kmemcheck wants to track the status of each byte in a page; this - * is a pointer to such a status block. NULL if not tracked. - */ - void *shadow; -#endif - #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif diff --git a/include/linux/net.h b/include/linux/net.h index d97d80d7fdf8..caeb159abda5 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -22,7 +22,6 @@ #include #include #include /* For O_CLOEXEC and O_NONBLOCK */ -#include #include #include #include @@ -111,9 +110,7 @@ struct socket_wq { struct socket { socket_state state; - kmemcheck_bitfield_begin(type); short type; - kmemcheck_bitfield_end(type); unsigned long flags; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index fa6ace66fea5..289e4d54e3e0 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -2,7 +2,6 @@ #ifndef _LINUX_RING_BUFFER_H #define _LINUX_RING_BUFFER_H -#include #include #include #include @@ -14,9 +13,7 @@ struct ring_buffer_iter; * Don't refer to this struct directly, use functions below. */ struct ring_buffer_event { - kmemcheck_bitfield_begin(bitfield); u32 type_len:5, time_delta:27; - kmemcheck_bitfield_end(bitfield); u32 array[]; }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d448a4804aea..aa1341474916 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -15,7 +15,6 @@ #define _LINUX_SKBUFF_H #include -#include #include #include #include @@ -704,7 +703,6 @@ struct sk_buff { /* Following fields are _not_ copied in __copy_skb_header() * Note that queue_mapping is here mostly to fill a hole. */ - kmemcheck_bitfield_begin(flags1); __u16 queue_mapping; /* if you move cloned around you also must adapt those constants */ @@ -723,7 +721,6 @@ struct sk_buff { head_frag:1, xmit_more:1, __unused:1; /* one bit hole */ - kmemcheck_bitfield_end(flags1); /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index db8162dd8c0b..8e51b4a69088 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -17,7 +17,6 @@ #define _INET_SOCK_H #include -#include #include #include #include @@ -84,7 +83,6 @@ struct inet_request_sock { #define ireq_state req.__req_common.skc_state #define ireq_family req.__req_common.skc_family - kmemcheck_bitfield_begin(flags); u16 snd_wscale : 4, rcv_wscale : 4, tstamp_ok : 1, @@ -93,7 +91,6 @@ struct inet_request_sock { ecn_ok : 1, acked : 1, no_srccheck: 1; - kmemcheck_bitfield_end(flags); u32 ir_mark; union { struct ip_options_rcu __rcu *ireq_opt; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 6a75d67a30fd..1356fa6a7566 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -15,8 +15,6 @@ #ifndef _INET_TIMEWAIT_SOCK_ #define _INET_TIMEWAIT_SOCK_ - -#include #include #include #include @@ -69,14 +67,12 @@ struct inet_timewait_sock { /* Socket demultiplex comparisons on incoming packets. */ /* these three are in inet_sock */ __be16 tw_sport; - kmemcheck_bitfield_begin(flags); /* And these are ours. */ unsigned int tw_kill : 1, tw_transparent : 1, tw_flowlabel : 20, tw_pad : 2, /* 2 bits hole */ tw_tos : 8; - kmemcheck_bitfield_end(flags); struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; }; diff --git a/include/net/sock.h b/include/net/sock.h index c577286dbffb..a63e6a8bb7e0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -436,7 +436,6 @@ struct sock { #define SK_FL_TYPE_MASK 0xffff0000 #endif - kmemcheck_bitfield_begin(flags); unsigned int sk_padding : 1, sk_kern_sock : 1, sk_no_check_tx : 1, @@ -445,8 +444,6 @@ struct sock { sk_protocol : 8, sk_type : 16; #define SK_PROTOCOL_MAX U8_MAX - kmemcheck_bitfield_end(flags); - u16 sk_gso_max_segs; unsigned long sk_lingertime; struct proto *sk_prot_creator; diff --git a/init/main.c b/init/main.c index 3bdd8da90f69..859a786f7c0a 100644 --- a/init/main.c +++ b/init/main.c @@ -70,7 +70,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7b62df86be1d..11ad089f2c74 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -85,8 +85,6 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) if (fp == NULL) return NULL; - kmemcheck_annotate_bitfield(fp, meta); - aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags); if (aux == NULL) { vfree(fp); @@ -127,8 +125,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, if (fp == NULL) { __bpf_prog_uncharge(fp_old->aux->user, delta); } else { - kmemcheck_annotate_bitfield(fp, meta); - memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); fp->pages = pages; fp->aux->prog = fp; @@ -662,8 +658,6 @@ static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); if (fp != NULL) { - kmemcheck_annotate_bitfield(fp, meta); - /* aux->prog still points to the fp_other one, so * when promoting the clone to the real program, * this still needs to be adapted. diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index db933d063bfc..9776da8db180 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include @@ -3238,8 +3237,6 @@ static void __lockdep_init_map(struct lockdep_map *lock, const char *name, { int i; - kmemcheck_mark_initialized(lock, sizeof(*lock)); - for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) lock->class_cache[i] = NULL; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 845f3805c73d..d57fede84b38 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -13,7 +13,6 @@ #include #include #include /* for self test */ -#include #include #include #include @@ -2055,7 +2054,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, } event = __rb_page_index(tail_page, tail); - kmemcheck_annotate_bitfield(event, bitfield); /* account for padding bytes */ local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); @@ -2686,7 +2684,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* We reserved something on the buffer */ event = __rb_page_index(tail_page, tail); - kmemcheck_annotate_bitfield(event, bitfield); rb_update_event(cpu_buffer, event, info); local_inc(&tail_page->entries); diff --git a/mm/kmemleak.c b/mm/kmemleak.c index fca3452e56c1..e4738d5e9b8c 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -110,7 +110,6 @@ #include #include -#include #include #include @@ -1238,9 +1237,6 @@ static bool update_checksum(struct kmemleak_object *object) { u32 old_csum = object->checksum; - if (!kmemcheck_is_obj_initialized(object->pointer, object->size)) - return false; - kasan_disable_current(); object->checksum = crc32(0, (void *)object->pointer, object->size); kasan_enable_current(); @@ -1314,11 +1310,6 @@ static void scan_block(void *_start, void *_end, if (scan_should_stop()) break; - /* don't scan uninitialized memory */ - if (!kmemcheck_is_obj_initialized((unsigned long)ptr, - BYTES_PER_POINTER)) - continue; - kasan_disable_current(); pointer = *ptr; kasan_enable_current(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e6106d7e9eb0..30a464b47366 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -1013,7 +1012,6 @@ static __always_inline bool free_pages_prepare(struct page *page, VM_BUG_ON_PAGE(PageTail(page), page); trace_mm_page_free(page, order); - kmemcheck_free_shadow(page, order); /* * Check tail pages before head page information is cleared to @@ -2669,15 +2667,6 @@ void split_page(struct page *page, unsigned int order) VM_BUG_ON_PAGE(PageCompound(page), page); VM_BUG_ON_PAGE(!page_count(page), page); -#ifdef CONFIG_KMEMCHECK - /* - * Split shadow pages too, because free(page[0]) would - * otherwise free the whole shadow. - */ - if (kmemcheck_page_is_tracked(page)) - split_page(virt_to_page(page[0].shadow), order); -#endif - for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); split_page_owner(page, order); @@ -4223,9 +4212,6 @@ out: page = NULL; } - if (kmemcheck_enabled && page) - kmemcheck_pagealloc_alloc(page, order, gfp_mask); - trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype); return page; diff --git a/mm/slab.c b/mm/slab.c index 7a5e0888a401..c84365e9a591 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -114,7 +114,6 @@ #include #include #include -#include #include #include #include @@ -1433,15 +1432,6 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, if (sk_memalloc_socks() && page_is_pfmemalloc(page)) SetPageSlabPfmemalloc(page); - if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { - kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); - - if (cachep->ctor) - kmemcheck_mark_uninitialized_pages(page, nr_pages); - else - kmemcheck_mark_unallocated_pages(page, nr_pages); - } - return page; } @@ -1453,8 +1443,6 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page) int order = cachep->gfporder; unsigned long nr_freed = (1 << order); - kmemcheck_free_shadow(page, order); - if (cachep->flags & SLAB_RECLAIM_ACCOUNT) mod_lruvec_page_state(page, NR_SLAB_RECLAIMABLE, -nr_freed); else @@ -3515,8 +3503,6 @@ void ___cache_free(struct kmem_cache *cachep, void *objp, kmemleak_free_recursive(objp, cachep->flags); objp = cache_free_debugcheck(cachep, objp, caller); - kmemcheck_slab_free(cachep, objp, cachep->object_size); - /* * Skip calling cache_free_alien() when the platform is not numa. * This will avoid cache misses that happen while accessing slabp (which diff --git a/mm/slab.h b/mm/slab.h index e19255638cb6..e60a3d1d8f6f 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -40,7 +40,6 @@ struct kmem_cache { #include #include -#include #include #include #include @@ -439,7 +438,6 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, for (i = 0; i < size; i++) { void *object = p[i]; - kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); kasan_slab_alloc(s, object, flags); diff --git a/mm/slub.c b/mm/slub.c index 51484f0fc068..ac3b50b9abec 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -1377,7 +1376,6 @@ static inline void *slab_free_hook(struct kmem_cache *s, void *x) unsigned long flags; local_irq_save(flags); - kmemcheck_slab_free(s, x, s->object_size); debug_check_no_locks_freed(x, s->object_size); local_irq_restore(flags); } @@ -1598,22 +1596,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) stat(s, ORDER_FALLBACK); } - if (kmemcheck_enabled && - !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { - int pages = 1 << oo_order(oo); - - kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node); - - /* - * Objects from caches that have a constructor don't get - * cleared when they're allocated, so we need to do it here. - */ - if (s->ctor) - kmemcheck_mark_uninitialized_pages(page, pages); - else - kmemcheck_mark_unallocated_pages(page, pages); - } - page->objects = oo_objects(oo); order = compound_order(page); @@ -1689,8 +1671,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page) check_object(s, page, p, SLUB_RED_INACTIVE); } - kmemcheck_free_shadow(page, compound_order(page)); - mod_lruvec_page_state(page, (s->flags & SLAB_RECLAIM_ACCOUNT) ? NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e140ba49b30a..6cd057b41f34 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -234,14 +233,12 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); - kmemcheck_annotate_variable(shinfo->destructor_arg); if (flags & SKB_ALLOC_FCLONE) { struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); - kmemcheck_annotate_bitfield(&fclones->skb2, flags1); skb->fclone = SKB_FCLONE_ORIG; refcount_set(&fclones->fclone_ref, 1); @@ -301,7 +298,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size) shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); - kmemcheck_annotate_variable(shinfo->destructor_arg); return skb; } @@ -1283,7 +1279,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (!n) return NULL; - kmemcheck_annotate_bitfield(n, flags1); n->fclone = SKB_FCLONE_UNAVAILABLE; } diff --git a/net/core/sock.c b/net/core/sock.c index 415f441c63b9..78401fa33ce8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1469,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, sk = kmalloc(prot->obj_size, priority); if (sk != NULL) { - kmemcheck_annotate_bitfield(sk, flags); - if (security_sk_alloc(sk, family, priority)) goto out_free; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 5b039159e67a..d451b9f19b59 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -9,7 +9,6 @@ */ #include -#include #include #include #include @@ -167,8 +166,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, if (tw) { const struct inet_sock *inet = inet_sk(sk); - kmemcheck_annotate_bitfield(tw, flags); - tw->tw_dr = dr; /* Give us an identity. */ tw->tw_daddr = inet->inet_daddr; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 887585045b27..c04d60a677a7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6195,7 +6195,6 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, if (req) { struct inet_request_sock *ireq = inet_rsk(req); - kmemcheck_annotate_bitfield(ireq, flags); ireq->ireq_opt = NULL; #if IS_ENABLED(CONFIG_IPV6) ireq->pktopts = NULL; diff --git a/net/socket.c b/net/socket.c index c729625eb5d3..42d8e9c9ccd5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -568,7 +568,6 @@ struct socket *sock_alloc(void) sock = SOCKET_I(inode); - kmemcheck_annotate_bitfield(sock, type); inode->i_ino = get_next_ino(); inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); -- cgit v1.3-14-g43fede From eb33f2cca49ec49a1b893b5af546e7c042ca6365 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Wed, 22 Nov 2017 18:32:54 +0000 Subject: bpf: remove explicit handling of 0 for arg2 in bpf_probe_read Commit 9c019e2bc4b2 ("bpf: change helper bpf_probe_read arg2 type to ARG_CONST_SIZE_OR_ZERO") changed arg2 type to ARG_CONST_SIZE_OR_ZERO to simplify writing bpf programs by taking advantage of the new semantics introduced for ARG_CONST_SIZE_OR_ZERO which allows arguments. In order to prevent the helper from actually passing a NULL pointer to probe_kernel_read, which can happen when is passed to the helper, the commit also introduced an explicit check against size == 0. After the recent introduction of the ARG_PTR_TO_MEM_OR_NULL type, bpf_probe_read can not receive a pair of arguments anymore, thus the check is not needed anymore and can be removed, since probe_kernel_read can correctly handle a call. This also fixes the semantics of the helper before it gets officially released and bpf programs start relying on this check. Fixes: 9c019e2bc4b2 ("bpf: change helper bpf_probe_read arg2 type to ARG_CONST_SIZE_OR_ZERO") Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a5580c670866..728909f7951c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -78,16 +78,12 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { - int ret = 0; - - if (unlikely(size == 0)) - goto out; + int ret; ret = probe_kernel_read(dst, unsafe_ptr, size); if (unlikely(ret < 0)) memset(dst, 0, size); - out: return ret; } -- cgit v1.3-14-g43fede From 5c4e1201740ceae9bd6f622851a9bf7c66debe3a Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Wed, 22 Nov 2017 18:32:55 +0000 Subject: bpf: change bpf_probe_read_str arg2 type to ARG_CONST_SIZE_OR_ZERO Commit 9fd29c08e520 ("bpf: improve verifier ARG_CONST_SIZE_OR_ZERO semantics") relaxed the treatment of ARG_CONST_SIZE_OR_ZERO due to the way the compiler generates optimized BPF code when checking boundaries of an argument from C code. A typical example of this optimized code can be generated using the bpf_probe_read_str helper when operating on variable memory: /* len is a generic scalar */ if (len > 0 && len <= 0x7fff) bpf_probe_read_str(p, len, s); 251: (79) r1 = *(u64 *)(r10 -88) 252: (07) r1 += -1 253: (25) if r1 > 0x7ffe goto pc-42 254: (bf) r1 = r7 255: (79) r2 = *(u64 *)(r10 -88) 256: (bf) r8 = r4 257: (85) call bpf_probe_read_str#45 R2 min value is negative, either use unsigned or 'var &= const' With this code, the verifier loses track of the variable. Replacing arg2 with ARG_CONST_SIZE_OR_ZERO is thus desirable since it avoids this quite common case which leads to usability issues, and the compiler generates code that the verifier can more easily test: if (len <= 0x7fff) bpf_probe_read_str(p, len, s); or bpf_probe_read_str(p, len & 0x7fff, s); No changes to the bpf_probe_read_str helper are necessary since strncpy_from_unsafe itself immediately returns if the size passed is 0. Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 728909f7951c..ed8601a1a861 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -494,7 +494,7 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = { .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, - .arg2_type = ARG_CONST_SIZE, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, }; -- cgit v1.3-14-g43fede From a60dd35d2e39209fa7645945e1192bf9769872c6 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Wed, 22 Nov 2017 18:32:56 +0000 Subject: bpf: change bpf_perf_event_output arg5 type to ARG_CONST_SIZE_OR_ZERO Commit 9fd29c08e520 ("bpf: improve verifier ARG_CONST_SIZE_OR_ZERO semantics") relaxed the treatment of ARG_CONST_SIZE_OR_ZERO due to the way the compiler generates optimized BPF code when checking boundaries of an argument from C code. A typical example of this optimized code can be generated using the bpf_perf_event_output helper when operating on variable memory: /* len is a generic scalar */ if (len > 0 && len <= 0x7fff) bpf_perf_event_output(ctx, &perf_map, 0, buf, len); 110: (79) r5 = *(u64 *)(r10 -40) 111: (bf) r1 = r5 112: (07) r1 += -1 113: (25) if r1 > 0x7ffe goto pc+6 114: (bf) r1 = r6 115: (18) r2 = 0xffff94e5f166c200 117: (b7) r3 = 0 118: (bf) r4 = r7 119: (85) call bpf_perf_event_output#25 R5 min value is negative, either use unsigned or 'var &= const' With this code, the verifier loses track of the variable. Replacing arg5 with ARG_CONST_SIZE_OR_ZERO is thus desirable since it avoids this quite common case which leads to usability issues, and the compiler generates code that the verifier can more easily test: if (len <= 0x7fff) bpf_perf_event_output(ctx, &perf_map, 0, buf, len); or bpf_perf_event_output(ctx, &perf_map, 0, buf, len & 0x7fff); No changes to the bpf_perf_event_output helper are necessary since it can handle a case where size is 0, and an empty frame is pushed. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ed8601a1a861..27d1f4ffa3de 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -403,7 +403,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM, - .arg5_type = ARG_CONST_SIZE, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); @@ -605,7 +605,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM, - .arg5_type = ARG_CONST_SIZE, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map, -- cgit v1.3-14-g43fede