aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-22 18:18:55 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-22 18:18:55 -0700
commit2ff2b289a695807e291e1ed9f639d8a3ba5f4254 (patch)
treee4b7f44e5cc1582ba2be8aeba221f4841f4c86a6 /kernel
parentMerge branch 'for-3.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup (diff)
parentMerge branch 'perf/parse-events-4' of git://github.com/fweisbec/tracing into perf/core (diff)
downloadlinux-dev-2ff2b289a695807e291e1ed9f639d8a3ba5f4254.tar.xz
linux-dev-2ff2b289a695807e291e1ed9f639d8a3ba5f4254.zip
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf changes from Ingo Molnar: "Lots of changes: - (much) improved assembly annotation support in perf report, with jump visualization, searching, navigation, visual output improvements and more. - kernel support for AMD IBS PMU hardware features. Notably 'perf record -e cycles:p' and 'perf top -e cycles:p' should work without skid now, like PEBS does on the Intel side, because it takes advantage of IBS transparently. - the libtracevents library: it is the first step towards unifying tracing tooling and perf, and it also gives a tracing library for external tools like powertop to rely on. - infrastructure: various improvements and refactoring of the UI modules and related code - infrastructure: cleanup and simplification of the profiling targets code (--uid, --pid, --tid, --cpu, --all-cpus, etc.) - tons of robustness fixes all around - various ftrace updates: speedups, cleanups, robustness improvements. - typing 'make' in tools/ will now give you a menu of projects to build and a short help text to explain what each does. - ... and lots of other changes I forgot to list. The perf record make bzImage + perf report regression you reported should be fixed." * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (166 commits) tracing: Remove kernel_lock annotations tracing: Fix initial buffer_size_kb state ring-buffer: Merge separate resize loops perf evsel: Create events initially disabled -- again perf tools: Split term type into value type and term type perf hists: Fix callchain ip printf format perf target: Add uses_mmap field ftrace: Remove selecting FRAME_POINTER with FUNCTION_TRACER ftrace/x86: Have x86 ftrace use the ftrace_modify_all_code() ftrace: Make ftrace_modify_all_code() global for archs to use ftrace: Return record ip addr for ftrace_location() ftrace: Consolidate ftrace_location() and ftrace_text_reserved() ftrace: Speed up search by skipping pages by address ftrace: Remove extra helper functions ftrace: Sort all function addresses, not just per page tracing: change CPU ring buffer state from tracing_cpumask tracing: Check return value of tracing_dentry_percpu() ring-buffer: Reset head page before running self test ring-buffer: Add integrity check at end of iter read ring-buffer: Make addition of pages in ring buffer atomic ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c25
-rw-r--r--kernel/sched/core.c9
-rw-r--r--kernel/trace/Kconfig1
-rw-r--r--kernel/trace/ftrace.c242
-rw-r--r--kernel/trace/ring_buffer.c585
-rw-r--r--kernel/trace/trace.c503
-rw-r--r--kernel/trace/trace.h4
-rw-r--r--kernel/trace/trace_printk.c4
8 files changed, 874 insertions, 499 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fd126f82b57c..91a445925855 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2039,8 +2039,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
* accessing the event control register. If a NMI hits, then it will
* not restart the event.
*/
-void __perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next)
+static void __perf_event_task_sched_out(struct task_struct *task,
+ struct task_struct *next)
{
int ctxn;
@@ -2279,8 +2279,8 @@ static void perf_branch_stack_sched_in(struct task_struct *prev,
* accessing the event control register. If a NMI hits, then it will
* keep the event running.
*/
-void __perf_event_task_sched_in(struct task_struct *prev,
- struct task_struct *task)
+static void __perf_event_task_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
struct perf_event_context *ctx;
int ctxn;
@@ -2305,6 +2305,12 @@ void __perf_event_task_sched_in(struct task_struct *prev,
perf_branch_stack_sched_in(prev, task);
}
+void __perf_event_task_sched(struct task_struct *prev, struct task_struct *next)
+{
+ __perf_event_task_sched_out(prev, next);
+ __perf_event_task_sched_in(prev, next);
+}
+
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
{
u64 frequency = event->attr.sample_freq;
@@ -4957,7 +4963,7 @@ void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
if (rctx < 0)
return;
- perf_sample_data_init(&data, addr);
+ perf_sample_data_init(&data, addr, 0);
do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
@@ -5215,7 +5221,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
.data = record,
};
- perf_sample_data_init(&data, addr);
+ perf_sample_data_init(&data, addr, 0);
data.raw = &raw;
hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
@@ -5318,7 +5324,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;
- perf_sample_data_init(&sample, bp->attr.bp_addr);
+ perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
if (!bp->hw.state && !perf_exclude_event(bp, regs))
perf_swevent_event(bp, 1, &sample, regs);
@@ -5344,13 +5350,12 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
event->pmu->read(event);
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
regs = get_irq_regs();
if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && is_idle_task(current)))
- if (perf_event_overflow(event, &data, regs))
+ if (__perf_event_overflow(event, 1, &data, regs))
ret = HRTIMER_NORESTART;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 03667c3fdb33..d2e2e173d8f7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1914,7 +1914,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
sched_info_switch(prev, next);
- perf_event_task_sched_out(prev, next);
+ perf_event_task_sched(prev, next);
fire_sched_out_preempt_notifiers(prev, next);
prepare_lock_switch(rq, next);
prepare_arch_switch(next);
@@ -1957,13 +1957,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
*/
prev_state = prev->state;
finish_arch_switch(prev);
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- local_irq_disable();
-#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
- perf_event_task_sched_in(prev, current);
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- local_irq_enable();
-#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
finish_arch_post_lock_switch();
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a1d2849f2473..d81a1a532994 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -141,7 +141,6 @@ if FTRACE
config FUNCTION_TRACER
bool "Kernel Function Tracer"
depends on HAVE_FUNCTION_TRACER
- select FRAME_POINTER if !ARM_UNWIND && !PPC && !S390 && !MICROBLAZE
select KALLSYMS
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0fa92f677c92..a008663d86c8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1383,44 +1383,73 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
static int ftrace_cmp_recs(const void *a, const void *b)
{
- const struct dyn_ftrace *reca = a;
- const struct dyn_ftrace *recb = b;
+ const struct dyn_ftrace *key = a;
+ const struct dyn_ftrace *rec = b;
- if (reca->ip > recb->ip)
- return 1;
- if (reca->ip < recb->ip)
+ if (key->flags < rec->ip)
return -1;
+ if (key->ip >= rec->ip + MCOUNT_INSN_SIZE)
+ return 1;
return 0;
}
-/**
- * ftrace_location - return true if the ip giving is a traced location
- * @ip: the instruction pointer to check
- *
- * Returns 1 if @ip given is a pointer to a ftrace location.
- * That is, the instruction that is either a NOP or call to
- * the function tracer. It checks the ftrace internal tables to
- * determine if the address belongs or not.
- */
-int ftrace_location(unsigned long ip)
+static unsigned long ftrace_location_range(unsigned long start, unsigned long end)
{
struct ftrace_page *pg;
struct dyn_ftrace *rec;
struct dyn_ftrace key;
- key.ip = ip;
+ key.ip = start;
+ key.flags = end; /* overload flags, as it is unsigned long */
for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ if (end < pg->records[0].ip ||
+ start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
+ continue;
rec = bsearch(&key, pg->records, pg->index,
sizeof(struct dyn_ftrace),
ftrace_cmp_recs);
if (rec)
- return 1;
+ return rec->ip;
}
return 0;
}
+/**
+ * ftrace_location - return true if the ip giving is a traced location
+ * @ip: the instruction pointer to check
+ *
+ * Returns rec->ip if @ip given is a pointer to a ftrace location.
+ * That is, the instruction that is either a NOP or call to
+ * the function tracer. It checks the ftrace internal tables to
+ * determine if the address belongs or not.
+ */
+unsigned long ftrace_location(unsigned long ip)
+{
+ return ftrace_location_range(ip, ip);
+}
+
+/**
+ * ftrace_text_reserved - return true if range contains an ftrace location
+ * @start: start of range to search
+ * @end: end of range to search (inclusive). @end points to the last byte to check.
+ *
+ * Returns 1 if @start and @end contains a ftrace location.
+ * That is, the instruction that is either a NOP or call to
+ * the function tracer. It checks the ftrace internal tables to
+ * determine if the address belongs or not.
+ */
+int ftrace_text_reserved(void *start, void *end)
+{
+ unsigned long ret;
+
+ ret = ftrace_location_range((unsigned long)start,
+ (unsigned long)end);
+
+ return (int)!!ret;
+}
+
static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
@@ -1520,35 +1549,6 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
__ftrace_hash_rec_update(ops, filter_hash, 1);
}
-static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
-{
- if (ftrace_pages->index == ftrace_pages->size) {
- /* We should have allocated enough */
- if (WARN_ON(!ftrace_pages->next))
- return NULL;
- ftrace_pages = ftrace_pages->next;
- }
-
- return &ftrace_pages->records[ftrace_pages->index++];
-}
-
-static struct dyn_ftrace *
-ftrace_record_ip(unsigned long ip)
-{
- struct dyn_ftrace *rec;
-
- if (ftrace_disabled)
- return NULL;
-
- rec = ftrace_alloc_dyn_node(ip);
- if (!rec)
- return NULL;
-
- rec->ip = ip;
-
- return rec;
-}
-
static void print_ip_ins(const char *fmt, unsigned char *p)
{
int i;
@@ -1598,21 +1598,6 @@ void ftrace_bug(int failed, unsigned long ip)
}
}
-
-/* Return 1 if the address range is reserved for ftrace */
-int ftrace_text_reserved(void *start, void *end)
-{
- struct dyn_ftrace *rec;
- struct ftrace_page *pg;
-
- do_for_each_ftrace_rec(pg, rec) {
- if (rec->ip <= (unsigned long)end &&
- rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
- return 1;
- } while_for_each_ftrace_rec();
- return 0;
-}
-
static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
{
unsigned long flag = 0UL;
@@ -1698,7 +1683,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
return -1; /* unknow ftrace bug */
}
-static void ftrace_replace_code(int update)
+void __weak ftrace_replace_code(int enable)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
@@ -1708,7 +1693,7 @@ static void ftrace_replace_code(int update)
return;
do_for_each_ftrace_rec(pg, rec) {
- failed = __ftrace_replace_code(rec, update);
+ failed = __ftrace_replace_code(rec, enable);
if (failed) {
ftrace_bug(failed, rec->ip);
/* Stop processing */
@@ -1826,22 +1811,27 @@ int __weak ftrace_arch_code_modify_post_process(void)
return 0;
}
-static int __ftrace_modify_code(void *data)
+void ftrace_modify_all_code(int command)
{
- int *command = data;
-
- if (*command & FTRACE_UPDATE_CALLS)
+ if (command & FTRACE_UPDATE_CALLS)
ftrace_replace_code(1);
- else if (*command & FTRACE_DISABLE_CALLS)
+ else if (command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
- if (*command & FTRACE_UPDATE_TRACE_FUNC)
+ if (command & FTRACE_UPDATE_TRACE_FUNC)
ftrace_update_ftrace_func(ftrace_trace_function);
- if (*command & FTRACE_START_FUNC_RET)
+ if (command & FTRACE_START_FUNC_RET)
ftrace_enable_ftrace_graph_caller();
- else if (*command & FTRACE_STOP_FUNC_RET)
+ else if (command & FTRACE_STOP_FUNC_RET)
ftrace_disable_ftrace_graph_caller();
+}
+
+static int __ftrace_modify_code(void *data)
+{
+ int *command = data;
+
+ ftrace_modify_all_code(*command);
return 0;
}
@@ -2469,57 +2459,35 @@ static int
ftrace_avail_open(struct inode *inode, struct file *file)
{
struct ftrace_iterator *iter;
- int ret;
if (unlikely(ftrace_disabled))
return -ENODEV;
- iter = kzalloc(sizeof(*iter), GFP_KERNEL);
- if (!iter)
- return -ENOMEM;
-
- iter->pg = ftrace_pages_start;
- iter->ops = &global_ops;
-
- ret = seq_open(file, &show_ftrace_seq_ops);
- if (!ret) {
- struct seq_file *m = file->private_data;
-
- m->private = iter;
- } else {
- kfree(iter);
+ iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
+ if (iter) {
+ iter->pg = ftrace_pages_start;
+ iter->ops = &global_ops;
}
- return ret;
+ return iter ? 0 : -ENOMEM;
}
static int
ftrace_enabled_open(struct inode *inode, struct file *file)
{
struct ftrace_iterator *iter;
- int ret;
if (unlikely(ftrace_disabled))
return -ENODEV;
- iter = kzalloc(sizeof(*iter), GFP_KERNEL);
- if (!iter)
- return -ENOMEM;
-
- iter->pg = ftrace_pages_start;
- iter->flags = FTRACE_ITER_ENABLED;
- iter->ops = &global_ops;
-
- ret = seq_open(file, &show_ftrace_seq_ops);
- if (!ret) {
- struct seq_file *m = file->private_data;
-
- m->private = iter;
- } else {
- kfree(iter);
+ iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
+ if (iter) {
+ iter->pg = ftrace_pages_start;
+ iter->flags = FTRACE_ITER_ENABLED;
+ iter->ops = &global_ops;
}
- return ret;
+ return iter ? 0 : -ENOMEM;
}
static void ftrace_filter_reset(struct ftrace_hash *hash)
@@ -3688,22 +3656,36 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
return 0;
}
-static void ftrace_swap_recs(void *a, void *b, int size)
+static int ftrace_cmp_ips(const void *a, const void *b)
+{
+ const unsigned long *ipa = a;
+ const unsigned long *ipb = b;
+
+ if (*ipa > *ipb)
+ return 1;
+ if (*ipa < *ipb)
+ return -1;
+ return 0;
+}
+
+static void ftrace_swap_ips(void *a, void *b, int size)
{
- struct dyn_ftrace *reca = a;
- struct dyn_ftrace *recb = b;
- struct dyn_ftrace t;
+ unsigned long *ipa = a;
+ unsigned long *ipb = b;
+ unsigned long t;
- t = *reca;
- *reca = *recb;
- *recb = t;
+ t = *ipa;
+ *ipa = *ipb;
+ *ipb = t;
}
static int ftrace_process_locs(struct module *mod,
unsigned long *start,
unsigned long *end)
{
+ struct ftrace_page *start_pg;
struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
unsigned long count;
unsigned long *p;
unsigned long addr;
@@ -3715,8 +3697,11 @@ static int ftrace_process_locs(struct module *mod,
if (!count)
return 0;
- pg = ftrace_allocate_pages(count);
- if (!pg)
+ sort(start, count, sizeof(*start),
+ ftrace_cmp_ips, ftrace_swap_ips);
+
+ start_pg = ftrace_allocate_pages(count);
+ if (!start_pg)
return -ENOMEM;
mutex_lock(&ftrace_lock);
@@ -3729,7 +3714,7 @@ static int ftrace_process_locs(struct module *mod,
if (!mod) {
WARN_ON(ftrace_pages || ftrace_pages_start);
/* First initialization */
- ftrace_pages = ftrace_pages_start = pg;
+ ftrace_pages = ftrace_pages_start = start_pg;
} else {
if (!ftrace_pages)
goto out;
@@ -3740,11 +3725,11 @@ static int ftrace_process_locs(struct module *mod,
ftrace_pages = ftrace_pages->next;
}
- ftrace_pages->next = pg;
- ftrace_pages = pg;
+ ftrace_pages->next = start_pg;
}
p = start;
+ pg = start_pg;
while (p < end) {
addr = ftrace_call_adjust(*p++);
/*
@@ -3755,17 +3740,26 @@ static int ftrace_process_locs(struct module *mod,
*/
if (!addr)
continue;
- if (!ftrace_record_ip(addr))
- break;
+
+ if (pg->index == pg->size) {
+ /* We should have allocated enough */
+ if (WARN_ON(!pg->next))
+ break;
+ pg = pg->next;
+ }
+
+ rec = &pg->records[pg->index++];
+ rec->ip = addr;
}
- /* These new locations need to be initialized */
- ftrace_new_pgs = pg;
+ /* We should have used all pages */
+ WARN_ON(pg->next);
+
+ /* Assign the last page to ftrace_pages */
+ ftrace_pages = pg;
- /* Make each individual set of pages sorted by ips */
- for (; pg; pg = pg->next)
- sort(pg->records, pg->index, sizeof(struct dyn_ftrace),
- ftrace_cmp_recs, ftrace_swap_recs);
+ /* These new locations need to be initialized */
+ ftrace_new_pgs = start_pg;
/*
* We only need to disable interrupts on start up
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cf8d11e91efd..6420cda62336 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -23,6 +23,8 @@
#include <asm/local.h>
#include "trace.h"
+static void update_pages_handler(struct work_struct *work);
+
/*
* The ring buffer header is special. We must manually up keep it.
*/
@@ -449,6 +451,7 @@ struct ring_buffer_per_cpu {
raw_spinlock_t reader_lock; /* serialize readers */
arch_spinlock_t lock;
struct lock_class_key lock_key;
+ unsigned int nr_pages;
struct list_head *pages;
struct buffer_page *head_page; /* read from head */
struct buffer_page *tail_page; /* write to tail */
@@ -466,13 +469,18 @@ struct ring_buffer_per_cpu {
unsigned long read_bytes;
u64 write_stamp;
u64 read_stamp;
+ /* ring buffer pages to update, > 0 to add, < 0 to remove */
+ int nr_pages_to_update;
+ struct list_head new_pages; /* new pages to add */
+ struct work_struct update_pages_work;
+ struct completion update_done;
};
struct ring_buffer {
- unsigned pages;
unsigned flags;
int cpus;
atomic_t record_disabled;
+ atomic_t resize_disabled;
cpumask_var_t cpumask;
struct lock_class_key *reader_lock_key;
@@ -937,6 +945,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
struct list_head *head = cpu_buffer->pages;
struct buffer_page *bpage, *tmp;
+ /* Reset the head page if it exists */
+ if (cpu_buffer->head_page)
+ rb_set_head_page(cpu_buffer);
+
rb_head_page_deactivate(cpu_buffer);
if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
@@ -963,14 +975,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
return 0;
}
-static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
- unsigned nr_pages)
+static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
{
+ int i;
struct buffer_page *bpage, *tmp;
- LIST_HEAD(pages);
- unsigned i;
-
- WARN_ON(!nr_pages);
for (i = 0; i < nr_pages; i++) {
struct page *page;
@@ -981,15 +989,13 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
*/
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL | __GFP_NORETRY,
- cpu_to_node(cpu_buffer->cpu));
+ cpu_to_node(cpu));
if (!bpage)
goto free_pages;
- rb_check_bpage(cpu_buffer, bpage);
+ list_add(&bpage->list, pages);
- list_add(&bpage->list, &pages);
-
- page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
+ page = alloc_pages_node(cpu_to_node(cpu),
GFP_KERNEL | __GFP_NORETRY, 0);
if (!page)
goto free_pages;
@@ -997,6 +1003,27 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
rb_init_page(bpage->page);
}
+ return 0;
+
+free_pages:
+ list_for_each_entry_safe(bpage, tmp, pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
+
+ return -ENOMEM;
+}
+
+static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+ unsigned nr_pages)
+{
+ LIST_HEAD(pages);
+
+ WARN_ON(!nr_pages);
+
+ if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
+ return -ENOMEM;
+
/*
* The ring buffer page list is a circular list that does not
* start and end with a list head. All page list items point to
@@ -1005,20 +1032,15 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
cpu_buffer->pages = pages.next;
list_del(&pages);
+ cpu_buffer->nr_pages = nr_pages;
+
rb_check_pages(cpu_buffer);
return 0;
-
- free_pages:
- list_for_each_entry_safe(bpage, tmp, &pages, list) {
- list_del_init(&bpage->list);
- free_buffer_page(bpage);
- }
- return -ENOMEM;
}
static struct ring_buffer_per_cpu *
-rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
+rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct buffer_page *bpage;
@@ -1035,6 +1057,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
raw_spin_lock_init(&cpu_buffer->reader_lock);
lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+ INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
+ init_completion(&cpu_buffer->update_done);
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
@@ -1052,7 +1076,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
- ret = rb_allocate_pages(cpu_buffer, buffer->pages);
+ ret = rb_allocate_pages(cpu_buffer, nr_pages);
if (ret < 0)
goto fail_free_reader;
@@ -1113,7 +1137,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
{
struct ring_buffer *buffer;
int bsize;
- int cpu;
+ int cpu, nr_pages;
/* keep it in its own cache line */
buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
@@ -1124,14 +1148,14 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
goto fail_free_buffer;
- buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+ nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
buffer->flags = flags;
buffer->clock = trace_clock_local;
buffer->reader_lock_key = key;
/* need at least two pages */
- if (buffer->pages < 2)
- buffer->pages = 2;
+ if (nr_pages < 2)
+ nr_pages = 2;
/*
* In case of non-hotplug cpu, if the ring-buffer is allocated
@@ -1154,7 +1178,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
for_each_buffer_cpu(buffer, cpu) {
buffer->buffers[cpu] =
- rb_allocate_cpu_buffer(buffer, cpu);
+ rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
if (!buffer->buffers[cpu])
goto fail_free_buffers;
}
@@ -1222,58 +1246,222 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
-static void
-rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
+static inline unsigned long rb_page_entries(struct buffer_page *bpage)
{
- struct buffer_page *bpage;
- struct list_head *p;
- unsigned i;
+ return local_read(&bpage->entries) & RB_WRITE_MASK;
+}
+
+static inline unsigned long rb_page_write(struct buffer_page *bpage)
+{
+ return local_read(&bpage->write) & RB_WRITE_MASK;
+}
+
+static int
+rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
+{
+ struct list_head *tail_page, *to_remove, *next_page;
+ struct buffer_page *to_remove_page, *tmp_iter_page;
+ struct buffer_page *last_page, *first_page;
+ unsigned int nr_removed;
+ unsigned long head_bit;
+ int page_entries;
+
+ head_bit = 0;
raw_spin_lock_irq(&cpu_buffer->reader_lock);
- rb_head_page_deactivate(cpu_buffer);
+ atomic_inc(&cpu_buffer->record_disabled);
+ /*
+ * We don't race with the readers since we have acquired the reader
+ * lock. We also don't race with writers after disabling recording.
+ * This makes it easy to figure out the first and the last page to be
+ * removed from the list. We unlink all the pages in between including
+ * the first and last pages. This is done in a busy loop so that we
+ * lose the least number of traces.
+ * The pages are freed after we restart recording and unlock readers.
+ */
+ tail_page = &cpu_buffer->tail_page->list;
- for (i = 0; i < nr_pages; i++) {
- if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
- goto out;
- p = cpu_buffer->pages->next;
- bpage = list_entry(p, struct buffer_page, list);
- list_del_init(&bpage->list);
- free_buffer_page(bpage);
+ /*
+ * tail page might be on reader page, we remove the next page
+ * from the ring buffer
+ */
+ if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+ tail_page = rb_list_head(tail_page->next);
+ to_remove = tail_page;
+
+ /* start of pages to remove */
+ first_page = list_entry(rb_list_head(to_remove->next),
+ struct buffer_page, list);
+
+ for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
+ to_remove = rb_list_head(to_remove)->next;
+ head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
}
- if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
- goto out;
- rb_reset_cpu(cpu_buffer);
- rb_check_pages(cpu_buffer);
+ next_page = rb_list_head(to_remove)->next;
-out:
+ /*
+ * Now we remove all pages between tail_page and next_page.
+ * Make sure that we have head_bit value preserved for the
+ * next page
+ */
+ tail_page->next = (struct list_head *)((unsigned long)next_page |
+ head_bit);
+ next_page = rb_list_head(next_page);
+ next_page->prev = tail_page;
+
+ /* make sure pages points to a valid page in the ring buffer */
+ cpu_buffer->pages = next_page;
+
+ /* update head page */
+ if (head_bit)
+ cpu_buffer->head_page = list_entry(next_page,
+ struct buffer_page, list);
+
+ /*
+ * change read pointer to make sure any read iterators reset
+ * themselves
+ */
+ cpu_buffer->read = 0;
+
+ /* pages are removed, resume tracing and then free the pages */
+ atomic_dec(&cpu_buffer->record_disabled);
raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+
+ RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
+
+ /* last buffer page to remove */
+ last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
+ list);
+ tmp_iter_page = first_page;
+
+ do {
+ to_remove_page = tmp_iter_page;
+ rb_inc_page(cpu_buffer, &tmp_iter_page);
+
+ /* update the counters */
+ page_entries = rb_page_entries(to_remove_page);
+ if (page_entries) {
+ /*
+ * If something was added to this page, it was full
+ * since it is not the tail page. So we deduct the
+ * bytes consumed in ring buffer from here.
+ * No need to update overruns, since this page is
+ * deleted from ring buffer and its entries are
+ * already accounted for.
+ */
+ local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ }
+
+ /*
+ * We have already removed references to this list item, just
+ * free up the buffer_page and its page
+ */
+ free_buffer_page(to_remove_page);
+ nr_removed--;
+
+ } while (to_remove_page != last_page);
+
+ RB_WARN_ON(cpu_buffer, nr_removed);
+
+ return nr_removed == 0;
}
-static void
-rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
- struct list_head *pages, unsigned nr_pages)
+static int
+rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
- struct buffer_page *bpage;
- struct list_head *p;
- unsigned i;
+ struct list_head *pages = &cpu_buffer->new_pages;
+ int retries, success;
raw_spin_lock_irq(&cpu_buffer->reader_lock);
- rb_head_page_deactivate(cpu_buffer);
+ /*
+ * We are holding the reader lock, so the reader page won't be swapped
+ * in the ring buffer. Now we are racing with the writer trying to
+ * move head page and the tail page.
+ * We are going to adapt the reader page update process where:
+ * 1. We first splice the start and end of list of new pages between
+ * the head page and its previous page.
+ * 2. We cmpxchg the prev_page->next to point from head page to the
+ * start of new pages list.
+ * 3. Finally, we update the head->prev to the end of new list.
+ *
+ * We will try this process 10 times, to make sure that we don't keep
+ * spinning.
+ */
+ retries = 10;
+ success = 0;
+ while (retries--) {
+ struct list_head *head_page, *prev_page, *r;
+ struct list_head *last_page, *first_page;
+ struct list_head *head_page_with_bit;
- for (i = 0; i < nr_pages; i++) {
- if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
- goto out;
- p = pages->next;
- bpage = list_entry(p, struct buffer_page, list);
- list_del_init(&bpage->list);
- list_add_tail(&bpage->list, cpu_buffer->pages);
+ head_page = &rb_set_head_page(cpu_buffer)->list;
+ prev_page = head_page->prev;
+
+ first_page = pages->next;
+ last_page = pages->prev;
+
+ head_page_with_bit = (struct list_head *)
+ ((unsigned long)head_page | RB_PAGE_HEAD);
+
+ last_page->next = head_page_with_bit;
+ first_page->prev = prev_page;
+
+ r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
+
+ if (r == head_page_with_bit) {
+ /*
+ * yay, we replaced the page pointer to our new list,
+ * now, we just have to update to head page's prev
+ * pointer to point to end of list
+ */
+ head_page->prev = last_page;
+ success = 1;
+ break;
+ }
}
- rb_reset_cpu(cpu_buffer);
- rb_check_pages(cpu_buffer);
-out:
+ if (success)
+ INIT_LIST_HEAD(pages);
+ /*
+ * If we weren't successful in adding in new pages, warn and stop
+ * tracing
+ */
+ RB_WARN_ON(cpu_buffer, !success);
raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+
+ /* free pages if they weren't inserted */
+ if (!success) {
+ struct buffer_page *bpage, *tmp;
+ list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
+ list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
+ }
+ return success;
+}
+
+static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ int success;
+
+ if (cpu_buffer->nr_pages_to_update > 0)
+ success = rb_insert_pages(cpu_buffer);
+ else
+ success = rb_remove_pages(cpu_buffer,
+ -cpu_buffer->nr_pages_to_update);
+
+ if (success)
+ cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
+}
+
+static void update_pages_handler(struct work_struct *work)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
+ struct ring_buffer_per_cpu, update_pages_work);
+ rb_update_pages(cpu_buffer);
+ complete(&cpu_buffer->update_done);
}
/**
@@ -1283,16 +1471,14 @@ out:
*
* Minimum size is 2 * BUF_PAGE_SIZE.
*
- * Returns -1 on failure.
+ * Returns 0 on success and < 0 on failure.
*/
-int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
+int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
+ int cpu_id)
{
struct ring_buffer_per_cpu *cpu_buffer;
- unsigned nr_pages, rm_pages, new_pages;
- struct buffer_page *bpage, *tmp;
- unsigned long buffer_size;
- LIST_HEAD(pages);
- int i, cpu;
+ unsigned nr_pages;
+ int cpu, err = 0;
/*
* Always succeed at resizing a non-existent buffer:
@@ -1302,113 +1488,154 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
size *= BUF_PAGE_SIZE;
- buffer_size = buffer->pages * BUF_PAGE_SIZE;
/* we need a minimum of two pages */
if (size < BUF_PAGE_SIZE * 2)
size = BUF_PAGE_SIZE * 2;
- if (size == buffer_size)
- return size;
-
- atomic_inc(&buffer->record_disabled);
+ nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
- /* Make sure all writers are done with this buffer. */
- synchronize_sched();
+ /*
+ * Don't succeed if resizing is disabled, as a reader might be
+ * manipulating the ring buffer and is expecting a sane state while
+ * this is true.
+ */
+ if (atomic_read(&buffer->resize_disabled))
+ return -EBUSY;
+ /* prevent another thread from changing buffer sizes */
mutex_lock(&buffer->mutex);
- get_online_cpus();
-
- nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
- if (size < buffer_size) {
+ if (cpu_id == RING_BUFFER_ALL_CPUS) {
+ /* calculate the pages to update */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
- /* easy case, just free pages */
- if (RB_WARN_ON(buffer, nr_pages >= buffer->pages))
- goto out_fail;
+ cpu_buffer->nr_pages_to_update = nr_pages -
+ cpu_buffer->nr_pages;
+ /*
+ * nothing more to do for removing pages or no update
+ */
+ if (cpu_buffer->nr_pages_to_update <= 0)
+ continue;
+ /*
+ * to add pages, make sure all new pages can be
+ * allocated without receiving ENOMEM
+ */
+ INIT_LIST_HEAD(&cpu_buffer->new_pages);
+ if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
+ &cpu_buffer->new_pages, cpu)) {
+ /* not enough memory for new pages */
+ err = -ENOMEM;
+ goto out_err;
+ }
+ }
- rm_pages = buffer->pages - nr_pages;
+ get_online_cpus();
+ /*
+ * Fire off all the required work handlers
+ * We can't schedule on offline CPUs, but it's not necessary
+ * since we can change their buffer sizes without any race.
+ */
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ if (!cpu_buffer->nr_pages_to_update)
+ continue;
+
+ if (cpu_online(cpu))
+ schedule_work_on(cpu,
+ &cpu_buffer->update_pages_work);
+ else
+ rb_update_pages(cpu_buffer);
+ }
+ /* wait for all the updates to complete */
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
- rb_remove_pages(cpu_buffer, rm_pages);
+ if (!cpu_buffer->nr_pages_to_update)
+ continue;
+
+ if (cpu_online(cpu))
+ wait_for_completion(&cpu_buffer->update_done);
+ cpu_buffer->nr_pages_to_update = 0;
}
- goto out;
- }
- /*
- * This is a bit more difficult. We only want to add pages
- * when we can allocate enough for all CPUs. We do this
- * by allocating all the pages and storing them on a local
- * link list. If we succeed in our allocation, then we
- * add these pages to the cpu_buffers. Otherwise we just free
- * them all and return -ENOMEM;
- */
- if (RB_WARN_ON(buffer, nr_pages <= buffer->pages))
- goto out_fail;
+ put_online_cpus();
+ } else {
+ cpu_buffer = buffer->buffers[cpu_id];
- new_pages = nr_pages - buffer->pages;
+ if (nr_pages == cpu_buffer->nr_pages)
+ goto out;
- for_each_buffer_cpu(buffer, cpu) {
- for (i = 0; i < new_pages; i++) {
- struct page *page;
- /*
- * __GFP_NORETRY flag makes sure that the allocation
- * fails gracefully without invoking oom-killer and
- * the system is not destabilized.
- */
- bpage = kzalloc_node(ALIGN(sizeof(*bpage),
- cache_line_size()),
- GFP_KERNEL | __GFP_NORETRY,
- cpu_to_node(cpu));
- if (!bpage)
- goto free_pages;
- list_add(&bpage->list, &pages);
- page = alloc_pages_node(cpu_to_node(cpu),
- GFP_KERNEL | __GFP_NORETRY, 0);
- if (!page)
- goto free_pages;
- bpage->page = page_address(page);
- rb_init_page(bpage->page);
+ cpu_buffer->nr_pages_to_update = nr_pages -
+ cpu_buffer->nr_pages;
+
+ INIT_LIST_HEAD(&cpu_buffer->new_pages);
+ if (cpu_buffer->nr_pages_to_update > 0 &&
+ __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
+ &cpu_buffer->new_pages, cpu_id)) {
+ err = -ENOMEM;
+ goto out_err;
}
- }
- for_each_buffer_cpu(buffer, cpu) {
- cpu_buffer = buffer->buffers[cpu];
- rb_insert_pages(cpu_buffer, &pages, new_pages);
- }
+ get_online_cpus();
- if (RB_WARN_ON(buffer, !list_empty(&pages)))
- goto out_fail;
+ if (cpu_online(cpu_id)) {
+ schedule_work_on(cpu_id,
+ &cpu_buffer->update_pages_work);
+ wait_for_completion(&cpu_buffer->update_done);
+ } else
+ rb_update_pages(cpu_buffer);
+
+ cpu_buffer->nr_pages_to_update = 0;
+ put_online_cpus();
+ }
out:
- buffer->pages = nr_pages;
- put_online_cpus();
+ /*
+ * The ring buffer resize can happen with the ring buffer
+ * enabled, so that the update disturbs the tracing as little
+ * as possible. But if the buffer is disabled, we do not need
+ * to worry about that, and we can take the time to verify
+ * that the buffer is not corrupt.
+ */
+ if (atomic_read(&buffer->record_disabled)) {
+ atomic_inc(&buffer->record_disabled);
+ /*
+ * Even though the buffer was disabled, we must make sure
+ * that it is truly disabled before calling rb_check_pages.
+ * There could have been a race between checking
+ * record_disable and incrementing it.
+ */
+ synchronize_sched();
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+ rb_check_pages(cpu_buffer);
+ }
+ atomic_dec(&buffer->record_disabled);
+ }
+
mutex_unlock(&buffer->mutex);
+ return size;
- atomic_dec(&buffer->record_disabled);
+ out_err:
+ for_each_buffer_cpu(buffer, cpu) {
+ struct buffer_page *bpage, *tmp;
- return size;
+ cpu_buffer = buffer->buffers[cpu];
+ cpu_buffer->nr_pages_to_update = 0;
- free_pages:
- list_for_each_entry_safe(bpage, tmp, &pages, list) {
- list_del_init(&bpage->list);
- free_buffer_page(bpage);
- }
- put_online_cpus();
- mutex_unlock(&buffer->mutex);
- atomic_dec(&buffer->record_disabled);
- return -ENOMEM;
+ if (list_empty(&cpu_buffer->new_pages))
+ continue;
- /*
- * Something went totally wrong, and we are too paranoid
- * to even clean up the mess.
- */
- out_fail:
- put_online_cpus();
+ list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
+ list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
+ }
+ }
mutex_unlock(&buffer->mutex);
- atomic_dec(&buffer->record_disabled);
- return -1;
+ return err;
}
EXPORT_SYMBOL_GPL(ring_buffer_resize);
@@ -1447,21 +1674,11 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
return __rb_page_index(iter->head_page, iter->head);
}
-static inline unsigned long rb_page_write(struct buffer_page *bpage)
-{
- return local_read(&bpage->write) & RB_WRITE_MASK;
-}
-
static inline unsigned rb_page_commit(struct buffer_page *bpage)
{
return local_read(&bpage->page->commit);
}
-static inline unsigned long rb_page_entries(struct buffer_page *bpage)
-{
- return local_read(&bpage->entries) & RB_WRITE_MASK;
-}
-
/* Size is determined by what has been committed */
static inline unsigned rb_page_size(struct buffer_page *bpage)
{
@@ -1510,7 +1727,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
* assign the commit to the tail.
*/
again:
- max_count = cpu_buffer->buffer->pages * 100;
+ max_count = cpu_buffer->nr_pages * 100;
while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
if (RB_WARN_ON(cpu_buffer, !(--max_count)))
@@ -3486,6 +3703,7 @@ ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
iter->cpu_buffer = cpu_buffer;
+ atomic_inc(&buffer->resize_disabled);
atomic_inc(&cpu_buffer->record_disabled);
return iter;
@@ -3548,7 +3766,14 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ /*
+ * Ring buffer is disabled from recording, here's a good place
+ * to check the integrity of the ring buffer.
+ */
+ rb_check_pages(cpu_buffer);
+
atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&cpu_buffer->buffer->resize_disabled);
kfree(iter);
}
EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
@@ -3588,9 +3813,18 @@ EXPORT_SYMBOL_GPL(ring_buffer_read);
* ring_buffer_size - return the size of the ring buffer (in bytes)
* @buffer: The ring buffer.
*/
-unsigned long ring_buffer_size(struct ring_buffer *buffer)
+unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
{
- return BUF_PAGE_SIZE * buffer->pages;
+ /*
+ * Earlier, this method returned
+ * BUF_PAGE_SIZE * buffer->nr_pages
+ * Since the nr_pages field is now removed, we have converted this to
+ * return the per cpu buffer value.
+ */
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return 0;
+
+ return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
}
EXPORT_SYMBOL_GPL(ring_buffer_size);
@@ -3611,6 +3845,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->commit_page = cpu_buffer->head_page;
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+ INIT_LIST_HEAD(&cpu_buffer->new_pages);
local_set(&cpu_buffer->reader_page->write, 0);
local_set(&cpu_buffer->reader_page->entries, 0);
local_set(&cpu_buffer->reader_page->page->commit, 0);
@@ -3647,8 +3882,12 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return;
+ atomic_inc(&buffer->resize_disabled);
atomic_inc(&cpu_buffer->record_disabled);
+ /* Make sure all commits have finished */
+ synchronize_sched();
+
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
@@ -3664,6 +3903,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&buffer->resize_disabled);
}
EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
@@ -3765,8 +4005,11 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
!cpumask_test_cpu(cpu, buffer_b->cpumask))
goto out;
+ cpu_buffer_a = buffer_a->buffers[cpu];
+ cpu_buffer_b = buffer_b->buffers[cpu];
+
/* At least make sure the two buffers are somewhat the same */
- if (buffer_a->pages != buffer_b->pages)
+ if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
goto out;
ret = -EAGAIN;
@@ -3780,9 +4023,6 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
if (atomic_read(&buffer_b->record_disabled))
goto out;
- cpu_buffer_a = buffer_a->buffers[cpu];
- cpu_buffer_b = buffer_b->buffers[cpu];
-
if (atomic_read(&cpu_buffer_a->record_disabled))
goto out;
@@ -4071,6 +4311,8 @@ static int rb_cpu_notify(struct notifier_block *self,
struct ring_buffer *buffer =
container_of(self, struct ring_buffer, cpu_notify);
long cpu = (long)hcpu;
+ int cpu_i, nr_pages_same;
+ unsigned int nr_pages;
switch (action) {
case CPU_UP_PREPARE:
@@ -4078,8 +4320,23 @@ static int rb_cpu_notify(struct notifier_block *self,
if (cpumask_test_cpu(cpu, buffer->cpumask))
return NOTIFY_OK;
+ nr_pages = 0;
+ nr_pages_same = 1;
+ /* check if all cpu sizes are same */
+ for_each_buffer_cpu(buffer, cpu_i) {
+ /* fill in the size from first enabled cpu */
+ if (nr_pages == 0)
+ nr_pages = buffer->buffers[cpu_i]->nr_pages;
+ if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
+ nr_pages_same = 0;
+ break;
+ }
+ }
+ /* allocate minimum pages, user can later expand it */
+ if (!nr_pages_same)
+ nr_pages = 2;
buffer->buffers[cpu] =
- rb_allocate_cpu_buffer(buffer, cpu);
+ rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
if (!buffer->buffers[cpu]) {
WARN(1, "failed to allocate ring buffer on CPU %ld\n",
cpu);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a22255c1010..68032c6177db 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -87,18 +87,6 @@ static int tracing_disabled = 1;
DEFINE_PER_CPU(int, ftrace_cpu_disabled);
-static inline void ftrace_disable_cpu(void)
-{
- preempt_disable();
- __this_cpu_inc(ftrace_cpu_disabled);
-}
-
-static inline void ftrace_enable_cpu(void)
-{
- __this_cpu_dec(ftrace_cpu_disabled);
- preempt_enable();
-}
-
cpumask_var_t __read_mostly tracing_buffer_mask;
/*
@@ -629,7 +617,6 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
int len;
- void *ret;
if (s->len <= s->readpos)
return -EBUSY;
@@ -637,9 +624,7 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
len = s->len - s->readpos;
if (cnt > len)
cnt = len;
- ret = memcpy(buf, s->buffer + s->readpos, cnt);
- if (!ret)
- return -EFAULT;
+ memcpy(buf, s->buffer + s->readpos, cnt);
s->readpos += cnt;
return cnt;
@@ -751,8 +736,6 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
arch_spin_lock(&ftrace_max_lock);
- ftrace_disable_cpu();
-
ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
if (ret == -EBUSY) {
@@ -766,8 +749,6 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
"Failed to swap buffers due to commit in progress\n");
}
- ftrace_enable_cpu();
-
WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
__update_max_tr(tr, tsk, cpu);
@@ -782,8 +763,6 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
* Register a new plugin tracer.
*/
int register_tracer(struct tracer *type)
-__releases(kernel_lock)
-__acquires(kernel_lock)
{
struct tracer *t;
int ret = 0;
@@ -841,7 +820,8 @@ __acquires(kernel_lock)
/* If we expanded the buffers, make sure the max is expanded too */
if (ring_buffer_expanded && type->use_max_tr)
- ring_buffer_resize(max_tr.buffer, trace_buf_size);
+ ring_buffer_resize(max_tr.buffer, trace_buf_size,
+ RING_BUFFER_ALL_CPUS);
/* the test is responsible for initializing and enabling */
pr_info("Testing tracer %s: ", type->name);
@@ -857,7 +837,8 @@ __acquires(kernel_lock)
/* Shrink the max buffer again */
if (ring_buffer_expanded && type->use_max_tr)
- ring_buffer_resize(max_tr.buffer, 1);
+ ring_buffer_resize(max_tr.buffer, 1,
+ RING_BUFFER_ALL_CPUS);
printk(KERN_CONT "PASSED\n");
}
@@ -917,13 +898,6 @@ out:
mutex_unlock(&trace_types_lock);
}
-static void __tracing_reset(struct ring_buffer *buffer, int cpu)
-{
- ftrace_disable_cpu();
- ring_buffer_reset_cpu(buffer, cpu);
- ftrace_enable_cpu();
-}
-
void tracing_reset(struct trace_array *tr, int cpu)
{
struct ring_buffer *buffer = tr->buffer;
@@ -932,7 +906,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
/* Make sure all commits have finished */
synchronize_sched();
- __tracing_reset(buffer, cpu);
+ ring_buffer_reset_cpu(buffer, cpu);
ring_buffer_record_enable(buffer);
}
@@ -950,7 +924,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
- __tracing_reset(buffer, cpu);
+ ring_buffer_reset_cpu(buffer, cpu);
ring_buffer_record_enable(buffer);
}
@@ -1498,25 +1472,119 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
#endif /* CONFIG_STACKTRACE */
+/* created for use with alloc_percpu */
+struct trace_buffer_struct {
+ char buffer[TRACE_BUF_SIZE];
+};
+
+static struct trace_buffer_struct *trace_percpu_buffer;
+static struct trace_buffer_struct *trace_percpu_sirq_buffer;
+static struct trace_buffer_struct *trace_percpu_irq_buffer;
+static struct trace_buffer_struct *trace_percpu_nmi_buffer;
+
+/*
+ * The buffer used is dependent on the context. There is a per cpu
+ * buffer for normal context, softirq contex, hard irq context and
+ * for NMI context. Thise allows for lockless recording.
+ *
+ * Note, if the buffers failed to be allocated, then this returns NULL
+ */
+static char *get_trace_buf(void)
+{
+ struct trace_buffer_struct *percpu_buffer;
+ struct trace_buffer_struct *buffer;
+
+ /*
+ * If we have allocated per cpu buffers, then we do not
+ * need to do any locking.
+ */
+ if (in_nmi())
+ percpu_buffer = trace_percpu_nmi_buffer;
+ else if (in_irq())
+ percpu_buffer = trace_percpu_irq_buffer;
+ else if (in_softirq())
+ percpu_buffer = trace_percpu_sirq_buffer;
+ else
+ percpu_buffer = trace_percpu_buffer;
+
+ if (!percpu_buffer)
+ return NULL;
+
+ buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
+
+ return buffer->buffer;
+}
+
+static int alloc_percpu_trace_buffer(void)
+{
+ struct trace_buffer_struct *buffers;
+ struct trace_buffer_struct *sirq_buffers;
+ struct trace_buffer_struct *irq_buffers;
+ struct trace_buffer_struct *nmi_buffers;
+
+ buffers = alloc_percpu(struct trace_buffer_struct);
+ if (!buffers)
+ goto err_warn;
+
+ sirq_buffers = alloc_percpu(struct trace_buffer_struct);
+ if (!sirq_buffers)
+ goto err_sirq;
+
+ irq_buffers = alloc_percpu(struct trace_buffer_struct);
+ if (!irq_buffers)
+ goto err_irq;
+
+ nmi_buffers = alloc_percpu(struct trace_buffer_struct);
+ if (!nmi_buffers)
+ goto err_nmi;
+
+ trace_percpu_buffer = buffers;
+ trace_percpu_sirq_buffer = sirq_buffers;
+ trace_percpu_irq_buffer = irq_buffers;
+ trace_percpu_nmi_buffer = nmi_buffers;
+
+ return 0;
+
+ err_nmi:
+ free_percpu(irq_buffers);
+ err_irq:
+ free_percpu(sirq_buffers);
+ err_sirq:
+ free_percpu(buffers);
+ err_warn:
+ WARN(1, "Could not allocate percpu trace_printk buffer");
+ return -ENOMEM;
+}
+
+void trace_printk_init_buffers(void)
+{
+ static int buffers_allocated;
+
+ if (buffers_allocated)
+ return;
+
+ if (alloc_percpu_trace_buffer())
+ return;
+
+ pr_info("ftrace: Allocated trace_printk buffers\n");
+
+ buffers_allocated = 1;
+}
+
/**
* trace_vbprintk - write binary msg to tracing buffer
*
*/
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
- static arch_spinlock_t trace_buf_lock =
- (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
- static u32 trace_buf[TRACE_BUF_SIZE];
-
struct ftrace_event_call *call = &event_bprint;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
struct trace_array *tr = &global_trace;
- struct trace_array_cpu *data;
struct bprint_entry *entry;
unsigned long flags;
- int disable;
- int cpu, len = 0, size, pc;
+ char *tbuffer;
+ int len = 0, size, pc;
if (unlikely(tracing_selftest_running || tracing_disabled))
return 0;
@@ -1526,43 +1594,36 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
pc = preempt_count();
preempt_disable_notrace();
- cpu = raw_smp_processor_id();
- data = tr->data[cpu];
- disable = atomic_inc_return(&data->disabled);
- if (unlikely(disable != 1))
+ tbuffer = get_trace_buf();
+ if (!tbuffer) {
+ len = 0;
goto out;
+ }
- /* Lockdep uses trace_printk for lock tracing */
- local_irq_save(flags);
- arch_spin_lock(&trace_buf_lock);
- len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+ len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
- if (len > TRACE_BUF_SIZE || len < 0)
- goto out_unlock;
+ if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
+ goto out;
+ local_save_flags(flags);
size = sizeof(*entry) + sizeof(u32) * len;
buffer = tr->buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
flags, pc);
if (!event)
- goto out_unlock;
+ goto out;
entry = ring_buffer_event_data(event);
entry->ip = ip;
entry->fmt = fmt;
- memcpy(entry->buf, trace_buf, sizeof(u32) * len);
+ memcpy(entry->buf, tbuffer, sizeof(u32) * len);
if (!filter_check_discard(call, entry, buffer, event)) {
ring_buffer_unlock_commit(buffer, event);
ftrace_trace_stack(buffer, flags, 6, pc);
}
-out_unlock:
- arch_spin_unlock(&trace_buf_lock);
- local_irq_restore(flags);
-
out:
- atomic_dec_return(&data->disabled);
preempt_enable_notrace();
unpause_graph_tracing();
@@ -1588,58 +1649,53 @@ int trace_array_printk(struct trace_array *tr,
int trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args)
{
- static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
- static char trace_buf[TRACE_BUF_SIZE];
-
struct ftrace_event_call *call = &event_print;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
- struct trace_array_cpu *data;
- int cpu, len = 0, size, pc;
+ int len = 0, size, pc;
struct print_entry *entry;
- unsigned long irq_flags;
- int disable;
+ unsigned long flags;
+ char *tbuffer;
if (tracing_disabled || tracing_selftest_running)
return 0;
+ /* Don't pollute graph traces with trace_vprintk internals */
+ pause_graph_tracing();
+
pc = preempt_count();
preempt_disable_notrace();
- cpu = raw_smp_processor_id();
- data = tr->data[cpu];
- disable = atomic_inc_return(&data->disabled);
- if (unlikely(disable != 1))
+
+ tbuffer = get_trace_buf();
+ if (!tbuffer) {
+ len = 0;
goto out;
+ }
- pause_graph_tracing();
- raw_local_irq_save(irq_flags);
- arch_spin_lock(&trace_buf_lock);
- len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+ len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
+ if (len > TRACE_BUF_SIZE)
+ goto out;
+ local_save_flags(flags);
size = sizeof(*entry) + len + 1;
buffer = tr->buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
- irq_flags, pc);
+ flags, pc);
if (!event)
- goto out_unlock;
+ goto out;
entry = ring_buffer_event_data(event);
entry->ip = ip;
- memcpy(&entry->buf, trace_buf, len);
+ memcpy(&entry->buf, tbuffer, len);
entry->buf[len] = '\0';
if (!filter_check_discard(call, entry, buffer, event)) {
ring_buffer_unlock_commit(buffer, event);
- ftrace_trace_stack(buffer, irq_flags, 6, pc);
+ ftrace_trace_stack(buffer, flags, 6, pc);
}
-
- out_unlock:
- arch_spin_unlock(&trace_buf_lock);
- raw_local_irq_restore(irq_flags);
- unpause_graph_tracing();
out:
- atomic_dec_return(&data->disabled);
preempt_enable_notrace();
+ unpause_graph_tracing();
return len;
}
@@ -1652,14 +1708,9 @@ EXPORT_SYMBOL_GPL(trace_vprintk);
static void trace_iterator_increment(struct trace_iterator *iter)
{
- /* Don't allow ftrace to trace into the ring buffers */
- ftrace_disable_cpu();
-
iter->idx++;
if (iter->buffer_iter[iter->cpu])
ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
-
- ftrace_enable_cpu();
}
static struct trace_entry *
@@ -1669,17 +1720,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
struct ring_buffer_event *event;
struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
- /* Don't allow ftrace to trace into the ring buffers */
- ftrace_disable_cpu();
-
if (buf_iter)
event = ring_buffer_iter_peek(buf_iter, ts);
else
event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
lost_events);
- ftrace_enable_cpu();
-
if (event) {
iter->ent_size = ring_buffer_event_length(event);
return ring_buffer_event_data(event);
@@ -1769,11 +1815,8 @@ void *trace_find_next_entry_inc(struct trace_iterator *iter)
static void trace_consume(struct trace_iterator *iter)
{
- /* Don't allow ftrace to trace into the ring buffers */
- ftrace_disable_cpu();
ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
&iter->lost_events);
- ftrace_enable_cpu();
}
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1862,16 +1905,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
iter->cpu = 0;
iter->idx = -1;
- ftrace_disable_cpu();
-
if (cpu_file == TRACE_PIPE_ALL_CPU) {
for_each_tracing_cpu(cpu)
tracing_iter_reset(iter, cpu);
} else
tracing_iter_reset(iter, cpu_file);
- ftrace_enable_cpu();
-
iter->leftover = 0;
for (p = iter; p && l < *pos; p = s_next(m, p, &l))
;
@@ -2332,15 +2371,13 @@ static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file)
{
long cpu_file = (long) inode->i_private;
- void *fail_ret = ERR_PTR(-ENOMEM);
struct trace_iterator *iter;
- struct seq_file *m;
- int cpu, ret;
+ int cpu;
if (tracing_disabled)
return ERR_PTR(-ENODEV);
- iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
if (!iter)
return ERR_PTR(-ENOMEM);
@@ -2397,32 +2434,15 @@ __tracing_open(struct inode *inode, struct file *file)
tracing_iter_reset(iter, cpu);
}
- ret = seq_open(file, &tracer_seq_ops);
- if (ret < 0) {
- fail_ret = ERR_PTR(ret);
- goto fail_buffer;
- }
-
- m = file->private_data;
- m->private = iter;
-
mutex_unlock(&trace_types_lock);
return iter;
- fail_buffer:
- for_each_tracing_cpu(cpu) {
- if (iter->buffer_iter[cpu])
- ring_buffer_read_finish(iter->buffer_iter[cpu]);
- }
- free_cpumask_var(iter->started);
- tracing_start();
fail:
mutex_unlock(&trace_types_lock);
kfree(iter->trace);
- kfree(iter);
-
- return fail_ret;
+ seq_release_private(inode, file);
+ return ERR_PTR(-ENOMEM);
}
int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -2458,11 +2478,10 @@ static int tracing_release(struct inode *inode, struct file *file)
tracing_start();
mutex_unlock(&trace_types_lock);
- seq_release(inode, file);
mutex_destroy(&iter->mutex);
free_cpumask_var(iter->started);
kfree(iter->trace);
- kfree(iter);
+ seq_release_private(inode, file);
return 0;
}
@@ -2648,10 +2667,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
if (cpumask_test_cpu(cpu, tracing_cpumask) &&
!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_inc(&global_trace.data[cpu]->disabled);
+ ring_buffer_record_disable_cpu(global_trace.buffer, cpu);
}
if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_dec(&global_trace.data[cpu]->disabled);
+ ring_buffer_record_enable_cpu(global_trace.buffer, cpu);
}
}
arch_spin_unlock(&ftrace_max_lock);
@@ -2974,7 +2995,14 @@ int tracer_init(struct tracer *t, struct trace_array *tr)
return t->init(tr);
}
-static int __tracing_resize_ring_buffer(unsigned long size)
+static void set_buffer_entries(struct trace_array *tr, unsigned long val)
+{
+ int cpu;
+ for_each_tracing_cpu(cpu)
+ tr->data[cpu]->entries = val;
+}
+
+static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
{
int ret;
@@ -2985,19 +3013,32 @@ static int __tracing_resize_ring_buffer(unsigned long size)
*/
ring_buffer_expanded = 1;
- ret = ring_buffer_resize(global_trace.buffer, size);
+ ret = ring_buffer_resize(global_trace.buffer, size, cpu);
if (ret < 0)
return ret;
if (!current_trace->use_max_tr)
goto out;
- ret = ring_buffer_resize(max_tr.buffer, size);
+ ret = ring_buffer_resize(max_tr.buffer, size, cpu);
if (ret < 0) {
- int r;
+ int r = 0;
+
+ if (cpu == RING_BUFFER_ALL_CPUS) {
+ int i;
+ for_each_tracing_cpu(i) {
+ r = ring_buffer_resize(global_trace.buffer,
+ global_trace.data[i]->entries,
+ i);
+ if (r < 0)
+ break;
+ }
+ } else {
+ r = ring_buffer_resize(global_trace.buffer,
+ global_trace.data[cpu]->entries,
+ cpu);
+ }
- r = ring_buffer_resize(global_trace.buffer,
- global_trace.entries);
if (r < 0) {
/*
* AARGH! We are left with different
@@ -3019,43 +3060,39 @@ static int __tracing_resize_ring_buffer(unsigned long size)
return ret;
}
- max_tr.entries = size;
+ if (cpu == RING_BUFFER_ALL_CPUS)
+ set_buffer_entries(&max_tr, size);
+ else
+ max_tr.data[cpu]->entries = size;
+
out:
- global_trace.entries = size;
+ if (cpu == RING_BUFFER_ALL_CPUS)
+ set_buffer_entries(&global_trace, size);
+ else
+ global_trace.data[cpu]->entries = size;
return ret;
}
-static ssize_t tracing_resize_ring_buffer(unsigned long size)
+static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id)
{
- int cpu, ret = size;
+ int ret = size;
mutex_lock(&trace_types_lock);
- tracing_stop();
-
- /* disable all cpu buffers */
- for_each_tracing_cpu(cpu) {
- if (global_trace.data[cpu])
- atomic_inc(&global_trace.data[cpu]->disabled);
- if (max_tr.data[cpu])
- atomic_inc(&max_tr.data[cpu]->disabled);
+ if (cpu_id != RING_BUFFER_ALL_CPUS) {
+ /* make sure, this cpu is enabled in the mask */
+ if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
+ ret = -EINVAL;
+ goto out;
+ }
}
- if (size != global_trace.entries)
- ret = __tracing_resize_ring_buffer(size);
-
+ ret = __tracing_resize_ring_buffer(size, cpu_id);
if (ret < 0)
ret = -ENOMEM;
- for_each_tracing_cpu(cpu) {
- if (global_trace.data[cpu])
- atomic_dec(&global_trace.data[cpu]->disabled);
- if (max_tr.data[cpu])
- atomic_dec(&max_tr.data[cpu]->disabled);
- }
-
- tracing_start();
+out:
mutex_unlock(&trace_types_lock);
return ret;
@@ -3078,7 +3115,8 @@ int tracing_update_buffers(void)
mutex_lock(&trace_types_lock);
if (!ring_buffer_expanded)
- ret = __tracing_resize_ring_buffer(trace_buf_size);
+ ret = __tracing_resize_ring_buffer(trace_buf_size,
+ RING_BUFFER_ALL_CPUS);
mutex_unlock(&trace_types_lock);
return ret;
@@ -3102,7 +3140,8 @@ static int tracing_set_tracer(const char *buf)
mutex_lock(&trace_types_lock);
if (!ring_buffer_expanded) {
- ret = __tracing_resize_ring_buffer(trace_buf_size);
+ ret = __tracing_resize_ring_buffer(trace_buf_size,
+ RING_BUFFER_ALL_CPUS);
if (ret < 0)
goto out;
ret = 0;
@@ -3128,8 +3167,8 @@ static int tracing_set_tracer(const char *buf)
* The max_tr ring buffer has some state (e.g. ring->clock) and
* we want preserve it.
*/
- ring_buffer_resize(max_tr.buffer, 1);
- max_tr.entries = 1;
+ ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
+ set_buffer_entries(&max_tr, 1);
}
destroy_trace_option_files(topts);
@@ -3137,10 +3176,17 @@ static int tracing_set_tracer(const char *buf)
topts = create_trace_option_files(current_trace);
if (current_trace->use_max_tr) {
- ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
- if (ret < 0)
- goto out;
- max_tr.entries = global_trace.entries;
+ int cpu;
+ /* we need to make per cpu buffer sizes equivalent */
+ for_each_tracing_cpu(cpu) {
+ ret = ring_buffer_resize(max_tr.buffer,
+ global_trace.data[cpu]->entries,
+ cpu);
+ if (ret < 0)
+ goto out;
+ max_tr.data[cpu]->entries =
+ global_trace.data[cpu]->entries;
+ }
}
if (t->init) {
@@ -3642,30 +3688,82 @@ out_err:
goto out;
}
+struct ftrace_entries_info {
+ struct trace_array *tr;
+ int cpu;
+};
+
+static int tracing_entries_open(struct inode *inode, struct file *filp)
+{
+ struct ftrace_entries_info *info;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->tr = &global_trace;
+ info->cpu = (unsigned long)inode->i_private;
+
+ filp->private_data = info;
+
+ return 0;
+}
+
static ssize_t
tracing_entries_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- struct trace_array *tr = filp->private_data;
- char buf[96];
- int r;
+ struct ftrace_entries_info *info = filp->private_data;
+ struct trace_array *tr = info->tr;
+ char buf[64];
+ int r = 0;
+ ssize_t ret;
mutex_lock(&trace_types_lock);
- if (!ring_buffer_expanded)
- r = sprintf(buf, "%lu (expanded: %lu)\n",
- tr->entries >> 10,
- trace_buf_size >> 10);
- else
- r = sprintf(buf, "%lu\n", tr->entries >> 10);
+
+ if (info->cpu == RING_BUFFER_ALL_CPUS) {
+ int cpu, buf_size_same;
+ unsigned long size;
+
+ size = 0;
+ buf_size_same = 1;
+ /* check if all cpu sizes are same */
+ for_each_tracing_cpu(cpu) {
+ /* fill in the size from first enabled cpu */
+ if (size == 0)
+ size = tr->data[cpu]->entries;
+ if (size != tr->data[cpu]->entries) {
+ buf_size_same = 0;
+ break;
+ }
+ }
+
+ if (buf_size_same) {
+ if (!ring_buffer_expanded)
+ r = sprintf(buf, "%lu (expanded: %lu)\n",
+ size >> 10,
+ trace_buf_size >> 10);
+ else
+ r = sprintf(buf, "%lu\n", size >> 10);
+ } else
+ r = sprintf(buf, "X\n");
+ } else
+ r = sprintf(buf, "%lu\n", tr->data[info->cpu]->entries >> 10);
+
mutex_unlock(&trace_types_lock);
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ return ret;
}
static ssize_t
tracing_entries_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ struct ftrace_entries_info *info = filp->private_data;
unsigned long val;
int ret;
@@ -3680,7 +3778,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
/* value is in KB */
val <<= 10;
- ret = tracing_resize_ring_buffer(val);
+ ret = tracing_resize_ring_buffer(val, info->cpu);
if (ret < 0)
return ret;
@@ -3689,6 +3787,16 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
return cnt;
}
+static int
+tracing_entries_release(struct inode *inode, struct file *filp)
+{
+ struct ftrace_entries_info *info = filp->private_data;
+
+ kfree(info);
+
+ return 0;
+}
+
static ssize_t
tracing_total_entries_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
@@ -3700,7 +3808,7 @@ tracing_total_entries_read(struct file *filp, char __user *ubuf,
mutex_lock(&trace_types_lock);
for_each_tracing_cpu(cpu) {
- size += tr->entries >> 10;
+ size += tr->data[cpu]->entries >> 10;
if (!ring_buffer_expanded)
expanded_size += trace_buf_size >> 10;
}
@@ -3734,7 +3842,7 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
if (trace_flags & TRACE_ITER_STOP_ON_FREE)
tracing_off();
/* resize the ring buffer to 0 */
- tracing_resize_ring_buffer(0);
+ tracing_resize_ring_buffer(0, RING_BUFFER_ALL_CPUS);
return 0;
}
@@ -3749,14 +3857,14 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
struct print_entry *entry;
unsigned long irq_flags;
struct page *pages[2];
+ void *map_page[2];
int nr_pages = 1;
ssize_t written;
- void *page1;
- void *page2;
int offset;
int size;
int len;
int ret;
+ int i;
if (tracing_disabled)
return -EINVAL;
@@ -3795,9 +3903,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
goto out;
}
- page1 = kmap_atomic(pages[0]);
- if (nr_pages == 2)
- page2 = kmap_atomic(pages[1]);
+ for (i = 0; i < nr_pages; i++)
+ map_page[i] = kmap_atomic(pages[i]);
local_save_flags(irq_flags);
size = sizeof(*entry) + cnt + 2; /* possible \n added */
@@ -3815,10 +3922,10 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (nr_pages == 2) {
len = PAGE_SIZE - offset;
- memcpy(&entry->buf, page1 + offset, len);
- memcpy(&entry->buf[len], page2, cnt - len);
+ memcpy(&entry->buf, map_page[0] + offset, len);
+ memcpy(&entry->buf[len], map_page[1], cnt - len);
} else
- memcpy(&entry->buf, page1 + offset, cnt);
+ memcpy(&entry->buf, map_page[0] + offset, cnt);
if (entry->buf[cnt - 1] != '\n') {
entry->buf[cnt] = '\n';
@@ -3833,11 +3940,10 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
*fpos += written;
out_unlock:
- if (nr_pages == 2)
- kunmap_atomic(page2);
- kunmap_atomic(page1);
- while (nr_pages > 0)
- put_page(pages[--nr_pages]);
+ for (i = 0; i < nr_pages; i++){
+ kunmap_atomic(map_page[i]);
+ put_page(pages[i]);
+ }
out:
return written;
}
@@ -3933,9 +4039,10 @@ static const struct file_operations tracing_pipe_fops = {
};
static const struct file_operations tracing_entries_fops = {
- .open = tracing_open_generic,
+ .open = tracing_entries_open,
.read = tracing_entries_read,
.write = tracing_entries_write,
+ .release = tracing_entries_release,
.llseek = generic_file_llseek,
};
@@ -4367,6 +4474,9 @@ static void tracing_init_debugfs_percpu(long cpu)
struct dentry *d_cpu;
char cpu_dir[30]; /* 30 characters should be more than enough */
+ if (!d_percpu)
+ return;
+
snprintf(cpu_dir, 30, "cpu%ld", cpu);
d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
if (!d_cpu) {
@@ -4387,6 +4497,9 @@ static void tracing_init_debugfs_percpu(long cpu)
trace_create_file("stats", 0444, d_cpu,
(void *) cpu, &tracing_stats_fops);
+
+ trace_create_file("buffer_size_kb", 0444, d_cpu,
+ (void *) cpu, &tracing_entries_fops);
}
#ifdef CONFIG_FTRACE_SELFTEST
@@ -4718,7 +4831,7 @@ static __init int tracer_init_debugfs(void)
(void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
trace_create_file("buffer_size_kb", 0644, d_tracer,
- &global_trace, &tracing_entries_fops);
+ (void *) RING_BUFFER_ALL_CPUS, &tracing_entries_fops);
trace_create_file("buffer_total_size_kb", 0444, d_tracer,
&global_trace, &tracing_total_entries_fops);
@@ -4957,6 +5070,10 @@ __init static int tracer_alloc_buffers(void)
if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
goto out_free_buffer_mask;
+ /* Only allocate trace_printk buffers if a trace_printk exists */
+ if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
+ trace_printk_init_buffers();
+
/* To save memory, keep the ring buffer size to its minimum */
if (ring_buffer_expanded)
ring_buf_size = trace_buf_size;
@@ -4975,7 +5092,6 @@ __init static int tracer_alloc_buffers(void)
WARN_ON(1);
goto out_free_cpumask;
}
- global_trace.entries = ring_buffer_size(global_trace.buffer);
if (global_trace.buffer_disabled)
tracing_off();
@@ -4988,7 +5104,6 @@ __init static int tracer_alloc_buffers(void)
ring_buffer_free(global_trace.buffer);
goto out_free_cpumask;
}
- max_tr.entries = 1;
#endif
/* Allocate the first page for all buffers */
@@ -4997,6 +5112,12 @@ __init static int tracer_alloc_buffers(void)
max_tr.data[i] = &per_cpu(max_tr_data, i);
}
+ set_buffer_entries(&global_trace,
+ ring_buffer_size(global_trace.buffer, 0));
+#ifdef CONFIG_TRACER_MAX_TRACE
+ set_buffer_entries(&max_tr, 1);
+#endif
+
trace_init_cmdlines();
register_tracer(&nop_trace);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f95d65da6db8..6c6f7933eede 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -131,6 +131,7 @@ struct trace_array_cpu {
atomic_t disabled;
void *buffer_page; /* ring buffer spare */
+ unsigned long entries;
unsigned long saved_latency;
unsigned long critical_start;
unsigned long critical_end;
@@ -152,7 +153,6 @@ struct trace_array_cpu {
*/
struct trace_array {
struct ring_buffer *buffer;
- unsigned long entries;
int cpu;
int buffer_disabled;
cycle_t time_start;
@@ -826,6 +826,8 @@ extern struct list_head ftrace_events;
extern const char *__start___trace_bprintk_fmt[];
extern const char *__stop___trace_bprintk_fmt[];
+void trace_printk_init_buffers(void);
+
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
extern struct ftrace_event_call \
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 6fd4ffd042f9..a9077c1b4ad3 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -51,6 +51,10 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
const char **iter;
char *fmt;
+ /* allocate the trace_printk per cpu buffers */
+ if (start != end)
+ trace_printk_init_buffers();
+
mutex_lock(&btrace_mutex);
for (iter = start; iter < end; iter++) {
struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);