aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace/ring_buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--kernel/trace/ring_buffer.c709
1 files changed, 503 insertions, 206 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 668bbb5ef2bd..76f34c0ef29c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,8 +18,46 @@
#include "trace.h"
-/* Global flag to disable all recording to ring buffers */
-static int ring_buffers_off __read_mostly;
+/*
+ * A fast way to enable or disable all ring buffers is to
+ * call tracing_on or tracing_off. Turning off the ring buffers
+ * prevents all ring buffers from being recorded to.
+ * Turning this switch on, makes it OK to write to the
+ * ring buffer, if the ring buffer is enabled itself.
+ *
+ * There's three layers that must be on in order to write
+ * to the ring buffer.
+ *
+ * 1) This global flag must be set.
+ * 2) The ring buffer must be enabled for recording.
+ * 3) The per cpu buffer must be enabled for recording.
+ *
+ * In case of an anomaly, this global flag has a bit set that
+ * will permantly disable all ring buffers.
+ */
+
+/*
+ * Global flag to disable all recording to ring buffers
+ * This has two bits: ON, DISABLED
+ *
+ * ON DISABLED
+ * ---- ----------
+ * 0 0 : ring buffers are off
+ * 1 0 : ring buffers are on
+ * X 1 : ring buffers are permanently disabled
+ */
+
+enum {
+ RB_BUFFERS_ON_BIT = 0,
+ RB_BUFFERS_DISABLED_BIT = 1,
+};
+
+enum {
+ RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
+ RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
+};
+
+static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
/**
* tracing_on - enable all tracing buffers
@@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly;
*/
void tracing_on(void)
{
- ring_buffers_off = 0;
+ set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
}
/**
@@ -42,9 +80,22 @@ void tracing_on(void)
*/
void tracing_off(void)
{
- ring_buffers_off = 1;
+ clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
}
+/**
+ * tracing_off_permanent - permanently disable ring buffers
+ *
+ * This function, once called, will disable all ring buffers
+ * permanenty.
+ */
+void tracing_off_permanent(void)
+{
+ set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
+}
+
+#include "trace.h"
+
/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0
@@ -56,7 +107,7 @@ u64 ring_buffer_time_stamp(int cpu)
preempt_disable_notrace();
/* shift to debug/test normalization and TIME_EXTENTS */
time = sched_clock() << DEBUG_SHIFT;
- preempt_enable_notrace();
+ preempt_enable_no_resched_notrace();
return time;
}
@@ -144,20 +195,24 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST (~TS_MASK)
-/*
- * This hack stolen from mm/slob.c.
- * We can store per page timing information in the page frame of the page.
- * Thanks to Peter Zijlstra for suggesting this idea.
- */
-struct buffer_page {
+struct buffer_data_page {
u64 time_stamp; /* page time stamp */
- local_t write; /* index for next write */
local_t commit; /* write commited index */
+ unsigned char data[]; /* data of buffer page */
+};
+
+struct buffer_page {
+ local_t write; /* index for next write */
unsigned read; /* index for next read */
struct list_head list; /* list of free pages */
- void *page; /* Actual data page */
+ struct buffer_data_page *page; /* Actual data page */
};
+static void rb_init_page(struct buffer_data_page *bpage)
+{
+ local_set(&bpage->commit, 0);
+}
+
/*
* Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
* this issue out.
@@ -179,7 +234,7 @@ static inline int test_time_stamp(u64 delta)
return 0;
}
-#define BUF_PAGE_SIZE PAGE_SIZE
+#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
/*
* head_page == tail_page && head == tail then buffer is empty.
@@ -187,7 +242,8 @@ static inline int test_time_stamp(u64 delta)
struct ring_buffer_per_cpu {
int cpu;
struct ring_buffer *buffer;
- spinlock_t lock;
+ spinlock_t reader_lock; /* serialize readers */
+ raw_spinlock_t lock;
struct lock_class_key lock_key;
struct list_head pages;
struct buffer_page *head_page; /* read from head */
@@ -202,7 +258,6 @@ struct ring_buffer_per_cpu {
};
struct ring_buffer {
- unsigned long size;
unsigned pages;
unsigned flags;
int cpus;
@@ -221,32 +276,16 @@ struct ring_buffer_iter {
u64 read_stamp;
};
+/* buffer may be either ring_buffer or ring_buffer_per_cpu */
#define RB_WARN_ON(buffer, cond) \
- do { \
- if (unlikely(cond)) { \
- atomic_inc(&buffer->record_disabled); \
- WARN_ON(1); \
- } \
- } while (0)
-
-#define RB_WARN_ON_RET(buffer, cond) \
- do { \
- if (unlikely(cond)) { \
- atomic_inc(&buffer->record_disabled); \
- WARN_ON(1); \
- return -1; \
- } \
- } while (0)
-
-#define RB_WARN_ON_ONCE(buffer, cond) \
- do { \
- static int once; \
- if (unlikely(cond) && !once) { \
- once++; \
+ ({ \
+ int _____ret = unlikely(cond); \
+ if (_____ret) { \
atomic_inc(&buffer->record_disabled); \
WARN_ON(1); \
} \
- } while (0)
+ _____ret; \
+ })
/**
* check_pages - integrity check of buffer pages
@@ -258,16 +297,20 @@ struct ring_buffer_iter {
static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
struct list_head *head = &cpu_buffer->pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
- RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
- RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
+ if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
+ return -1;
+ if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
+ return -1;
- list_for_each_entry_safe(page, tmp, head, list) {
- RB_WARN_ON_RET(cpu_buffer,
- page->list.next->prev != &page->list);
- RB_WARN_ON_RET(cpu_buffer,
- page->list.prev->next != &page->list);
+ list_for_each_entry_safe(bpage, tmp, head, list) {
+ if (RB_WARN_ON(cpu_buffer,
+ bpage->list.next->prev != &bpage->list))
+ return -1;
+ if (RB_WARN_ON(cpu_buffer,
+ bpage->list.prev->next != &bpage->list))
+ return -1;
}
return 0;
@@ -277,22 +320,23 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
unsigned nr_pages)
{
struct list_head *head = &cpu_buffer->pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
unsigned long addr;
LIST_HEAD(pages);
unsigned i;
for (i = 0; i < nr_pages; i++) {
- page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
- if (!page)
+ if (!bpage)
goto free_pages;
- list_add(&page->list, &pages);
+ list_add(&bpage->list, &pages);
addr = __get_free_page(GFP_KERNEL);
if (!addr)
goto free_pages;
- page->page = (void *)addr;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
}
list_splice(&pages, head);
@@ -302,9 +346,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
return 0;
free_pages:
- list_for_each_entry_safe(page, tmp, &pages, list) {
- list_del_init(&page->list);
- free_buffer_page(page);
+ list_for_each_entry_safe(bpage, tmp, &pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
return -ENOMEM;
}
@@ -313,7 +357,7 @@ static struct ring_buffer_per_cpu *
rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
- struct buffer_page *page;
+ struct buffer_page *bpage;
unsigned long addr;
int ret;
@@ -324,19 +368,21 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
cpu_buffer->cpu = cpu;
cpu_buffer->buffer = buffer;
- spin_lock_init(&cpu_buffer->lock);
+ spin_lock_init(&cpu_buffer->reader_lock);
+ cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
INIT_LIST_HEAD(&cpu_buffer->pages);
- page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
- if (!page)
+ if (!bpage)
goto fail_free_buffer;
- cpu_buffer->reader_page = page;
+ cpu_buffer->reader_page = bpage;
addr = __get_free_page(GFP_KERNEL);
if (!addr)
goto fail_free_reader;
- page->page = (void *)addr;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
@@ -361,14 +407,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
{
struct list_head *head = &cpu_buffer->pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
list_del_init(&cpu_buffer->reader_page->list);
free_buffer_page(cpu_buffer->reader_page);
- list_for_each_entry_safe(page, tmp, head, list) {
- list_del_init(&page->list);
- free_buffer_page(page);
+ list_for_each_entry_safe(bpage, tmp, head, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
kfree(cpu_buffer);
}
@@ -465,7 +511,7 @@ static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
static void
rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
{
- struct buffer_page *page;
+ struct buffer_page *bpage;
struct list_head *p;
unsigned i;
@@ -473,13 +519,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
synchronize_sched();
for (i = 0; i < nr_pages; i++) {
- BUG_ON(list_empty(&cpu_buffer->pages));
+ if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
+ return;
p = cpu_buffer->pages.next;
- page = list_entry(p, struct buffer_page, list);
- list_del_init(&page->list);
- free_buffer_page(page);
+ bpage = list_entry(p, struct buffer_page, list);
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
- BUG_ON(list_empty(&cpu_buffer->pages));
+ if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
+ return;
rb_reset_cpu(cpu_buffer);
@@ -493,7 +541,7 @@ static void
rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
struct list_head *pages, unsigned nr_pages)
{
- struct buffer_page *page;
+ struct buffer_page *bpage;
struct list_head *p;
unsigned i;
@@ -501,11 +549,12 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
synchronize_sched();
for (i = 0; i < nr_pages; i++) {
- BUG_ON(list_empty(pages));
+ if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
+ return;
p = pages->next;
- page = list_entry(p, struct buffer_page, list);
- list_del_init(&page->list);
- list_add_tail(&page->list, &cpu_buffer->pages);
+ bpage = list_entry(p, struct buffer_page, list);
+ list_del_init(&bpage->list);
+ list_add_tail(&bpage->list, &cpu_buffer->pages);
}
rb_reset_cpu(cpu_buffer);
@@ -532,7 +581,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned nr_pages, rm_pages, new_pages;
- struct buffer_page *page, *tmp;
+ struct buffer_page *bpage, *tmp;
unsigned long buffer_size;
unsigned long addr;
LIST_HEAD(pages);
@@ -562,7 +611,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
if (size < buffer_size) {
/* easy case, just free pages */
- BUG_ON(nr_pages >= buffer->pages);
+ if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
rm_pages = buffer->pages - nr_pages;
@@ -581,21 +633,26 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
* add these pages to the cpu_buffers. Otherwise we just free
* them all and return -ENOMEM;
*/
- BUG_ON(nr_pages <= buffer->pages);
+ if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
+
new_pages = nr_pages - buffer->pages;
for_each_buffer_cpu(buffer, cpu) {
for (i = 0; i < new_pages; i++) {
- page = kzalloc_node(ALIGN(sizeof(*page),
+ bpage = kzalloc_node(ALIGN(sizeof(*bpage),
cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
- if (!page)
+ if (!bpage)
goto free_pages;
- list_add(&page->list, &pages);
+ list_add(&bpage->list, &pages);
addr = __get_free_page(GFP_KERNEL);
if (!addr)
goto free_pages;
- page->page = (void *)addr;
+ bpage->page = (void *)addr;
+ rb_init_page(bpage->page);
}
}
@@ -604,7 +661,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
rb_insert_pages(cpu_buffer, &pages, new_pages);
}
- BUG_ON(!list_empty(&pages));
+ if (RB_WARN_ON(buffer, !list_empty(&pages))) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
out:
buffer->pages = nr_pages;
@@ -613,9 +673,9 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
return size;
free_pages:
- list_for_each_entry_safe(page, tmp, &pages, list) {
- list_del_init(&page->list);
- free_buffer_page(page);
+ list_for_each_entry_safe(bpage, tmp, &pages, list) {
+ list_del_init(&bpage->list);
+ free_buffer_page(bpage);
}
mutex_unlock(&buffer->mutex);
return -ENOMEM;
@@ -626,9 +686,15 @@ static inline int rb_null_event(struct ring_buffer_event *event)
return event->type == RINGBUF_TYPE_PADDING;
}
-static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
+static inline void *
+__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
+{
+ return bpage->data + index;
+}
+
+static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
{
- return page->page + index;
+ return bpage->page->data + index;
}
static inline struct ring_buffer_event *
@@ -658,7 +724,7 @@ static inline unsigned rb_page_write(struct buffer_page *bpage)
static inline unsigned rb_page_commit(struct buffer_page *bpage)
{
- return local_read(&bpage->commit);
+ return local_read(&bpage->page->commit);
}
/* Size is determined by what has been commited */
@@ -693,7 +759,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
head += rb_event_length(event)) {
event = __rb_page_index(cpu_buffer->head_page, head);
- BUG_ON(rb_null_event(event));
+ if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
+ return;
/* Only count data entries */
if (event->type != RINGBUF_TYPE_DATA)
continue;
@@ -703,14 +770,14 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
}
static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
- struct buffer_page **page)
+ struct buffer_page **bpage)
{
- struct list_head *p = (*page)->list.next;
+ struct list_head *p = (*bpage)->list.next;
if (p == &cpu_buffer->pages)
p = p->next;
- *page = list_entry(p, struct buffer_page, list);
+ *bpage = list_entry(p, struct buffer_page, list);
}
static inline unsigned
@@ -746,16 +813,18 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
addr &= PAGE_MASK;
while (cpu_buffer->commit_page->page != (void *)addr) {
- RB_WARN_ON(cpu_buffer,
- cpu_buffer->commit_page == cpu_buffer->tail_page);
- cpu_buffer->commit_page->commit =
+ if (RB_WARN_ON(cpu_buffer,
+ cpu_buffer->commit_page == cpu_buffer->tail_page))
+ return;
+ cpu_buffer->commit_page->page->commit =
cpu_buffer->commit_page->write;
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
- cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
}
/* Now set the commit to the event's index */
- local_set(&cpu_buffer->commit_page->commit, index);
+ local_set(&cpu_buffer->commit_page->page->commit, index);
}
static inline void
@@ -769,25 +838,38 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
* back to us). This allows us to do a simple loop to
* assign the commit to the tail.
*/
+ again:
while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
- cpu_buffer->commit_page->commit =
+ cpu_buffer->commit_page->page->commit =
cpu_buffer->commit_page->write;
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
- cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
/* add barrier to keep gcc from optimizing too much */
barrier();
}
while (rb_commit_index(cpu_buffer) !=
rb_page_write(cpu_buffer->commit_page)) {
- cpu_buffer->commit_page->commit =
+ cpu_buffer->commit_page->page->commit =
cpu_buffer->commit_page->write;
barrier();
}
+
+ /* again, keep gcc from optimizing */
+ barrier();
+
+ /*
+ * If an interrupt came in just after the first while loop
+ * and pushed the tail page forward, we will be left with
+ * a dangling commit that will never go forward.
+ */
+ if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
+ goto again;
}
static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
- cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
+ cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
cpu_buffer->reader_page->read = 0;
}
@@ -806,7 +888,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
else
rb_inc_page(cpu_buffer, &iter->head_page);
- iter->read_stamp = iter->head_page->time_stamp;
+ iter->read_stamp = iter->head_page->page->time_stamp;
iter->head = 0;
}
@@ -880,12 +962,15 @@ static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
unsigned type, unsigned long length, u64 *ts)
{
- struct buffer_page *tail_page, *head_page, *reader_page;
+ struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
unsigned long tail, write;
struct ring_buffer *buffer = cpu_buffer->buffer;
struct ring_buffer_event *event;
unsigned long flags;
+ commit_page = cpu_buffer->commit_page;
+ /* we just need to protect against interrupts */
+ barrier();
tail_page = cpu_buffer->tail_page;
write = local_add_return(length, &tail_page->write);
tail = write - length;
@@ -894,7 +979,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
if (write > BUF_PAGE_SIZE) {
struct buffer_page *next_page = tail_page;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
rb_inc_page(cpu_buffer, &next_page);
@@ -902,14 +988,15 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
reader_page = cpu_buffer->reader_page;
/* we grabbed the lock before incrementing */
- RB_WARN_ON(cpu_buffer, next_page == reader_page);
+ if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
+ goto out_unlock;
/*
* If for some reason, we had an interrupt storm that made
* it all the way around the buffer, bail, and warn
* about it.
*/
- if (unlikely(next_page == cpu_buffer->commit_page)) {
+ if (unlikely(next_page == commit_page)) {
WARN_ON_ONCE(1);
goto out_unlock;
}
@@ -940,12 +1027,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
*/
if (tail_page == cpu_buffer->tail_page) {
local_set(&next_page->write, 0);
- local_set(&next_page->commit, 0);
+ local_set(&next_page->page->commit, 0);
cpu_buffer->tail_page = next_page;
/* reread the time stamp */
*ts = ring_buffer_time_stamp(cpu_buffer->cpu);
- cpu_buffer->tail_page->time_stamp = *ts;
+ cpu_buffer->tail_page->page->time_stamp = *ts;
}
/*
@@ -970,7 +1057,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
rb_set_commit_to_write(cpu_buffer);
}
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
/* fail and let the caller try again */
return ERR_PTR(-EAGAIN);
@@ -978,7 +1066,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
/* We reserved something on the buffer */
- BUG_ON(write > BUF_PAGE_SIZE);
+ if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
+ return NULL;
event = __rb_page_index(tail_page, tail);
rb_update_event(event, type, length);
@@ -988,12 +1077,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
* this page's time stamp.
*/
if (!tail && rb_is_commit(cpu_buffer, event))
- cpu_buffer->commit_page->time_stamp = *ts;
+ cpu_buffer->commit_page->page->time_stamp = *ts;
return event;
out_unlock:
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
return NULL;
}
@@ -1038,7 +1128,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
event->time_delta = *delta & TS_MASK;
event->array[0] = *delta >> TS_SHIFT;
} else {
- cpu_buffer->commit_page->time_stamp = *ts;
+ cpu_buffer->commit_page->page->time_stamp = *ts;
event->time_delta = 0;
event->array[0] = 0;
}
@@ -1076,10 +1166,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
* storm or we have something buggy.
* Bail!
*/
- if (unlikely(++nr_loops > 1000)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
return NULL;
- }
ts = ring_buffer_time_stamp(cpu_buffer->cpu);
@@ -1175,15 +1263,14 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
struct ring_buffer_event *event;
int cpu, resched;
- if (ring_buffers_off)
+ if (ring_buffer_flags != RB_BUFFERS_ON)
return NULL;
if (atomic_read(&buffer->record_disabled))
return NULL;
/* If we are tracing schedule, we don't want to recurse */
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
@@ -1214,10 +1301,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
return event;
out:
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
return NULL;
}
@@ -1259,12 +1343,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
/*
* Only the last preempt count needs to restore preemption.
*/
- if (preempt_count() == 1) {
- if (per_cpu(rb_need_resched, cpu))
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
- } else
+ if (preempt_count() == 1)
+ ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+ else
preempt_enable_no_resched_notrace();
return 0;
@@ -1294,14 +1375,13 @@ int ring_buffer_write(struct ring_buffer *buffer,
int ret = -EBUSY;
int cpu, resched;
- if (ring_buffers_off)
+ if (ring_buffer_flags != RB_BUFFERS_ON)
return -EBUSY;
if (atomic_read(&buffer->record_disabled))
return -EBUSY;
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
@@ -1327,10 +1407,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
ret = 0;
out:
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
return ret;
}
@@ -1489,14 +1566,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
return overruns;
}
-/**
- * ring_buffer_iter_reset - reset an iterator
- * @iter: The iterator to reset
- *
- * Resets the iterator, so that it will start from the beginning
- * again.
- */
-void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+static void rb_iter_reset(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
@@ -1511,7 +1581,24 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
if (iter->head)
iter->read_stamp = cpu_buffer->read_stamp;
else
- iter->read_stamp = iter->head_page->time_stamp;
+ iter->read_stamp = iter->head_page->page->time_stamp;
+}
+
+/**
+ * ring_buffer_iter_reset - reset an iterator
+ * @iter: The iterator to reset
+ *
+ * Resets the iterator, so that it will start from the beginning
+ * again.
+ */
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ rb_iter_reset(iter);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
/**
@@ -1597,7 +1684,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
unsigned long flags;
int nr_loops = 0;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
again:
/*
@@ -1606,8 +1694,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
* a case where we will loop three times. There should be no
* reason to loop four times (that I know of).
*/
- if (unlikely(++nr_loops > 3)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
reader = NULL;
goto out;
}
@@ -1619,8 +1706,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
goto out;
/* Never should we have an index greater than the size */
- RB_WARN_ON(cpu_buffer,
- cpu_buffer->reader_page->read > rb_page_size(reader));
+ if (RB_WARN_ON(cpu_buffer,
+ cpu_buffer->reader_page->read > rb_page_size(reader)))
+ goto out;
/* check if we caught up to the tail */
reader = NULL;
@@ -1637,7 +1725,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->reader_page->list.prev = reader->list.prev;
local_set(&cpu_buffer->reader_page->write, 0);
- local_set(&cpu_buffer->reader_page->commit, 0);
+ local_set(&cpu_buffer->reader_page->page->commit, 0);
/* Make the reader page now replace the head */
reader->list.prev->next = &cpu_buffer->reader_page->list;
@@ -1659,7 +1747,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
goto again;
out:
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
return reader;
}
@@ -1673,7 +1762,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
reader = rb_get_reader_page(cpu_buffer);
/* This function should not be called when buffer is empty */
- BUG_ON(!reader);
+ if (RB_WARN_ON(cpu_buffer, !reader))
+ return;
event = rb_reader_event(cpu_buffer);
@@ -1700,7 +1790,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
* Check if we are at the end of the buffer.
*/
if (iter->head >= rb_page_size(iter->head_page)) {
- BUG_ON(iter->head_page == cpu_buffer->commit_page);
+ if (RB_WARN_ON(buffer,
+ iter->head_page == cpu_buffer->commit_page))
+ return;
rb_inc_iter(iter);
return;
}
@@ -1713,8 +1805,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
* This should not be called to advance the header if we are
* at the tail of the buffer.
*/
- BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
- (iter->head + length > rb_commit_index(cpu_buffer)));
+ if (RB_WARN_ON(cpu_buffer,
+ (iter->head_page == cpu_buffer->commit_page) &&
+ (iter->head + length > rb_commit_index(cpu_buffer))))
+ return;
rb_update_iter_read_stamp(iter, event);
@@ -1726,17 +1820,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
rb_advance_iter(iter);
}
-/**
- * ring_buffer_peek - peek at the next event to be read
- * @buffer: The ring buffer to read
- * @cpu: The cpu to peak at
- * @ts: The timestamp counter of this event.
- *
- * This will return the event that will be read next, but does
- * not consume the data.
- */
-struct ring_buffer_event *
-ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+static struct ring_buffer_event *
+rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
@@ -1757,10 +1842,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
* can have. Nesting 10 deep of interrupts is clearly
* an anomaly.
*/
- if (unlikely(++nr_loops > 10)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
return NULL;
- }
reader = rb_get_reader_page(cpu_buffer);
if (!reader)
@@ -1798,16 +1881,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
return NULL;
}
-/**
- * ring_buffer_iter_peek - peek at the next event to be read
- * @iter: The ring buffer iterator
- * @ts: The timestamp counter of this event.
- *
- * This will return the event that will be read next, but does
- * not increment the iterator.
- */
-struct ring_buffer_event *
-ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+static struct ring_buffer_event *
+rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
{
struct ring_buffer *buffer;
struct ring_buffer_per_cpu *cpu_buffer;
@@ -1829,10 +1904,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
* can have. Nesting 10 deep of interrupts is clearly
* an anomaly.
*/
- if (unlikely(++nr_loops > 10)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
return NULL;
- }
if (rb_per_cpu_empty(cpu_buffer))
return NULL;
@@ -1869,6 +1942,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
}
/**
+ * ring_buffer_peek - peek at the next event to be read
+ * @buffer: The ring buffer to read
+ * @cpu: The cpu to peak at
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not consume the data.
+ */
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct ring_buffer_event *event;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_buffer_peek(buffer, cpu, ts);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return event;
+}
+
+/**
+ * ring_buffer_iter_peek - peek at the next event to be read
+ * @iter: The ring buffer iterator
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not increment the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ struct ring_buffer_event *event;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_iter_peek(iter, ts);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return event;
+}
+
+/**
* ring_buffer_consume - return an event and consume it
* @buffer: The ring buffer to get the next event from
*
@@ -1879,19 +1997,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_event *
ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
{
- struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
struct ring_buffer_event *event;
+ unsigned long flags;
if (!cpu_isset(cpu, buffer->cpumask))
return NULL;
- event = ring_buffer_peek(buffer, cpu, ts);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ event = rb_buffer_peek(buffer, cpu, ts);
if (!event)
- return NULL;
+ goto out;
- cpu_buffer = buffer->buffers[cpu];
rb_advance_reader(cpu_buffer);
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
return event;
}
@@ -1928,9 +2051,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
atomic_inc(&cpu_buffer->record_disabled);
synchronize_sched();
- spin_lock_irqsave(&cpu_buffer->lock, flags);
- ring_buffer_iter_reset(iter);
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ __raw_spin_lock(&cpu_buffer->lock);
+ rb_iter_reset(iter);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
return iter;
}
@@ -1962,12 +2087,17 @@ struct ring_buffer_event *
ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
{
struct ring_buffer_event *event;
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ unsigned long flags;
- event = ring_buffer_iter_peek(iter, ts);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_iter_peek(iter, ts);
if (!event)
- return NULL;
+ goto out;
rb_advance_iter(iter);
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
return event;
}
@@ -1987,7 +2117,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->head_page
= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
local_set(&cpu_buffer->head_page->write, 0);
- local_set(&cpu_buffer->head_page->commit, 0);
+ local_set(&cpu_buffer->head_page->page->commit, 0);
cpu_buffer->head_page->read = 0;
@@ -1996,7 +2126,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
local_set(&cpu_buffer->reader_page->write, 0);
- local_set(&cpu_buffer->reader_page->commit, 0);
+ local_set(&cpu_buffer->reader_page->page->commit, 0);
cpu_buffer->reader_page->read = 0;
cpu_buffer->overrun = 0;
@@ -2016,11 +2146,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
if (!cpu_isset(cpu, buffer->cpumask))
return;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ __raw_spin_lock(&cpu_buffer->lock);
rb_reset_cpu(cpu_buffer);
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
/**
@@ -2090,8 +2224,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
return -EINVAL;
/* At least make sure the two buffers are somewhat the same */
- if (buffer_a->size != buffer_b->size ||
- buffer_a->pages != buffer_b->pages)
+ if (buffer_a->pages != buffer_b->pages)
return -EINVAL;
cpu_buffer_a = buffer_a->buffers[cpu];
@@ -2118,16 +2251,178 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
return 0;
}
+static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_data_page *bpage)
+{
+ struct ring_buffer_event *event;
+ unsigned long head;
+
+ __raw_spin_lock(&cpu_buffer->lock);
+ for (head = 0; head < local_read(&bpage->commit);
+ head += rb_event_length(event)) {
+
+ event = __rb_data_page_index(bpage, head);
+ if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
+ return;
+ /* Only count data entries */
+ if (event->type != RINGBUF_TYPE_DATA)
+ continue;
+ cpu_buffer->entries--;
+ }
+ __raw_spin_unlock(&cpu_buffer->lock);
+}
+
+/**
+ * ring_buffer_alloc_read_page - allocate a page to read from buffer
+ * @buffer: the buffer to allocate for.
+ *
+ * This function is used in conjunction with ring_buffer_read_page.
+ * When reading a full page from the ring buffer, these functions
+ * can be used to speed up the process. The calling function should
+ * allocate a few pages first with this function. Then when it
+ * needs to get pages from the ring buffer, it passes the result
+ * of this function into ring_buffer_read_page, which will swap
+ * the page that was allocated, with the read page of the buffer.
+ *
+ * Returns:
+ * The page allocated, or NULL on error.
+ */
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
+{
+ unsigned long addr;
+ struct buffer_data_page *bpage;
+
+ addr = __get_free_page(GFP_KERNEL);
+ if (!addr)
+ return NULL;
+
+ bpage = (void *)addr;
+
+ return bpage;
+}
+
+/**
+ * ring_buffer_free_read_page - free an allocated read page
+ * @buffer: the buffer the page was allocate for
+ * @data: the page to free
+ *
+ * Free a page allocated from ring_buffer_alloc_read_page.
+ */
+void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
+{
+ free_page((unsigned long)data);
+}
+
+/**
+ * ring_buffer_read_page - extract a page from the ring buffer
+ * @buffer: buffer to extract from
+ * @data_page: the page to use allocated from ring_buffer_alloc_read_page
+ * @cpu: the cpu of the buffer to extract
+ * @full: should the extraction only happen when the page is full.
+ *
+ * This function will pull out a page from the ring buffer and consume it.
+ * @data_page must be the address of the variable that was returned
+ * from ring_buffer_alloc_read_page. This is because the page might be used
+ * to swap with a page in the ring buffer.
+ *
+ * for example:
+ * rpage = ring_buffer_alloc_page(buffer);
+ * if (!rpage)
+ * return error;
+ * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
+ * if (ret)
+ * process_page(rpage);
+ *
+ * When @full is set, the function will not return true unless
+ * the writer is off the reader page.
+ *
+ * Note: it is up to the calling functions to handle sleeps and wakeups.
+ * The ring buffer can be used anywhere in the kernel and can not
+ * blindly call wake_up. The layer that uses the ring buffer must be
+ * responsible for that.
+ *
+ * Returns:
+ * 1 if data has been transferred
+ * 0 if no data has been transferred.
+ */
+int ring_buffer_read_page(struct ring_buffer *buffer,
+ void **data_page, int cpu, int full)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct ring_buffer_event *event;
+ struct buffer_data_page *bpage;
+ unsigned long flags;
+ int ret = 0;
+
+ if (!data_page)
+ return 0;
+
+ bpage = *data_page;
+ if (!bpage)
+ return 0;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ /*
+ * rb_buffer_peek will get the next ring buffer if
+ * the current reader page is empty.
+ */
+ event = rb_buffer_peek(buffer, cpu, NULL);
+ if (!event)
+ goto out;
+
+ /* check for data */
+ if (!local_read(&cpu_buffer->reader_page->page->commit))
+ goto out;
+ /*
+ * If the writer is already off of the read page, then simply
+ * switch the read page with the given page. Otherwise
+ * we need to copy the data from the reader to the writer.
+ */
+ if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
+ unsigned int read = cpu_buffer->reader_page->read;
+
+ if (full)
+ goto out;
+ /* The writer is still on the reader page, we must copy */
+ bpage = cpu_buffer->reader_page->page;
+ memcpy(bpage->data,
+ cpu_buffer->reader_page->page->data + read,
+ local_read(&bpage->commit) - read);
+
+ /* consume what was read */
+ cpu_buffer->reader_page += read;
+
+ } else {
+ /* swap the pages */
+ rb_init_page(bpage);
+ bpage = cpu_buffer->reader_page->page;
+ cpu_buffer->reader_page->page = *data_page;
+ cpu_buffer->reader_page->read = 0;
+ *data_page = bpage;
+ }
+ ret = 1;
+
+ /* update the entry counter */
+ rb_remove_entries(cpu_buffer, bpage);
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return ret;
+}
+
static ssize_t
rb_simple_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- int *p = filp->private_data;
+ long *p = filp->private_data;
char buf[64];
int r;
- /* !ring_buffers_off == tracing_on */
- r = sprintf(buf, "%d\n", !*p);
+ if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
+ r = sprintf(buf, "permanently disabled\n");
+ else
+ r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
@@ -2136,7 +2431,7 @@ static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- int *p = filp->private_data;
+ long *p = filp->private_data;
char buf[64];
long val;
int ret;
@@ -2153,8 +2448,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
if (ret < 0)
return ret;
- /* !ring_buffers_off == tracing_on */
- *p = !val;
+ if (val)
+ set_bit(RB_BUFFERS_ON_BIT, p);
+ else
+ clear_bit(RB_BUFFERS_ON_BIT, p);
(*ppos)++;
@@ -2176,7 +2473,7 @@ static __init int rb_init_debugfs(void)
d_tracer = tracing_init_dentry();
entry = debugfs_create_file("tracing_on", 0644, d_tracer,
- &ring_buffers_off, &rb_simple_fops);
+ &ring_buffer_flags, &rb_simple_fops);
if (!entry)
pr_warning("Could not create debugfs 'tracing_on' entry\n");