aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/kernel/trace/ring_buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- kernel/trace/ring_buffer.c 177
1 files changed, 109 insertions, 68 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3103a484182e..25476ead681b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -384,6 +384,7 @@ struct rb_irq_work {
struct irq_work work;
wait_queue_head_t waiters;
wait_queue_head_t full_waiters;
+ atomic_t seq;
bool waiters_pending;
bool full_waiters_pending;
bool wakeup_full;
@@ -753,6 +754,9 @@ static void rb_wake_up_waiters(struct irq_work *work)
{
struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+ /* For waiters waiting for the first wake up */
+ (void)atomic_fetch_inc_release(&rbwork->seq);
+
wake_up_all(&rbwork->waiters);
if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
/* Only cpu_buffer sets the above flags */
@@ -834,51 +838,24 @@ static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
ret = !pagebusy && full_hit(buffer, cpu, full);
- if (!cpu_buffer->shortest_full ||
- cpu_buffer->shortest_full > full)
- cpu_buffer->shortest_full = full;
+ if (!ret && (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full > full)) {
+ cpu_buffer->shortest_full = full;
+ }
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
return ret;
}
-/**
- * ring_buffer_wait - wait for input to the ring buffer
- * @buffer: buffer to wait on
- * @cpu: the cpu buffer to wait on
- * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
- *
- * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
- * as data is added to any of the @buffer's cpu buffers. Otherwise
- * it will wait for data to be added to a specific cpu buffer.
- */
-int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+static inline bool
+rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer,
+ int cpu, int full, ring_buffer_cond_fn cond, void *data)
{
- struct ring_buffer_per_cpu *cpu_buffer;
- DEFINE_WAIT(wait);
- struct rb_irq_work *work;
- int ret = 0;
-
- /*
- * Depending on what the caller is waiting for, either any
- * data in any cpu buffer, or a specific buffer, put the
- * caller on the appropriate wait queue.
- */
- if (cpu == RING_BUFFER_ALL_CPUS) {
- work = &buffer->irq_work;
- /* Full only makes sense on per cpu reads */
- full = 0;
- } else {
- if (!cpumask_test_cpu(cpu, buffer->cpumask))
- return -ENODEV;
- cpu_buffer = buffer->buffers[cpu];
- work = &cpu_buffer->irq_work;
- }
+ if (rb_watermark_hit(buffer, cpu, full))
+ return true;
- if (full)
- prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
- else
- prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+ if (cond(data))
+ return true;
/*
* The events can happen in critical sections where
@@ -901,27 +878,82 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
* a task has been queued. It's OK for spurious wake ups.
*/
if (full)
- work->full_waiters_pending = true;
+ rbwork->full_waiters_pending = true;
else
- work->waiters_pending = true;
+ rbwork->waiters_pending = true;
- if (rb_watermark_hit(buffer, cpu, full))
- goto out;
+ return false;
+}
- if (signal_pending(current)) {
- ret = -EINTR;
- goto out;
+struct rb_wait_data {
+ struct rb_irq_work *irq_work;
+ int seq;
+};
+
+/*
+ * The default wait condition for ring_buffer_wait() is to just exit the
+ * wait loop the first time it is woken up.
+ */
+static bool rb_wait_once(void *data)
+{
+ struct rb_wait_data *rdata = data;
+ struct rb_irq_work *rbwork = rdata->irq_work;
+
+ return atomic_read_acquire(&rbwork->seq) != rdata->seq;
+}
+
+/**
+ * ring_buffer_wait - wait for input to the ring buffer
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ * @full: wait until the given percentage of pages is available, if @cpu != RING_BUFFER_ALL_CPUS
+ * @cond: condition function to break out of wait (NULL to return after the first wake up)
+ * @data: the data to pass to @cond.
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ */
+int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
+ ring_buffer_cond_fn cond, void *data)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct wait_queue_head *waitq;
+ struct rb_irq_work *rbwork;
+ struct rb_wait_data rdata;
+ int ret = 0;
+
+ /*
+ * Depending on what the caller is waiting for, either any
+ * data in any cpu buffer, or a specific buffer, put the
+ * caller on the appropriate wait queue.
+ */
+ if (cpu == RING_BUFFER_ALL_CPUS) {
+ rbwork = &buffer->irq_work;
+ /* Full only makes sense on per cpu reads */
+ full = 0;
+ } else {
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return -ENODEV;
+ cpu_buffer = buffer->buffers[cpu];
+ rbwork = &cpu_buffer->irq_work;
}
- schedule();
- out:
if (full)
- finish_wait(&work->full_waiters, &wait);
+ waitq = &rbwork->full_waiters;
else
- finish_wait(&work->waiters, &wait);
+ waitq = &rbwork->waiters;
+
+ /* Set up to exit loop as soon as it is woken */
+ if (!cond) {
+ cond = rb_wait_once;
+ rdata.irq_work = rbwork;
+ rdata.seq = atomic_read_acquire(&rbwork->seq);
+ data = &rdata;
+ }
- if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
- ret = -EINTR;
+ ret = wait_event_interruptible((*waitq),
+ rb_wait_cond(rbwork, buffer, cpu, full, cond, data));
return ret;
}
@@ -959,21 +991,30 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
}
if (full) {
- unsigned long flags;
-
poll_wait(filp, &rbwork->full_waiters, poll_table);
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ if (rb_watermark_hit(buffer, cpu, full))
+ return EPOLLIN | EPOLLRDNORM;
+ /*
+ * Only allow full_waiters_pending update to be seen after
+ * the shortest_full is set (in rb_watermark_hit). If the
+ * writer sees the full_waiters_pending flag set, it will
+ * compare the amount in the ring buffer to shortest_full.
+ * If the amount in the ring buffer is greater than the
+ * shortest_full percent, it will call the irq_work handler
+ * to wake up this list. The irq_work handler will reset shortest_full
+ * back to zero. That's done under the reader_lock, but
+ * the below smp_mb() makes sure that the update to
+ * full_waiters_pending doesn't leak up above rb_watermark_hit().
+ */
+ smp_mb();
rbwork->full_waiters_pending = true;
- if (!cpu_buffer->shortest_full ||
- cpu_buffer->shortest_full > full)
- cpu_buffer->shortest_full = full;
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- } else {
- poll_wait(filp, &rbwork->waiters, poll_table);
- rbwork->waiters_pending = true;
+ return 0;
}
+ poll_wait(filp, &rbwork->waiters, poll_table);
+ rbwork->waiters_pending = true;
+
/*
* There's a tight race between setting the waiters_pending and
* checking if the ring buffer is empty. Once the waiters_pending bit
@@ -989,9 +1030,6 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
*/
smp_mb();
- if (full)
- return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0;
-
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
return EPOLLIN | EPOLLRDNORM;
@@ -1485,7 +1523,8 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
list_add(&bpage->list, pages);
- page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags,
+ page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
+ mflags | __GFP_ZERO,
cpu_buffer->buffer->subbuf_order);
if (!page)
goto free_pages;
@@ -1570,7 +1609,8 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
cpu_buffer->reader_page = bpage;
- page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, cpu_buffer->buffer->subbuf_order);
+ page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_ZERO,
+ cpu_buffer->buffer->subbuf_order);
if (!page)
goto fail_free_reader;
bpage->page = page_address(page);
@@ -4350,7 +4390,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
cpu_buffer = iter->cpu_buffer;
reader = cpu_buffer->reader_page;
head_page = cpu_buffer->head_page;
- commit_page = cpu_buffer->commit_page;
+ commit_page = READ_ONCE(cpu_buffer->commit_page);
commit_ts = commit_page->page->time_stamp;
/*
@@ -5538,7 +5578,8 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
if (bpage->data)
goto out;
- page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY,
+ page = alloc_pages_node(cpu_to_node(cpu),
+ GFP_KERNEL | __GFP_NORETRY | __GFP_ZERO,
cpu_buffer->buffer->subbuf_order);
if (!page) {
kfree(bpage);