aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/ptr_ring.h
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-05-18 10:07:42 -0400
committerDavid S. Miller <davem@davemloft.net>2017-05-18 10:07:42 -0400
commitf646c75b79c31a7fc40211e7291287e2b99ee168 (patch)
tree240db037b0414eda56435162247acabd75dd1477 /include/linux/ptr_ring.h
parentMerge branch 'phy-marvell-cleanups' (diff)
parentvhost_net: try batch dequing from skb array (diff)
downloadlinux-dev-f646c75b79c31a7fc40211e7291287e2b99ee168.tar.xz
linux-dev-f646c75b79c31a7fc40211e7291287e2b99ee168.zip
Merge branch 'vhost_net-rx-batch-dequeuing'
Jason Wang says: ==================== vhost_net rx batch dequeuing This series tries to implement rx batching for vhost-net. This is done by batching the dequeuing from skb_array which was exported by underlayer socket and pass the sbk back through msg_control to finish userspace copying. This is also the requirement for more batching implemention on rx path. Tests shows at most 7.56% improvment bon rx pps on top of batch zeroing and no obvious changes for TCP_STREAM/TCP_RR result. Please review. Thanks Changes from V4: - drop batch zeroing patch - renew the performance numbers - move skb pointer array out of vhost_net structure Changes from V3: - add batch zeroing patch to fix the build warnings Changes from V2: - rebase to net-next HEAD - use unconsume helpers to put skb back on releasing - introduce and use vhost_net internal buffer helpers - renew performance numbers on top of batch zeroing Changes from V1: - switch to use for() in __ptr_ring_consume_batched() - rename peek_head_len_batched() to fetch_skbs() - use skb_array_consume_batched() instead of skb_array_consume_batched_bh() since no consumer run in bh - drop the lockless peeking patch since skb_array could be resized, so it's not safe to call lockless one ==================== Acked-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux/ptr_ring.h')
-rw-r--r--include/linux/ptr_ring.h120
1 files changed, 120 insertions, 0 deletions
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6b2e0dd88569..d8c97ec8a8e6 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -278,6 +278,22 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
return ptr;
}
+static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
+ void **array, int n)
+{
+ void *ptr;
+ int i;
+
+ for (i = 0; i < n; i++) {
+ ptr = __ptr_ring_consume(r);
+ if (!ptr)
+ break;
+ array[i] = ptr;
+ }
+
+ return i;
+}
+
/*
* Note: resize (below) nests producer lock within consumer lock, so if you
* call this in interrupt or BH context, you must disable interrupts/BH when
@@ -328,6 +344,55 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
return ptr;
}
+static inline int ptr_ring_consume_batched(struct ptr_ring *r,
+ void **array, int n)
+{
+ int ret;
+
+ spin_lock(&r->consumer_lock);
+ ret = __ptr_ring_consume_batched(r, array, n);
+ spin_unlock(&r->consumer_lock);
+
+ return ret;
+}
+
+static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
+ void **array, int n)
+{
+ int ret;
+
+ spin_lock_irq(&r->consumer_lock);
+ ret = __ptr_ring_consume_batched(r, array, n);
+ spin_unlock_irq(&r->consumer_lock);
+
+ return ret;
+}
+
+static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
+ void **array, int n)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&r->consumer_lock, flags);
+ ret = __ptr_ring_consume_batched(r, array, n);
+ spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+ return ret;
+}
+
+static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
+ void **array, int n)
+{
+ int ret;
+
+ spin_lock_bh(&r->consumer_lock);
+ ret = __ptr_ring_consume_batched(r, array, n);
+ spin_unlock_bh(&r->consumer_lock);
+
+ return ret;
+}
+
/* Cast to structure type and call a function without discarding from FIFO.
* Function must return a value.
* Callers must take consumer_lock.
@@ -403,6 +468,61 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
return 0;
}
+/*
+ * Return entries into ring. Destroy entries that don't fit.
+ *
+ * Note: this is expected to be a rare slow path operation.
+ *
+ * Note: producer lock is nested within consumer lock, so if you
+ * resize you must make sure all uses nest correctly.
+ * In particular if you consume ring in interrupt or BH context, you must
+ * disable interrupts/BH when doing so.
+ */
+static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
+ void (*destroy)(void *))
+{
+ unsigned long flags;
+ int head;
+
+ spin_lock_irqsave(&r->consumer_lock, flags);
+ spin_lock(&r->producer_lock);
+
+ if (!r->size)
+ goto done;
+
+ /*
+ * Clean out buffered entries (for simplicity). This way following code
+ * can test entries for NULL and if not assume they are valid.
+ */
+ head = r->consumer_head - 1;
+ while (likely(head >= r->consumer_tail))
+ r->queue[head--] = NULL;
+ r->consumer_tail = r->consumer_head;
+
+ /*
+ * Go over entries in batch, start moving head back and copy entries.
+ * Stop when we run into previously unconsumed entries.
+ */
+ while (n) {
+ head = r->consumer_head - 1;
+ if (head < 0)
+ head = r->size - 1;
+ if (r->queue[head]) {
+ /* This batch entry will have to be destroyed. */
+ goto done;
+ }
+ r->queue[head] = batch[--n];
+ r->consumer_tail = r->consumer_head = head;
+ }
+
+done:
+ /* Destroy all entries left in the batch. */
+ while (n)
+ destroy(batch[--n]);
+ spin_unlock(&r->producer_lock);
+ spin_unlock_irqrestore(&r->consumer_lock, flags);
+}
+
static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
int size, gfp_t gfp,
void (*destroy)(void *))