aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/samples/bpf
diff options
context:
space:
mode:
authorMagnus Karlsson <magnus.karlsson@intel.com>2020-08-28 14:51:05 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2020-08-31 22:34:25 +0200
commitc8a039a47ffe06077929f29c9d4c7cba01a2104b (patch)
tree7bb041c8f5e73a19617a6dfddeaee3f7a00f3f28 /samples/bpf
parentxsk: Documentation for XDP_SHARED_UMEM between queues and netdevs (diff)
downloadwireguard-linux-c8a039a47ffe06077929f29c9d4c7cba01a2104b.tar.xz
wireguard-linux-c8a039a47ffe06077929f29c9d4c7cba01a2104b.zip
samples/bpf: Optimize l2fwd performance in xdpsock
Optimize the throughput performance of the l2fwd sub-app in the xdpsock sample application by removing a duplicate syscall and increasing the size of the fill ring. The latter needs some further explanation. We recommend that you set the fill ring size >= HW RX ring size + AF_XDP RX ring size. Make sure you fill up the fill ring with buffers at regular intervals, and you will with this setting avoid allocation failures in the driver. These are usually quite expensive since drivers have not been written to assume that allocation failures are common. For regular sockets, kernel allocated memory is used that only runs out in OOM situations that should be rare. These two performance optimizations together lead to a 6% percent improvement for the l2fwd app on my machine. Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/1598619065-1944-1-git-send-email-magnus.karlsson@intel.com
Diffstat (limited to 'samples/bpf')
-rw-r--r--samples/bpf/xdpsock_user.c22
1 files changed, 14 insertions, 8 deletions
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 19c679456a0e..9f54df768d35 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -613,7 +613,16 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
{
struct xsk_umem_info *umem;
struct xsk_umem_config cfg = {
- .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ /* We recommend that you set the fill ring size >= HW RX ring size +
+ * AF_XDP RX ring size. Make sure you fill up the fill ring
+ * with buffers at regular intervals, and you will with this setting
+ * avoid allocation failures in the driver. These are usually quite
+ * expensive since drivers have not been written to assume that
+ * allocation failures are common. For regular sockets, kernel
+ * allocated memory is used that only runs out in OOM situations
+ * that should be rare.
+ */
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = opt_xsk_frame_size,
.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
@@ -640,13 +649,13 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
u32 idx;
ret = xsk_ring_prod__reserve(&umem->fq,
- XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
- if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
+ XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, &idx);
+ if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2)
exit_with_error(-ret);
- for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
+ for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; i++)
*xsk_ring_prod__fill_addr(&umem->fq, idx++) =
i * opt_xsk_frame_size;
- xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
+ xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2);
}
static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
@@ -888,9 +897,6 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
if (!xsk->outstanding_tx)
return;
- if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
- kick_tx(xsk);
-
ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
xsk->outstanding_tx;