aboutsummaryrefslogtreecommitdiffstats
path: root/net/xdp/xsk.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/xdp/xsk.c')
-rw-r--r--net/xdp/xsk.c216
1 files changed, 73 insertions, 143 deletions
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index c350108aa38d..b6c0f08bd80d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -22,7 +22,7 @@
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include <net/xdp.h>
#include "xsk_queue.h"
@@ -39,24 +39,6 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
READ_ONCE(xs->umem->fq);
}
-bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
-{
- return xskq_cons_has_entries(umem->fq, cnt);
-}
-EXPORT_SYMBOL(xsk_umem_has_addrs);
-
-bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
-{
- return xskq_cons_peek_addr(umem->fq, addr, umem);
-}
-EXPORT_SYMBOL(xsk_umem_peek_addr);
-
-void xsk_umem_release_addr(struct xdp_umem *umem)
-{
- xskq_cons_release(umem->fq);
-}
-EXPORT_SYMBOL(xsk_umem_release_addr);
-
void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
{
if (umem->need_wakeup & XDP_WAKEUP_RX)
@@ -75,7 +57,7 @@ void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
return;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
}
rcu_read_unlock();
@@ -102,7 +84,7 @@ void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
return;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
}
rcu_read_unlock();
@@ -117,76 +99,82 @@ bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
}
EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
-/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for
- * each page. This is only required in copy mode.
- */
-static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
- u32 len, u32 metalen)
+void xp_release(struct xdp_buff_xsk *xskb)
{
- void *to_buf = xdp_umem_get_data(umem, addr);
-
- addr = xsk_umem_add_offset_to_addr(addr);
- if (xskq_cons_crosses_non_contig_pg(umem, addr, len + metalen)) {
- void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
- u64 page_start = addr & ~(PAGE_SIZE - 1);
- u64 first_len = PAGE_SIZE - (addr - page_start);
-
- memcpy(to_buf, from_buf, first_len);
- memcpy(next_pg_addr, from_buf + first_len,
- len + metalen - first_len);
+ xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
+}
- return;
- }
+static u64 xp_get_handle(struct xdp_buff_xsk *xskb)
+{
+ u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
- memcpy(to_buf, from_buf, len + metalen);
+ offset += xskb->pool->headroom;
+ if (!xskb->pool->unaligned)
+ return xskb->orig_addr + offset;
+ return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- u64 offset = xs->umem->headroom;
- u64 addr, memcpy_addr;
- void *from_buf;
- u32 metalen;
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ u64 addr;
int err;
- if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) ||
- len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
+ addr = xp_get_handle(xskb);
+ err = xskq_prod_reserve_desc(xs->rx, addr, len);
+ if (err) {
xs->rx_dropped++;
- return -ENOSPC;
+ return err;
}
- if (unlikely(xdp_data_meta_unsupported(xdp))) {
- from_buf = xdp->data;
- metalen = 0;
- } else {
- from_buf = xdp->data_meta;
- metalen = xdp->data - xdp->data_meta;
- }
+ xp_release(xskb);
+ return 0;
+}
- memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
- __xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen);
+static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
+{
+ void *from_buf, *to_buf;
+ u32 metalen;
- offset += metalen;
- addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
- err = xskq_prod_reserve_desc(xs->rx, addr, len);
- if (!err) {
- xskq_cons_release(xs->umem->fq);
- xdp_return_buff(xdp);
- return 0;
+ if (unlikely(xdp_data_meta_unsupported(from))) {
+ from_buf = from->data;
+ to_buf = to->data;
+ metalen = 0;
+ } else {
+ from_buf = from->data_meta;
+ metalen = from->data - from->data_meta;
+ to_buf = to->data - metalen;
}
- xs->rx_dropped++;
- return err;
+ memcpy(to_buf, from_buf, len + metalen);
}
-static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len,
+ bool explicit_free)
{
- int err = xskq_prod_reserve_desc(xs->rx, xdp->handle, len);
+ struct xdp_buff *xsk_xdp;
+ int err;
- if (err)
+ if (len > xsk_umem_get_rx_frame_size(xs->umem)) {
xs->rx_dropped++;
+ return -ENOSPC;
+ }
- return err;
+ xsk_xdp = xsk_buff_alloc(xs->umem);
+ if (!xsk_xdp) {
+ xs->rx_dropped++;
+ return -ENOSPC;
+ }
+
+ xsk_copy_xdp(xsk_xdp, xdp, len);
+ err = __xsk_rcv_zc(xs, xsk_xdp, len);
+ if (err) {
+ xsk_buff_free(xsk_xdp);
+ return err;
+ }
+ if (explicit_free)
+ xdp_return_buff(xdp);
+ return 0;
}
static bool xsk_is_bound(struct xdp_sock *xs)
@@ -199,7 +187,8 @@ static bool xsk_is_bound(struct xdp_sock *xs)
return false;
}
-static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
+ bool explicit_free)
{
u32 len;
@@ -211,8 +200,9 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
len = xdp->data_end - xdp->data;
- return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
- __xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
+ return xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ?
+ __xsk_rcv_zc(xs, xdp, len) :
+ __xsk_rcv(xs, xdp, len, explicit_free);
}
static void xsk_flush(struct xdp_sock *xs)
@@ -224,46 +214,11 @@ static void xsk_flush(struct xdp_sock *xs)
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- u32 metalen = xdp->data - xdp->data_meta;
- u32 len = xdp->data_end - xdp->data;
- u64 offset = xs->umem->headroom;
- void *buffer;
- u64 addr;
int err;
spin_lock_bh(&xs->rx_lock);
-
- if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) {
- err = -EINVAL;
- goto out_unlock;
- }
-
- if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) ||
- len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
- err = -ENOSPC;
- goto out_drop;
- }
-
- addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
- buffer = xdp_umem_get_data(xs->umem, addr);
- memcpy(buffer, xdp->data_meta, len + metalen);
-
- addr = xsk_umem_adjust_offset(xs->umem, addr, metalen);
- err = xskq_prod_reserve_desc(xs->rx, addr, len);
- if (err)
- goto out_drop;
-
- xskq_cons_release(xs->umem->fq);
- xskq_prod_submit(xs->rx);
-
- spin_unlock_bh(&xs->rx_lock);
-
- xs->sk.sk_data_ready(&xs->sk);
- return 0;
-
-out_drop:
- xs->rx_dropped++;
-out_unlock:
+ err = xsk_rcv(xs, xdp, false);
+ xsk_flush(xs);
spin_unlock_bh(&xs->rx_lock);
return err;
}
@@ -273,7 +228,7 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
int err;
- err = xsk_rcv(xs, xdp);
+ err = xsk_rcv(xs, xdp, true);
if (err)
return err;
@@ -305,7 +260,7 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem)
struct xdp_sock *xs;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
__xskq_cons_release(xs->tx);
xs->sk.sk_write_space(&xs->sk);
}
@@ -318,11 +273,11 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
struct xdp_sock *xs;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
if (!xskq_cons_peek_desc(xs->tx, desc, umem))
continue;
- /* This is the backpreassure mechanism for the Tx path.
+ /* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
@@ -404,9 +359,9 @@ static int xsk_generic_xmit(struct sock *sk)
skb_put(skb, len);
addr = desc.addr;
- buffer = xdp_umem_get_data(xs->umem, addr);
+ buffer = xsk_buff_raw_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
- /* This is the backpreassure mechanism for the Tx path.
+ /* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
@@ -629,24 +584,6 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
return sock;
}
-/* Check if umem pages are contiguous.
- * If zero-copy mode, use the DMA address to do the page contiguity check
- * For all other modes we use addr (kernel virtual address)
- * Store the result in the low bits of addr.
- */
-static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
-{
- struct xdp_umem_page *pgs = umem->pages;
- int i, is_contig;
-
- for (i = 0; i < umem->npgs - 1; i++) {
- is_contig = (flags & XDP_ZEROCOPY) ?
- (pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) :
- (pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr);
- pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT;
- }
-}
-
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -729,23 +666,14 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock;
} else {
/* This xsk has its own umem. */
- xskq_set_umem(xs->umem->fq, xs->umem->size,
- xs->umem->chunk_mask);
- xskq_set_umem(xs->umem->cq, xs->umem->size,
- xs->umem->chunk_mask);
-
err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
if (err)
goto out_unlock;
-
- xsk_check_page_contiguity(xs->umem, flags);
}
xs->dev = dev;
xs->zc = xs->umem->zc;
xs->queue_id = qid;
- xskq_set_umem(xs->rx, xs->umem->size, xs->umem->chunk_mask);
- xskq_set_umem(xs->tx, xs->umem->size, xs->umem->chunk_mask);
xdp_add_sk_umem(xs->umem, xs);
out_unlock:
@@ -860,6 +788,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
&xs->umem->cq;
err = xsk_init_queue(entries, q, true);
+ if (optname == XDP_UMEM_FILL_RING)
+ xp_set_fq(xs->umem->pool, *q);
mutex_unlock(&xs->mutex);
return err;
}