From bbff2f321a864ee07c9d3d1245af498023146951 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Mon, 4 Jun 2018 13:57:13 +0200 Subject: xsk: new descriptor addressing scheme MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, AF_XDP only supports a fixed frame-size memory scheme where each frame is referenced via an index (idx). A user passes the frame index to the kernel, and the kernel acts upon the data. Some NICs, however, do not have a fixed frame-size model, instead they have a model where a memory window is passed to the hardware and multiple frames are filled into that window (referred to as the "type-writer" model). By changing the descriptor format from the current frame index addressing scheme, AF_XDP can in the future be extended to support these kinds of NICs. In the index-based model, an idx refers to a frame of size frame_size. Addressing a frame in the UMEM is done by offseting the UMEM starting address by a global offset, idx * frame_size + offset. Communicating via the fill- and completion-rings are done by means of idx. In this commit, the idx is removed in favor of an address (addr), which is a relative address ranging over the UMEM. To convert an idx-based address to the new addr is simply: addr = idx * frame_size + offset. We also stop referring to the UMEM "frame" as a frame. Instead it is simply called a chunk. To transfer ownership of a chunk to the kernel, the addr of the chunk is passed in the fill-ring. Note, that the kernel will mask addr to make it chunk aligned, so there is no need for userspace to do that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or 3000 to the fill-ring will refer to the same chunk. On the completion-ring, the addr will match that of the Tx descriptor, passed to the kernel. Changing the descriptor format to use chunks/addr will allow for future changes to move to a type-writer based model, where multiple frames can reside in one chunk. In this model passing one single chunk into the fill-ring, would potentially result in multiple Rx descriptors. This commit changes the uapi of AF_XDP sockets, and updates the documentation. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'net/xdp/xsk.c') diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 966307ce4b8e..4688c750df1d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -41,24 +41,27 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { - u32 id, len = xdp->data_end - xdp->data; + u32 len = xdp->data_end - xdp->data; void *buffer; + u64 addr; int err; if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) return -EINVAL; - if (!xskq_peek_id(xs->umem->fq, &id)) { + if (!xskq_peek_addr(xs->umem->fq, &addr) || + len > xs->umem->chunk_size_nohr) { xs->rx_dropped++; return -ENOSPC; } - buffer = xdp_umem_get_data_with_headroom(xs->umem, id); + addr += xs->umem->headroom; + + buffer = xdp_umem_get_data(xs->umem, addr); memcpy(buffer, xdp->data, len); - err = xskq_produce_batch_desc(xs->rx, id, len, - xs->umem->frame_headroom); + err = xskq_produce_batch_desc(xs->rx, addr, len); if (!err) - xskq_discard_id(xs->umem->fq); + xskq_discard_addr(xs->umem->fq); else xs->rx_dropped++; @@ -95,10 +98,10 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) static void xsk_destruct_skb(struct sk_buff *skb) { - u32 id = (u32)(long)skb_shinfo(skb)->destructor_arg; + u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg; struct xdp_sock *xs = xdp_sk(skb->sk); - WARN_ON_ONCE(xskq_produce_id(xs->umem->cq, id)); + WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr)); sock_wfree(skb); } @@ -123,14 +126,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, while (xskq_peek_desc(xs->tx, &desc)) { char *buffer; - u32 id, len; + u64 addr; + u32 len; if (max_batch-- == 0) { err = -EAGAIN; goto out; } - if (xskq_reserve_id(xs->umem->cq)) { + if (xskq_reserve_addr(xs->umem->cq)) { err = -EAGAIN; goto out; } @@ -153,8 +157,8 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, } skb_put(skb, len); - id = desc.idx; - buffer = xdp_umem_get_data(xs->umem, id) + desc.offset; + addr = desc.addr; + buffer = xdp_umem_get_data(xs->umem, addr); err = skb_store_bits(skb, 0, buffer, len); if (unlikely(err)) { kfree_skb(skb); @@ -164,7 +168,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, skb->dev = xs->dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_shinfo(skb)->destructor_arg = (void *)(long)id; + skb_shinfo(skb)->destructor_arg = (void *)(long)addr; skb->destructor = xsk_destruct_skb; err = dev_direct_xmit(skb, xs->queue_id); -- cgit v1.2.3-59-g8ed1b