aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-12-23 13:48:56 -0500
committerDavid S. Miller <davem@davemloft.net>2016-12-23 13:48:56 -0500
commite57cbe48a6b7a9a05a058aee5336d25407ad1c2c (patch)
tree3fb73f7fd7545d53be8ba17d64285fffc4768eed
parentMerge branch 'mlxsw-router-fixes' (diff)
parentvirtio-net: XDP support for small buffers (diff)
downloadlinux-dev-e57cbe48a6b7a9a05a058aee5336d25407ad1c2c.tar.xz
linux-dev-e57cbe48a6b7a9a05a058aee5336d25407ad1c2c.zip
Merge branch 'virtio-net-xdp-fixes'
Jason Wang says: ==================== several fixups for virtio-net XDP Merry Xmas and a Happy New year to all: This series tries to fix several issues for virtio-net XDP which could be categorized into several parts: - fix several issues during XDP linearizing - allow csumed packet to work for XDP_PASS - make EWMA rxbuf size estimation work for XDP - forbid XDP when GUEST_UFO is supported - remove big packet XDP support - add XDP support for small buffer Please see individual patches for details. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/virtio_net.c172
1 files changed, 102 insertions, 70 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 08327e005ccc..5deeda61d6d3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -333,9 +333,9 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
static void virtnet_xdp_xmit(struct virtnet_info *vi,
struct receive_queue *rq,
struct send_queue *sq,
- struct xdp_buff *xdp)
+ struct xdp_buff *xdp,
+ void *data)
{
- struct page *page = virt_to_head_page(xdp->data);
struct virtio_net_hdr_mrg_rxbuf *hdr;
unsigned int num_sg, len;
void *xdp_sent;
@@ -343,32 +343,46 @@ static void virtnet_xdp_xmit(struct virtnet_info *vi,
/* Free up any pending old buffers before queueing new ones. */
while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
- struct page *sent_page = virt_to_head_page(xdp_sent);
+ if (vi->mergeable_rx_bufs) {
+ struct page *sent_page = virt_to_head_page(xdp_sent);
- if (vi->mergeable_rx_bufs)
put_page(sent_page);
- else
- give_pages(rq, sent_page);
+ } else { /* small buffer */
+ struct sk_buff *skb = xdp_sent;
+
+ kfree_skb(skb);
+ }
}
- /* Zero header and leave csum up to XDP layers */
- hdr = xdp->data;
- memset(hdr, 0, vi->hdr_len);
+ if (vi->mergeable_rx_bufs) {
+ /* Zero header and leave csum up to XDP layers */
+ hdr = xdp->data;
+ memset(hdr, 0, vi->hdr_len);
+
+ num_sg = 1;
+ sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+ } else { /* small buffer */
+ struct sk_buff *skb = data;
+
+ /* Zero header and leave csum up to XDP layers */
+ hdr = skb_vnet_hdr(skb);
+ memset(hdr, 0, vi->hdr_len);
- num_sg = 1;
- sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+ num_sg = 2;
+ sg_init_table(sq->sg, 2);
+ sg_set_buf(sq->sg, hdr, vi->hdr_len);
+ skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
+ }
err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
- xdp->data, GFP_ATOMIC);
+ data, GFP_ATOMIC);
if (unlikely(err)) {
- if (vi->mergeable_rx_bufs)
+ if (vi->mergeable_rx_bufs) {
+ struct page *page = virt_to_head_page(xdp->data);
+
put_page(page);
- else
- give_pages(rq, page);
+ } else /* small buffer */
+ kfree_skb(data);
return; // On error abort to avoid unnecessary kick
- } else if (!vi->mergeable_rx_bufs) {
- /* If not mergeable bufs must be big packets so cleanup pages */
- give_pages(rq, (struct page *)page->private);
- page->private = 0;
}
virtqueue_kick(sq->vq);
@@ -377,23 +391,26 @@ static void virtnet_xdp_xmit(struct virtnet_info *vi,
static u32 do_xdp_prog(struct virtnet_info *vi,
struct receive_queue *rq,
struct bpf_prog *xdp_prog,
- struct page *page, int offset, int len)
+ void *data, int len)
{
int hdr_padded_len;
struct xdp_buff xdp;
+ void *buf;
unsigned int qp;
u32 act;
- u8 *buf;
-
- buf = page_address(page) + offset;
- if (vi->mergeable_rx_bufs)
+ if (vi->mergeable_rx_bufs) {
hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
- else
- hdr_padded_len = sizeof(struct padded_vnet_hdr);
+ xdp.data = data + hdr_padded_len;
+ xdp.data_end = xdp.data + (len - vi->hdr_len);
+ buf = data;
+ } else { /* small buffers */
+ struct sk_buff *skb = data;
- xdp.data = buf + hdr_padded_len;
- xdp.data_end = xdp.data + (len - vi->hdr_len);
+ xdp.data = skb->data;
+ xdp.data_end = xdp.data + len;
+ buf = skb->data;
+ }
act = bpf_prog_run_xdp(xdp_prog, &xdp);
switch (act) {
@@ -403,8 +420,8 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
qp = vi->curr_queue_pairs -
vi->xdp_queue_pairs +
smp_processor_id();
- xdp.data = buf + (vi->mergeable_rx_bufs ? 0 : 4);
- virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp);
+ xdp.data = buf;
+ virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp, data);
return XDP_TX;
default:
bpf_warn_invalid_xdp_action(act);
@@ -414,26 +431,17 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
}
}
-static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
+static struct sk_buff *receive_small(struct net_device *dev,
+ struct virtnet_info *vi,
+ struct receive_queue *rq,
+ void *buf, unsigned int len)
{
struct sk_buff * skb = buf;
+ struct bpf_prog *xdp_prog;
len -= vi->hdr_len;
skb_trim(skb, len);
- return skb;
-}
-
-static struct sk_buff *receive_big(struct net_device *dev,
- struct virtnet_info *vi,
- struct receive_queue *rq,
- void *buf,
- unsigned int len)
-{
- struct bpf_prog *xdp_prog;
- struct page *page = buf;
- struct sk_buff *skb;
-
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
@@ -442,7 +450,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
goto err_xdp;
- act = do_xdp_prog(vi, rq, xdp_prog, page, 0, len);
+ act = do_xdp_prog(vi, rq, xdp_prog, skb, len);
switch (act) {
case XDP_PASS:
break;
@@ -456,18 +464,33 @@ static struct sk_buff *receive_big(struct net_device *dev,
}
rcu_read_unlock();
- skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+ return skb;
+
+err_xdp:
+ rcu_read_unlock();
+ dev->stats.rx_dropped++;
+ kfree_skb(skb);
+xdp_xmit:
+ return NULL;
+}
+
+static struct sk_buff *receive_big(struct net_device *dev,
+ struct virtnet_info *vi,
+ struct receive_queue *rq,
+ void *buf,
+ unsigned int len)
+{
+ struct page *page = buf;
+ struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+
if (unlikely(!skb))
goto err;
return skb;
-err_xdp:
- rcu_read_unlock();
err:
dev->stats.rx_dropped++;
give_pages(rq, page);
-xdp_xmit:
return NULL;
}
@@ -483,7 +506,7 @@ xdp_xmit:
* anymore.
*/
static struct page *xdp_linearize_page(struct receive_queue *rq,
- u16 num_buf,
+ u16 *num_buf,
struct page *p,
int offset,
unsigned int *len)
@@ -497,7 +520,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
page_off += *len;
- while (--num_buf) {
+ while (--*num_buf) {
unsigned int buflen;
unsigned long ctx;
void *buf;
@@ -507,19 +530,22 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
if (unlikely(!ctx))
goto err_buf;
+ buf = mergeable_ctx_to_buf_address(ctx);
+ p = virt_to_head_page(buf);
+ off = buf - page_address(p);
+
/* guard against a misconfigured or uncooperative backend that
* is sending packet larger than the MTU.
*/
- if ((page_off + buflen) > PAGE_SIZE)
+ if ((page_off + buflen) > PAGE_SIZE) {
+ put_page(p);
goto err_buf;
-
- buf = mergeable_ctx_to_buf_address(ctx);
- p = virt_to_head_page(buf);
- off = buf - page_address(p);
+ }
memcpy(page_address(page) + page_off,
page_address(p) + off, buflen);
page_off += buflen;
+ put_page(p);
}
*len = page_off;
@@ -552,16 +578,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct page *xdp_page;
u32 act;
- /* No known backend devices should send packets with
- * more than a single buffer when XDP conditions are
- * met. However it is not strictly illegal so the case
- * is handled as an exception and a warning is thrown.
- */
+ /* This happens when rx buffer size is underestimated */
if (unlikely(num_buf > 1)) {
- bpf_warn_invalid_xdp_buffer();
-
/* linearize data for XDP */
- xdp_page = xdp_linearize_page(rq, num_buf,
+ xdp_page = xdp_linearize_page(rq, &num_buf,
page, offset, &len);
if (!xdp_page)
goto err_xdp;
@@ -575,16 +595,25 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
* the receive path after XDP is loaded. In practice I
* was not able to create this condition.
*/
- if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+ if (unlikely(hdr->hdr.gso_type))
goto err_xdp;
- act = do_xdp_prog(vi, rq, xdp_prog, page, offset, len);
+ act = do_xdp_prog(vi, rq, xdp_prog,
+ page_address(xdp_page) + offset, len);
switch (act) {
case XDP_PASS:
- if (unlikely(xdp_page != page))
- __free_pages(xdp_page, 0);
+ /* We can only create skb based on xdp_page. */
+ if (unlikely(xdp_page != page)) {
+ rcu_read_unlock();
+ put_page(page);
+ head_skb = page_to_skb(vi, rq, xdp_page,
+ 0, len, PAGE_SIZE);
+ ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
+ return head_skb;
+ }
break;
case XDP_TX:
+ ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
if (unlikely(xdp_page != page))
goto err_xdp;
rcu_read_unlock();
@@ -593,6 +622,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
default:
if (unlikely(xdp_page != page))
__free_pages(xdp_page, 0);
+ ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
goto err_xdp;
}
}
@@ -704,7 +734,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
else if (vi->big_packets)
skb = receive_big(dev, vi, rq, buf, len);
else
- skb = receive_small(vi, buf, len);
+ skb = receive_small(dev, vi, rq, buf, len);
if (unlikely(!skb))
return;
@@ -1678,7 +1708,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
int i, err;
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
- virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6)) {
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) {
netdev_warn(dev, "can't set XDP while host is implementing LRO, disable LRO first\n");
return -EOPNOTSUPP;
}