diff options
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- | drivers/net/virtio_net.c | 209 |
1 files changed, 140 insertions, 69 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 030d30603c29..f4c03518d7d2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -63,6 +63,11 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_CSUM }; +#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ + (1ULL << VIRTIO_NET_F_GUEST_UFO)) + struct virtnet_stat_desc { char desc[ETH_GSTRING_LEN]; size_t offset; @@ -190,6 +195,9 @@ struct virtnet_info { /* # of XDP queue pairs currently used by the driver */ u16 xdp_queue_pairs; + /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ + bool xdp_enabled; + /* I like... big packets and I cannot lie! */ bool big_packets; @@ -371,7 +379,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, struct page *page, unsigned int offset, unsigned int len, unsigned int truesize, - bool hdr_valid) + bool hdr_valid, unsigned int metasize) { struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -393,6 +401,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); + /* hdr_valid means no XDP, so we can copy the vnet header */ if (hdr_valid) memcpy(hdr, p, hdr_len); @@ -400,11 +409,20 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, offset += hdr_padded_len; p += hdr_padded_len; - copy = len; - if (copy > skb_tailroom(skb)) - copy = skb_tailroom(skb); + /* Copy all frame if it fits skb->head, otherwise + * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. 
+ */ + if (len <= skb_tailroom(skb)) + copy = len; + else + copy = ETH_HLEN + metasize; skb_put_data(skb, p, copy); + if (metasize) { + __skb_pull(skb, metasize); + skb_metadata_set(skb, metasize); + } + len -= copy; offset += copy; @@ -450,10 +468,6 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, struct virtio_net_hdr_mrg_rxbuf *hdr; int err; - /* virtqueue want to use data area in-front of packet */ - if (unlikely(xdpf->metasize > 0)) - return -EOPNOTSUPP; - if (unlikely(xdpf->headroom < vi->hdr_len)) return -EOVERFLOW; @@ -474,12 +488,41 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, return 0; } -static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi) -{ - unsigned int qp; - - qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); - return &vi->sq[qp]; +/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on + * the current cpu, so it does not need to be locked. + * + * Here we use macro instead of inline functions because we have to deal with + * three issues at the same time: 1. the choice of sq. 2. judge and execute the + * lock/unlock of txq 3. make sparse happy. It is difficult for two inline + * functions to perfectly solve these three problems at the same time. 
+ */ +#define virtnet_xdp_get_sq(vi) ({ \ + struct netdev_queue *txq; \ + typeof(vi) v = (vi); \ + unsigned int qp; \ + \ + if (v->curr_queue_pairs > nr_cpu_ids) { \ + qp = v->curr_queue_pairs - v->xdp_queue_pairs; \ + qp += smp_processor_id(); \ + txq = netdev_get_tx_queue(v->dev, qp); \ + __netif_tx_acquire(txq); \ + } else { \ + qp = smp_processor_id() % v->curr_queue_pairs; \ + txq = netdev_get_tx_queue(v->dev, qp); \ + __netif_tx_lock(txq, raw_smp_processor_id()); \ + } \ + v->sq + qp; \ +}) + +#define virtnet_xdp_put_sq(vi, q) { \ + struct netdev_queue *txq; \ + typeof(vi) v = (vi); \ + \ + txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \ + if (v->curr_queue_pairs > nr_cpu_ids) \ + __netif_tx_release(txq); \ + else \ + __netif_tx_unlock(txq); \ } static int virtnet_xdp_xmit(struct net_device *dev, @@ -505,7 +548,7 @@ static int virtnet_xdp_xmit(struct net_device *dev, if (!xdp_prog) return -ENXIO; - sq = virtnet_xdp_sq(vi); + sq = virtnet_xdp_get_sq(vi); if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { ret = -EINVAL; @@ -553,12 +596,13 @@ out: sq->stats.kicks += kicks; u64_stats_update_end(&sq->stats.syncp); + virtnet_xdp_put_sq(vi, sq); return ret; } static unsigned int virtnet_get_headroom(struct virtnet_info *vi) { - return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0; + return vi->xdp_enabled ? 
VIRTIO_XDP_HEADROOM : 0; } /* We copy the packet for XDP in the following cases: @@ -644,6 +688,7 @@ static struct sk_buff *receive_small(struct net_device *dev, unsigned int delta = 0; struct page *xdp_page; int err; + unsigned int metasize = 0; len -= vi->hdr_len; stats->bytes += len; @@ -683,8 +728,8 @@ static struct sk_buff *receive_small(struct net_device *dev, xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; xdp.data = xdp.data_hard_start + xdp_headroom; - xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; + xdp.data_meta = xdp.data; xdp.rxq = &rq->xdp_rxq; orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); @@ -695,6 +740,7 @@ static struct sk_buff *receive_small(struct net_device *dev, /* Recalculate length in case bpf program changed it */ delta = orig_data - xdp.data; len = xdp.data_end - xdp.data; + metasize = xdp.data - xdp.data_meta; break; case XDP_TX: stats->xdp_tx++; @@ -740,6 +786,9 @@ static struct sk_buff *receive_small(struct net_device *dev, memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); } /* keep zeroed vnet hdr since packet was changed by bpf */ + if (metasize) + skb_metadata_set(skb, metasize); + err: return skb; @@ -760,8 +809,8 @@ static struct sk_buff *receive_big(struct net_device *dev, struct virtnet_rq_stats *stats) { struct page *page = buf; - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, - PAGE_SIZE, true); + struct sk_buff *skb = + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0); stats->bytes += len - vi->hdr_len; if (unlikely(!skb)) @@ -793,6 +842,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); int err; + unsigned int metasize = 0; head_skb = NULL; stats->bytes += len - vi->hdr_len; @@ -839,8 +889,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, data = page_address(xdp_page) + offset; xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; xdp.data = data + 
vi->hdr_len; - xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + (len - vi->hdr_len); + xdp.data_meta = xdp.data; xdp.rxq = &rq->xdp_rxq; act = bpf_prog_run_xdp(xdp_prog, &xdp); @@ -848,24 +898,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, switch (act) { case XDP_PASS: + metasize = xdp.data - xdp.data_meta; + /* recalculate offset to account for any header - * adjustments. Note other cases do not build an - * skb and avoid using offset + * adjustments and minus the metasize to copy the + * metadata in page_to_skb(). Note other cases do not + * build an skb and avoid using offset */ - offset = xdp.data - - page_address(xdp_page) - vi->hdr_len; + offset = xdp.data - page_address(xdp_page) - + vi->hdr_len - metasize; - /* recalculate len if xdp.data or xdp.data_end were - * adjusted + /* recalculate len if xdp.data, xdp.data_end or + * xdp.data_meta were adjusted */ - len = xdp.data_end - xdp.data + vi->hdr_len; + len = xdp.data_end - xdp.data + vi->hdr_len + metasize; /* We can only create skb based on xdp_page. 
*/ if (unlikely(xdp_page != page)) { rcu_read_unlock(); put_page(page); - head_skb = page_to_skb(vi, rq, xdp_page, - offset, len, - PAGE_SIZE, false); + head_skb = page_to_skb(vi, rq, xdp_page, offset, + len, PAGE_SIZE, false, + metasize); return head_skb; } break; @@ -921,7 +974,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_skb; } - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, + metasize); curr_skb = head_skb; if (unlikely(!curr_skb)) @@ -1437,12 +1491,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget) xdp_do_flush_map(); if (xdp_xmit & VIRTIO_XDP_TX) { - sq = virtnet_xdp_sq(vi); + sq = virtnet_xdp_get_sq(vi); if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { u64_stats_update_begin(&sq->stats.syncp); sq->stats.kicks++; u64_stats_update_end(&sq->stats.syncp); } + virtnet_xdp_put_sq(vi, sq); } return received; @@ -1483,6 +1538,8 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) struct virtnet_info *vi = sq->vq->vdev->priv; unsigned int index = vq2txq(sq->vq); struct netdev_queue *txq; + int opaque; + bool done; if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { /* We don't need to enable cb for XDP */ @@ -1492,10 +1549,28 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); + virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, true); + + opaque = virtqueue_enable_cb_prepare(sq->vq); + + done = napi_complete_done(napi, 0); + + if (!done) + virtqueue_disable_cb(sq->vq); + __netif_tx_unlock(txq); - virtqueue_napi_complete(napi, sq->vq, 0); + if (done) { + if (unlikely(virtqueue_poll(sq->vq, opaque))) { + if (napi_schedule_prep(napi)) { + __netif_tx_lock(txq, raw_smp_processor_id()); + virtqueue_disable_cb(sq->vq); + __netif_tx_unlock(txq); + __napi_schedule(napi); + } + } + } if 
(sq->vq->num_free >= 2 + MAX_SKB_FRAGS) netif_tx_wake_queue(txq); @@ -1527,7 +1602,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) if (virtio_net_hdr_from_skb(skb, &hdr->hdr, virtio_is_little_endian(vi->vdev), false, 0)) - BUG(); + return -EPROTO; if (vi->mergeable_rx_bufs) hdr->num_buffers = 0; @@ -2067,14 +2142,16 @@ static int virtnet_set_channels(struct net_device *dev, get_online_cpus(); err = _virtnet_set_queues(vi, queue_pairs); - if (!err) { - netif_set_real_num_tx_queues(dev, queue_pairs); - netif_set_real_num_rx_queues(dev, queue_pairs); - - virtnet_set_affinity(vi); + if (err) { + put_online_cpus(); + goto err; } + virtnet_set_affinity(vi); put_online_cpus(); + netif_set_real_num_tx_queues(dev, queue_pairs); + netif_set_real_num_rx_queues(dev, queue_pairs); + err: return err; } @@ -2316,7 +2393,6 @@ static const struct ethtool_ops virtnet_ethtool_ops = { static void virtnet_freeze_down(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int i; /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); @@ -2324,14 +2400,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev) netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); netif_tx_unlock_bh(vi->dev); - cancel_delayed_work_sync(&vi->refill); - - if (netif_running(vi->dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - napi_disable(&vi->rq[i].napi); - virtnet_napi_tx_disable(&vi->sq[i].napi); - } - } + if (netif_running(vi->dev)) + virtnet_close(vi->dev); } static int init_vqs(struct virtnet_info *vi); @@ -2339,7 +2409,7 @@ static int init_vqs(struct virtnet_info *vi); static int virtnet_restore_up(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int err, i; + int err; err = init_vqs(vi); if (err) @@ -2348,15 +2418,9 @@ static int virtnet_restore_up(struct virtio_device *vdev) virtio_device_ready(vdev); if (netif_running(vi->dev)) { - for (i = 0; i < vi->curr_queue_pairs; i++) - if 
(!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); - - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); - virtnet_napi_tx_enable(vi, vi->sq[i].vq, - &vi->sq[i].napi); - } + err = virtnet_open(vi->dev); + if (err) + return err; } netif_tx_lock_bh(vi->dev); @@ -2416,7 +2480,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) { - NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first"); + NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); return -EOPNOTSUPP; } @@ -2437,10 +2501,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, /* XDP requires extra queues for XDP_TX */ if (curr_qp + xdp_qp > vi->max_queue_pairs) { - NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available"); - netdev_warn(dev, "request %i queues but max is %i\n", + netdev_warn(dev, "XDP request %i queues but max is %i. 
XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", curr_qp + xdp_qp, vi->max_queue_pairs); - return -ENOMEM; + xdp_qp = 0; } old_prog = rtnl_dereference(vi->rq[0].xdp_prog); @@ -2477,11 +2540,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, vi->xdp_queue_pairs = xdp_qp; if (prog) { + vi->xdp_enabled = true; for (i = 0; i < vi->max_queue_pairs; i++) { rcu_assign_pointer(vi->rq[i].xdp_prog, prog); if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); } + } else { + vi->xdp_enabled = false; } for (i = 0; i < vi->max_queue_pairs; i++) { @@ -2565,14 +2631,15 @@ static int virtnet_set_features(struct net_device *dev, u64 offloads; int err; - if ((dev->features ^ features) & NETIF_F_LRO) { - if (vi->xdp_queue_pairs) + if ((dev->features ^ features) & NETIF_F_GRO_HW) { + if (vi->xdp_enabled) return -EBUSY; - if (features & NETIF_F_LRO) + if (features & NETIF_F_GRO_HW) offloads = vi->guest_offloads_capable; else - offloads = 0; + offloads = vi->guest_offloads_capable & + ~GUEST_OFFLOAD_GRO_HW_MASK; err = virtnet_set_guest_offloads(vi, offloads); if (err) @@ -3048,9 +3115,9 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= NETIF_F_RXCSUM; if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) - dev->features |= NETIF_F_LRO; + dev->features |= NETIF_F_GRO_HW; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) - dev->hw_features |= NETIF_F_LRO; + dev->hw_features |= NETIF_F_GRO_HW; dev->vlan_features = dev->features; @@ -3108,6 +3175,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "device MTU appears to have changed it is now %d < %d", mtu, dev->min_mtu); + err = -EINVAL; goto free; } @@ -3257,8 +3325,11 @@ static __maybe_unused int virtnet_restore(struct virtio_device *vdev) virtnet_set_queues(vi, vi->curr_queue_pairs); err = virtnet_cpu_notif_add(vi); - if (err) + if (err) { + virtnet_freeze_down(vdev); + 
remove_vq_common(vi); return err; + } return 0; } |