about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/drivers/net/virtio_net.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- drivers/net/virtio_net.c 209
1 files changed, 140 insertions, 69 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 030d30603c29..f4c03518d7d2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -63,6 +63,11 @@ static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_CSUM
};
+#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+ (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
+ (1ULL << VIRTIO_NET_F_GUEST_UFO))
+
struct virtnet_stat_desc {
char desc[ETH_GSTRING_LEN];
size_t offset;
@@ -190,6 +195,9 @@ struct virtnet_info {
/* # of XDP queue pairs currently used by the driver */
u16 xdp_queue_pairs;
+ /* xdp_queue_pairs may be 0 even when XDP is loaded, so track that separately. */
+ bool xdp_enabled;
+
/* I like... big packets and I cannot lie! */
bool big_packets;
@@ -371,7 +379,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
struct receive_queue *rq,
struct page *page, unsigned int offset,
unsigned int len, unsigned int truesize,
- bool hdr_valid)
+ bool hdr_valid, unsigned int metasize)
{
struct sk_buff *skb;
struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -393,6 +401,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
+ /* hdr_valid means no XDP, so we can copy the vnet header */
if (hdr_valid)
memcpy(hdr, p, hdr_len);
@@ -400,11 +409,20 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
offset += hdr_padded_len;
p += hdr_padded_len;
- copy = len;
- if (copy > skb_tailroom(skb))
- copy = skb_tailroom(skb);
+ /* Copy the whole frame if it fits in skb->head; otherwise
+ * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
+ */
+ if (len <= skb_tailroom(skb))
+ copy = len;
+ else
+ copy = ETH_HLEN + metasize;
skb_put_data(skb, p, copy);
+ if (metasize) {
+ __skb_pull(skb, metasize);
+ skb_metadata_set(skb, metasize);
+ }
+
len -= copy;
offset += copy;
@@ -450,10 +468,6 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
struct virtio_net_hdr_mrg_rxbuf *hdr;
int err;
- /* virtqueue want to use data area in-front of packet */
- if (unlikely(xdpf->metasize > 0))
- return -EOPNOTSUPP;
-
if (unlikely(xdpf->headroom < vi->hdr_len))
return -EOVERFLOW;
@@ -474,12 +488,41 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
return 0;
}
-static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
-{
- unsigned int qp;
-
- qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
- return &vi->sq[qp];
+/* When vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for XDP TX on
+ * the current CPU, so it does not need to be locked.
+ *
+ * We use macros instead of inline functions here because three issues must be
+ * handled at the same time: 1. choosing the sq, 2. deciding on and performing
+ * the lock/unlock of the txq, 3. keeping sparse happy. It is difficult for two
+ * inline functions to solve all three problems cleanly at the same time.
+ */
+#define virtnet_xdp_get_sq(vi) ({ \
+ struct netdev_queue *txq; \
+ typeof(vi) v = (vi); \
+ unsigned int qp; \
+ \
+ if (v->curr_queue_pairs > nr_cpu_ids) { \
+ qp = v->curr_queue_pairs - v->xdp_queue_pairs; \
+ qp += smp_processor_id(); \
+ txq = netdev_get_tx_queue(v->dev, qp); \
+ __netif_tx_acquire(txq); \
+ } else { \
+ qp = smp_processor_id() % v->curr_queue_pairs; \
+ txq = netdev_get_tx_queue(v->dev, qp); \
+ __netif_tx_lock(txq, raw_smp_processor_id()); \
+ } \
+ v->sq + qp; \
+})
+
+#define virtnet_xdp_put_sq(vi, q) { \
+ struct netdev_queue *txq; \
+ typeof(vi) v = (vi); \
+ \
+ txq = netdev_get_tx_queue(v->dev, (q) - v->sq); \
+ if (v->curr_queue_pairs > nr_cpu_ids) \
+ __netif_tx_release(txq); \
+ else \
+ __netif_tx_unlock(txq); \
}
static int virtnet_xdp_xmit(struct net_device *dev,
@@ -505,7 +548,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
if (!xdp_prog)
return -ENXIO;
- sq = virtnet_xdp_sq(vi);
+ sq = virtnet_xdp_get_sq(vi);
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
ret = -EINVAL;
@@ -553,12 +596,13 @@ out:
sq->stats.kicks += kicks;
u64_stats_update_end(&sq->stats.syncp);
+ virtnet_xdp_put_sq(vi, sq);
return ret;
}
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
- return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
+ return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
}
/* We copy the packet for XDP in the following cases:
@@ -644,6 +688,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
unsigned int delta = 0;
struct page *xdp_page;
int err;
+ unsigned int metasize = 0;
len -= vi->hdr_len;
stats->bytes += len;
@@ -683,8 +728,8 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
xdp.data = xdp.data_hard_start + xdp_headroom;
- xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
+ xdp.data_meta = xdp.data;
xdp.rxq = &rq->xdp_rxq;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -695,6 +740,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
/* Recalculate length in case bpf program changed it */
delta = orig_data - xdp.data;
len = xdp.data_end - xdp.data;
+ metasize = xdp.data - xdp.data_meta;
break;
case XDP_TX:
stats->xdp_tx++;
@@ -740,6 +786,9 @@ static struct sk_buff *receive_small(struct net_device *dev,
memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
} /* keep zeroed vnet hdr since packet was changed by bpf */
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
err:
return skb;
@@ -760,8 +809,8 @@ static struct sk_buff *receive_big(struct net_device *dev,
struct virtnet_rq_stats *stats)
{
struct page *page = buf;
- struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
- PAGE_SIZE, true);
+ struct sk_buff *skb =
+ page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
stats->bytes += len - vi->hdr_len;
if (unlikely(!skb))
@@ -793,6 +842,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
unsigned int truesize;
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
int err;
+ unsigned int metasize = 0;
head_skb = NULL;
stats->bytes += len - vi->hdr_len;
@@ -839,8 +889,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
data = page_address(xdp_page) + offset;
xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
xdp.data = data + vi->hdr_len;
- xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + (len - vi->hdr_len);
+ xdp.data_meta = xdp.data;
xdp.rxq = &rq->xdp_rxq;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -848,24 +898,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
switch (act) {
case XDP_PASS:
+ metasize = xdp.data - xdp.data_meta;
+
/* recalculate offset to account for any header
- * adjustments. Note other cases do not build an
- * skb and avoid using offset
+ * adjustments, and subtract metasize so that the
+ * metadata is copied in page_to_skb(). Note other cases
+ * do not build an skb and avoid using offset
*/
- offset = xdp.data -
- page_address(xdp_page) - vi->hdr_len;
+ offset = xdp.data - page_address(xdp_page) -
+ vi->hdr_len - metasize;
- /* recalculate len if xdp.data or xdp.data_end were
- * adjusted
+ /* recalculate len if xdp.data, xdp.data_end or
+ * xdp.data_meta were adjusted
*/
- len = xdp.data_end - xdp.data + vi->hdr_len;
+ len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
/* We can only create skb based on xdp_page. */
if (unlikely(xdp_page != page)) {
rcu_read_unlock();
put_page(page);
- head_skb = page_to_skb(vi, rq, xdp_page,
- offset, len,
- PAGE_SIZE, false);
+ head_skb = page_to_skb(vi, rq, xdp_page, offset,
+ len, PAGE_SIZE, false,
+ metasize);
return head_skb;
}
break;
@@ -921,7 +974,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
goto err_skb;
}
- head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
+ head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
+ metasize);
curr_skb = head_skb;
if (unlikely(!curr_skb))
@@ -1437,12 +1491,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
xdp_do_flush_map();
if (xdp_xmit & VIRTIO_XDP_TX) {
- sq = virtnet_xdp_sq(vi);
+ sq = virtnet_xdp_get_sq(vi);
if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
u64_stats_update_begin(&sq->stats.syncp);
sq->stats.kicks++;
u64_stats_update_end(&sq->stats.syncp);
}
+ virtnet_xdp_put_sq(vi, sq);
}
return received;
@@ -1483,6 +1538,8 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
struct virtnet_info *vi = sq->vq->vdev->priv;
unsigned int index = vq2txq(sq->vq);
struct netdev_queue *txq;
+ int opaque;
+ bool done;
if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
/* We don't need to enable cb for XDP */
@@ -1492,10 +1549,28 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
+ virtqueue_disable_cb(sq->vq);
free_old_xmit_skbs(sq, true);
+
+ opaque = virtqueue_enable_cb_prepare(sq->vq);
+
+ done = napi_complete_done(napi, 0);
+
+ if (!done)
+ virtqueue_disable_cb(sq->vq);
+
__netif_tx_unlock(txq);
- virtqueue_napi_complete(napi, sq->vq, 0);
+ if (done) {
+ if (unlikely(virtqueue_poll(sq->vq, opaque))) {
+ if (napi_schedule_prep(napi)) {
+ __netif_tx_lock(txq, raw_smp_processor_id());
+ virtqueue_disable_cb(sq->vq);
+ __netif_tx_unlock(txq);
+ __napi_schedule(napi);
+ }
+ }
+ }
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq);
@@ -1527,7 +1602,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
virtio_is_little_endian(vi->vdev), false,
0))
- BUG();
+ return -EPROTO;
if (vi->mergeable_rx_bufs)
hdr->num_buffers = 0;
@@ -2067,14 +2142,16 @@ static int virtnet_set_channels(struct net_device *dev,
get_online_cpus();
err = _virtnet_set_queues(vi, queue_pairs);
- if (!err) {
- netif_set_real_num_tx_queues(dev, queue_pairs);
- netif_set_real_num_rx_queues(dev, queue_pairs);
-
- virtnet_set_affinity(vi);
+ if (err) {
+ put_online_cpus();
+ goto err;
}
+ virtnet_set_affinity(vi);
put_online_cpus();
+ netif_set_real_num_tx_queues(dev, queue_pairs);
+ netif_set_real_num_rx_queues(dev, queue_pairs);
+ err:
return err;
}
@@ -2316,7 +2393,6 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
static void virtnet_freeze_down(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
- int i;
/* Make sure no work handler is accessing the device */
flush_work(&vi->config_work);
@@ -2324,14 +2400,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
netif_tx_lock_bh(vi->dev);
netif_device_detach(vi->dev);
netif_tx_unlock_bh(vi->dev);
- cancel_delayed_work_sync(&vi->refill);
-
- if (netif_running(vi->dev)) {
- for (i = 0; i < vi->max_queue_pairs; i++) {
- napi_disable(&vi->rq[i].napi);
- virtnet_napi_tx_disable(&vi->sq[i].napi);
- }
- }
+ if (netif_running(vi->dev))
+ virtnet_close(vi->dev);
}
static int init_vqs(struct virtnet_info *vi);
@@ -2339,7 +2409,7 @@ static int init_vqs(struct virtnet_info *vi);
static int virtnet_restore_up(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
- int err, i;
+ int err;
err = init_vqs(vi);
if (err)
@@ -2348,15 +2418,9 @@ static int virtnet_restore_up(struct virtio_device *vdev)
virtio_device_ready(vdev);
if (netif_running(vi->dev)) {
- for (i = 0; i < vi->curr_queue_pairs; i++)
- if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
- schedule_delayed_work(&vi->refill, 0);
-
- for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
- virtnet_napi_tx_enable(vi, vi->sq[i].vq,
- &vi->sq[i].napi);
- }
+ err = virtnet_open(vi->dev);
+ if (err)
+ return err;
}
netif_tx_lock_bh(vi->dev);
@@ -2416,7 +2480,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
- NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
+ NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
return -EOPNOTSUPP;
}
@@ -2437,10 +2501,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
/* XDP requires extra queues for XDP_TX */
if (curr_qp + xdp_qp > vi->max_queue_pairs) {
- NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
- netdev_warn(dev, "request %i queues but max is %i\n",
+ netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
curr_qp + xdp_qp, vi->max_queue_pairs);
- return -ENOMEM;
+ xdp_qp = 0;
}
old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
@@ -2477,11 +2540,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
vi->xdp_queue_pairs = xdp_qp;
if (prog) {
+ vi->xdp_enabled = true;
for (i = 0; i < vi->max_queue_pairs; i++) {
rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
if (i == 0 && !old_prog)
virtnet_clear_guest_offloads(vi);
}
+ } else {
+ vi->xdp_enabled = false;
}
for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -2565,14 +2631,15 @@ static int virtnet_set_features(struct net_device *dev,
u64 offloads;
int err;
- if ((dev->features ^ features) & NETIF_F_LRO) {
- if (vi->xdp_queue_pairs)
+ if ((dev->features ^ features) & NETIF_F_GRO_HW) {
+ if (vi->xdp_enabled)
return -EBUSY;
- if (features & NETIF_F_LRO)
+ if (features & NETIF_F_GRO_HW)
offloads = vi->guest_offloads_capable;
else
- offloads = 0;
+ offloads = vi->guest_offloads_capable &
+ ~GUEST_OFFLOAD_GRO_HW_MASK;
err = virtnet_set_guest_offloads(vi, offloads);
if (err)
@@ -3048,9 +3115,9 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->features |= NETIF_F_RXCSUM;
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6))
- dev->features |= NETIF_F_LRO;
+ dev->features |= NETIF_F_GRO_HW;
if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
- dev->hw_features |= NETIF_F_LRO;
+ dev->hw_features |= NETIF_F_GRO_HW;
dev->vlan_features = dev->features;
@@ -3108,6 +3175,7 @@ static int virtnet_probe(struct virtio_device *vdev)
dev_err(&vdev->dev,
"device MTU appears to have changed it is now %d < %d",
mtu, dev->min_mtu);
+ err = -EINVAL;
goto free;
}
@@ -3257,8 +3325,11 @@ static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
virtnet_set_queues(vi, vi->curr_queue_pairs);
err = virtnet_cpu_notif_add(vi);
- if (err)
+ if (err) {
+ virtnet_freeze_down(vdev);
+ remove_vq_common(vi);
return err;
+ }
return 0;
}