aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/xen-netfront.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-05 20:13:21 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-05 20:13:21 -0700
commit47ec5303d73ea344e84f46660fff693c57641386 (patch)
treea2252debab749de29620c43285295d60c4741119 /drivers/net/xen-netfront.c
parentMerge tag 'drm-next-2020-08-06' of git://anongit.freedesktop.org/drm/drm (diff)
parentnet: thunderx: initialize VF's mailbox mutex before first usage (diff)
downloadlinux-dev-47ec5303d73ea344e84f46660fff693c57641386.tar.xz
linux-dev-47ec5303d73ea344e84f46660fff693c57641386.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller: 1) Support 6Ghz band in ath11k driver, from Rajkumar Manoharan. 2) Support UDP segmentation in code TSO code, from Eric Dumazet. 3) Allow flashing different flash images in cxgb4 driver, from Vishal Kulkarni. 4) Add drop frames counter and flow status to tc flower offloading, from Po Liu. 5) Support n-tuple filters in cxgb4, from Vishal Kulkarni. 6) Various new indirect call avoidance, from Eric Dumazet and Brian Vazquez. 7) Fix BPF verifier failures on 32-bit pointer arithmetic, from Yonghong Song. 8) Support querying and setting hardware address of a port function via devlink, use this in mlx5, from Parav Pandit. 9) Support hw ipsec offload on bonding slaves, from Jarod Wilson. 10) Switch qca8k driver over to phylink, from Jonathan McDowell. 11) In bpftool, show list of processes holding BPF FD references to maps, programs, links, and btf objects. From Andrii Nakryiko. 12) Several conversions over to generic power management, from Vaibhav Gupta. 13) Add support for SO_KEEPALIVE et al. to bpf_setsockopt(), from Dmitry Yakunin. 14) Various https url conversions, from Alexander A. Klimov. 15) Timestamping and PHC support for mscc PHY driver, from Antoine Tenart. 16) Support bpf iterating over tcp and udp sockets, from Yonghong Song. 17) Support 5GBASE-T i40e NICs, from Aleksandr Loktionov. 18) Add kTLS RX HW offload support to mlx5e, from Tariq Toukan. 19) Fix the ->ndo_start_xmit() return type to be netdev_tx_t in several drivers. From Luc Van Oostenryck. 20) XDP support for xen-netfront, from Denis Kirjanov. 21) Support receive buffer autotuning in MPTCP, from Florian Westphal. 22) Support EF100 chip in sfc driver, from Edward Cree. 23) Add XDP support to mvpp2 driver, from Matteo Croce. 24) Support MPTCP in sock_diag, from Paolo Abeni. 25) Commonize UDP tunnel offloading code by creating udp_tunnel_nic infrastructure, from Jakub Kicinski. 26) Several pci_ --> dma_ API conversions, from Christophe JAILLET. 27) Add FLOW_ACTION_POLICE support to mlxsw, from Ido Schimmel. 28) Add SK_LOOKUP bpf program type, from Jakub Sitnicki. 29) Refactor a lot of networking socket option handling code in order to avoid set_fs() calls, from Christoph Hellwig. 30) Add rfc4884 support to icmp code, from Willem de Bruijn. 31) Support TBF offload in dpaa2-eth driver, from Ioana Ciornei. 32) Support XDP_REDIRECT in qede driver, from Alexander Lobakin. 33) Support PCI relaxed ordering in mlx5 driver, from Aya Levin. 34) Support TCP syncookies in MPTCP, from Flowian Westphal. 35) Fix several tricky cases of PMTU handling wrt. briding, from Stefano Brivio. * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2056 commits) net: thunderx: initialize VF's mailbox mutex before first usage usb: hso: remove bogus check for EINPROGRESS usb: hso: no complaint about kmalloc failure hso: fix bailout in error case of probe ip_tunnel_core: Fix build for archs without _HAVE_ARCH_IPV6_CSUM selftests/net: relax cpu affinity requirement in msg_zerocopy test mptcp: be careful on subflow creation selftests: rtnetlink: make kci_test_encap() return sub-test result selftests: rtnetlink: correct the final return value for the test net: dsa: sja1105: use detected device id instead of DT one on mismatch tipc: set ub->ifindex for local ipv6 address ipv6: add ipv6_dev_find() net: openvswitch: silence suspicious RCU usage warning Revert "vxlan: fix tos value before xmit" ptp: only allow phase values lower than 1 period farsync: switch from 'pci_' to 'dma_' API wan: wanxl: switch from 'pci_' to 'dma_' API hv_netvsc: do not use VF device if link is down dpaa2-eth: Fix passing zero to 'PTR_ERR' warning net: macb: Properly handle phylink on at91sam9x ...
Diffstat (limited to 'drivers/net/xen-netfront.c')
-rw-r--r--drivers/net/xen-netfront.c319
1 files changed, 309 insertions, 10 deletions
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 88280057e032..458be6882b98 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -44,6 +44,9 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <net/ip.h>
+#include <linux/bpf.h>
+#include <net/page_pool.h>
+#include <linux/bpf_trace.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -104,6 +107,8 @@ struct netfront_queue {
char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
struct netfront_info *info;
+ struct bpf_prog __rcu *xdp_prog;
+
struct napi_struct napi;
/* Split event channels support, tx_* == rx_* when using
@@ -146,6 +151,9 @@ struct netfront_queue {
struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
grant_ref_t gref_rx_head;
grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
+
+ struct page_pool *page_pool;
+ struct xdp_rxq_info xdp_rxq;
};
struct netfront_info {
@@ -161,6 +169,10 @@ struct netfront_info {
struct netfront_stats __percpu *rx_stats;
struct netfront_stats __percpu *tx_stats;
+ /* XDP state */
+ bool netback_has_xdp_headroom;
+ bool netfront_xdp_enabled;
+
atomic_t rx_gso_checksum_fixup;
};
@@ -267,8 +279,8 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
if (unlikely(!skb))
return NULL;
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
- if (!page) {
+ page = page_pool_dev_alloc_pages(queue->page_pool);
+ if (unlikely(!page)) {
kfree_skb(skb);
return NULL;
}
@@ -562,6 +574,66 @@ static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
return queue_idx;
}
+static int xennet_xdp_xmit_one(struct net_device *dev,
+ struct netfront_queue *queue,
+ struct xdp_frame *xdpf)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
+ int notify;
+
+ xennet_make_first_txreq(queue, NULL,
+ virt_to_page(xdpf->data),
+ offset_in_page(xdpf->data),
+ xdpf->len);
+
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
+ if (notify)
+ notify_remote_via_irq(queue->tx_irq);
+
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->bytes += xdpf->len;
+ tx_stats->packets++;
+ u64_stats_update_end(&tx_stats->syncp);
+
+ xennet_tx_buf_gc(queue);
+
+ return 0;
+}
+
+static int xennet_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags)
+{
+ unsigned int num_queues = dev->real_num_tx_queues;
+ struct netfront_info *np = netdev_priv(dev);
+ struct netfront_queue *queue = NULL;
+ unsigned long irq_flags;
+ int drops = 0;
+ int i, err;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ queue = &np->queues[smp_processor_id() % num_queues];
+
+ spin_lock_irqsave(&queue->tx_lock, irq_flags);
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+
+ if (!xdpf)
+ continue;
+ err = xennet_xdp_xmit_one(dev, queue, xdpf);
+ if (err) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+ spin_unlock_irqrestore(&queue->tx_lock, irq_flags);
+
+ return n - drops;
+}
+
+
#define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -684,6 +756,9 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
/* First request has the packet length. */
first_tx->size = skb->len;
+ /* timestamp packet in software */
+ skb_tx_timestamp(skb);
+
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
if (notify)
notify_remote_via_irq(queue->tx_irq);
@@ -780,23 +855,82 @@ static int xennet_get_extras(struct netfront_queue *queue,
return err;
}
+static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata,
+ struct xen_netif_rx_response *rx, struct bpf_prog *prog,
+ struct xdp_buff *xdp, bool *need_xdp_flush)
+{
+ struct xdp_frame *xdpf;
+ u32 len = rx->status;
+ u32 act;
+ int err;
+
+ xdp->data_hard_start = page_address(pdata);
+ xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
+ xdp_set_data_meta_invalid(xdp);
+ xdp->data_end = xdp->data + len;
+ xdp->rxq = &queue->xdp_rxq;
+ xdp->frame_sz = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM;
+
+ act = bpf_prog_run_xdp(prog, xdp);
+ switch (act) {
+ case XDP_TX:
+ get_page(pdata);
+ xdpf = xdp_convert_buff_to_frame(xdp);
+ err = xennet_xdp_xmit(queue->info->netdev, 1, &xdpf, 0);
+ if (unlikely(err < 0))
+ trace_xdp_exception(queue->info->netdev, prog, act);
+ break;
+ case XDP_REDIRECT:
+ get_page(pdata);
+ err = xdp_do_redirect(queue->info->netdev, xdp, prog);
+ *need_xdp_flush = true;
+ if (unlikely(err))
+ trace_xdp_exception(queue->info->netdev, prog, act);
+ break;
+ case XDP_PASS:
+ case XDP_DROP:
+ break;
+
+ case XDP_ABORTED:
+ trace_xdp_exception(queue->info->netdev, prog, act);
+ break;
+
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ }
+
+ return act;
+}
+
static int xennet_get_responses(struct netfront_queue *queue,
struct netfront_rx_info *rinfo, RING_IDX rp,
- struct sk_buff_head *list)
+ struct sk_buff_head *list,
+ bool *need_xdp_flush)
{
struct xen_netif_rx_response *rx = &rinfo->rx;
- struct xen_netif_extra_info *extras = rinfo->extras;
- struct device *dev = &queue->info->netdev->dev;
+ int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
RING_IDX cons = queue->rx.rsp_cons;
struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
+ struct xen_netif_extra_info *extras = rinfo->extras;
grant_ref_t ref = xennet_get_rx_ref(queue, cons);
- int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
+ struct device *dev = &queue->info->netdev->dev;
+ struct bpf_prog *xdp_prog;
+ struct xdp_buff xdp;
+ unsigned long ret;
int slots = 1;
int err = 0;
- unsigned long ret;
+ u32 verdict;
if (rx->flags & XEN_NETRXF_extra_info) {
err = xennet_get_extras(queue, extras, rp);
+ if (!err) {
+ if (extras[XEN_NETIF_EXTRA_TYPE_XDP - 1].type) {
+ struct xen_netif_extra_info *xdp;
+
+ xdp = &extras[XEN_NETIF_EXTRA_TYPE_XDP - 1];
+ rx->offset = xdp->u.xdp.headroom;
+ }
+ }
cons = queue->rx.rsp_cons;
}
@@ -829,9 +963,24 @@ static int xennet_get_responses(struct netfront_queue *queue,
gnttab_release_grant_reference(&queue->gref_rx_head, ref);
- __skb_queue_tail(list, skb);
-
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(queue->xdp_prog);
+ if (xdp_prog) {
+ if (!(rx->flags & XEN_NETRXF_more_data)) {
+ /* currently only a single page contains data */
+ verdict = xennet_run_xdp(queue,
+ skb_frag_page(&skb_shinfo(skb)->frags[0]),
+ rx, xdp_prog, &xdp, need_xdp_flush);
+ if (verdict != XDP_PASS)
+ err = -EINVAL;
+ } else {
+ /* drop the frame */
+ err = -EINVAL;
+ }
+ }
+ rcu_read_unlock();
next:
+ __skb_queue_tail(list, skb);
if (!(rx->flags & XEN_NETRXF_more_data))
break;
@@ -1000,6 +1149,7 @@ static int xennet_poll(struct napi_struct *napi, int budget)
struct sk_buff_head errq;
struct sk_buff_head tmpq;
int err;
+ bool need_xdp_flush = false;
spin_lock(&queue->rx_lock);
@@ -1016,7 +1166,8 @@ static int xennet_poll(struct napi_struct *napi, int budget)
memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
memset(extras, 0, sizeof(rinfo.extras));
- err = xennet_get_responses(queue, &rinfo, rp, &tmpq);
+ err = xennet_get_responses(queue, &rinfo, rp, &tmpq,
+ &need_xdp_flush);
if (unlikely(err)) {
err:
@@ -1062,6 +1213,8 @@ err:
i = ++queue->rx.rsp_cons;
work_done++;
}
+ if (need_xdp_flush)
+ xdp_do_flush();
__skb_queue_purge(&errq);
@@ -1263,6 +1416,80 @@ static void xennet_poll_controller(struct net_device *dev)
}
#endif
+#define NETBACK_XDP_HEADROOM_DISABLE 0
+#define NETBACK_XDP_HEADROOM_ENABLE 1
+
+static int talk_to_netback_xdp(struct netfront_info *np, int xdp)
+{
+ int err;
+ unsigned short headroom;
+
+ headroom = xdp ? XDP_PACKET_HEADROOM : 0;
+ err = xenbus_printf(XBT_NIL, np->xbdev->nodename,
+ "xdp-headroom", "%hu",
+ headroom);
+ if (err)
+ pr_warn("Error writing xdp-headroom\n");
+
+ return err;
+}
+
+static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ unsigned long max_mtu = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM;
+ struct netfront_info *np = netdev_priv(dev);
+ struct bpf_prog *old_prog;
+ unsigned int i, err;
+
+ if (dev->mtu > max_mtu) {
+ netdev_warn(dev, "XDP requires MTU less than %lu\n", max_mtu);
+ return -EINVAL;
+ }
+
+ if (!np->netback_has_xdp_headroom)
+ return 0;
+
+ xenbus_switch_state(np->xbdev, XenbusStateReconfiguring);
+
+ err = talk_to_netback_xdp(np, prog ? NETBACK_XDP_HEADROOM_ENABLE :
+ NETBACK_XDP_HEADROOM_DISABLE);
+ if (err)
+ return err;
+
+ /* avoid the race with XDP headroom adjustment */
+ wait_event(module_wq,
+ xenbus_read_driver_state(np->xbdev->otherend) ==
+ XenbusStateReconfigured);
+ np->netfront_xdp_enabled = true;
+
+ old_prog = rtnl_dereference(np->queues[0].xdp_prog);
+
+ if (prog)
+ bpf_prog_add(prog, dev->real_num_tx_queues);
+
+ for (i = 0; i < dev->real_num_tx_queues; ++i)
+ rcu_assign_pointer(np->queues[i].xdp_prog, prog);
+
+ if (old_prog)
+ for (i = 0; i < dev->real_num_tx_queues; ++i)
+ bpf_prog_put(old_prog);
+
+ xenbus_switch_state(np->xbdev, XenbusStateConnected);
+
+ return 0;
+}
+
+static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return xennet_xdp_set(dev, xdp->prog, xdp->extack);
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops xennet_netdev_ops = {
.ndo_open = xennet_open,
.ndo_stop = xennet_close,
@@ -1274,6 +1501,8 @@ static const struct net_device_ops xennet_netdev_ops = {
.ndo_fix_features = xennet_fix_features,
.ndo_set_features = xennet_set_features,
.ndo_select_queue = xennet_select_queue,
+ .ndo_bpf = xennet_xdp,
+ .ndo_xdp_xmit = xennet_xdp_xmit,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = xennet_poll_controller,
#endif
@@ -1333,6 +1562,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
SET_NETDEV_DEV(netdev, &dev->dev);
np->netdev = netdev;
+ np->netfront_xdp_enabled = false;
netif_carrier_off(netdev);
@@ -1424,6 +1654,8 @@ static void xennet_disconnect_backend(struct netfront_info *info)
queue->rx_ring_ref = GRANT_INVALID_REF;
queue->tx.sring = NULL;
queue->rx.sring = NULL;
+
+ page_pool_destroy(queue->page_pool);
}
}
@@ -1759,6 +1991,51 @@ static void xennet_destroy_queues(struct netfront_info *info)
info->queues = NULL;
}
+
+
+static int xennet_create_page_pool(struct netfront_queue *queue)
+{
+ int err;
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = 0,
+ .pool_size = NET_RX_RING_SIZE,
+ .nid = NUMA_NO_NODE,
+ .dev = &queue->info->netdev->dev,
+ .offset = XDP_PACKET_HEADROOM,
+ .max_len = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM,
+ };
+
+ queue->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(queue->page_pool)) {
+ err = PTR_ERR(queue->page_pool);
+ queue->page_pool = NULL;
+ return err;
+ }
+
+ err = xdp_rxq_info_reg(&queue->xdp_rxq, queue->info->netdev,
+ queue->id);
+ if (err) {
+ netdev_err(queue->info->netdev, "xdp_rxq_info_reg failed\n");
+ goto err_free_pp;
+ }
+
+ err = xdp_rxq_info_reg_mem_model(&queue->xdp_rxq,
+ MEM_TYPE_PAGE_POOL, queue->page_pool);
+ if (err) {
+ netdev_err(queue->info->netdev, "xdp_rxq_info_reg_mem_model failed\n");
+ goto err_unregister_rxq;
+ }
+ return 0;
+
+err_unregister_rxq:
+ xdp_rxq_info_unreg(&queue->xdp_rxq);
+err_free_pp:
+ page_pool_destroy(queue->page_pool);
+ queue->page_pool = NULL;
+ return err;
+}
+
static int xennet_create_queues(struct netfront_info *info,
unsigned int *num_queues)
{
@@ -1784,6 +2061,14 @@ static int xennet_create_queues(struct netfront_info *info,
break;
}
+ /* use page pool recycling instead of buddy allocator */
+ ret = xennet_create_page_pool(queue);
+ if (ret < 0) {
+ dev_err(&info->xbdev->dev, "can't allocate page pool\n");
+ *num_queues = i;
+ return ret;
+ }
+
netif_napi_add(queue->info->netdev, &queue->napi,
xennet_poll, 64);
if (netif_running(info->netdev))
@@ -1830,6 +2115,17 @@ static int talk_to_netback(struct xenbus_device *dev,
goto out_unlocked;
}
+ info->netback_has_xdp_headroom = xenbus_read_unsigned(info->xbdev->otherend,
+ "feature-xdp-headroom", 0);
+ if (info->netback_has_xdp_headroom) {
+ /* set the current xen-netfront xdp state */
+ err = talk_to_netback_xdp(info, info->netfront_xdp_enabled ?
+ NETBACK_XDP_HEADROOM_ENABLE :
+ NETBACK_XDP_HEADROOM_DISABLE);
+ if (err)
+ goto out_unlocked;
+ }
+
rtnl_lock();
if (info->queues)
xennet_destroy_queues(info);
@@ -1964,6 +2260,8 @@ static int xennet_connect(struct net_device *dev)
err = talk_to_netback(np->xbdev, np);
if (err)
return err;
+ if (np->netback_has_xdp_headroom)
+ pr_info("backend supports XDP headroom\n");
/* talk_to_netback() sets the correct number of queues */
num_queues = dev->real_num_tx_queues;
@@ -2100,6 +2398,7 @@ static const struct ethtool_ops xennet_ethtool_ops =
.get_sset_count = xennet_get_sset_count,
.get_ethtool_stats = xennet_get_ethtool_stats,
.get_strings = xennet_get_strings,
+ .get_ts_info = ethtool_op_get_ts_info,
};
#ifdef CONFIG_SYSFS