aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/hyperv
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r--drivers/net/hyperv/hyperv_net.h77
-rw-r--r--drivers/net/hyperv/netvsc.c178
-rw-r--r--drivers/net/hyperv/netvsc_bpf.c103
-rw-r--r--drivers/net/hyperv/netvsc_drv.c190
-rw-r--r--drivers/net/hyperv/rndis_filter.c14
5 files changed, 436 insertions, 126 deletions
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 315278a7cf88..dd5919ec408b 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -15,6 +15,7 @@
#include <linux/list.h>
#include <linux/hyperv.h>
#include <linux/rndis.h>
+#include <linux/jhash.h>
/* RSS related */
#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */
@@ -164,6 +165,7 @@ struct hv_netvsc_packet {
u32 total_bytes;
u32 send_buf_index;
u32 total_data_buflen;
+ struct hv_dma_range *dma_range;
};
#define NETVSC_HASH_KEYLEN 40
@@ -236,6 +238,7 @@ int netvsc_recv_callback(struct net_device *net,
void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);
+void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev);
u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
struct xdp_buff *xdp);
unsigned int netvsc_xdp_fraglen(unsigned int len);
@@ -245,6 +248,8 @@ int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
struct netvsc_device *nvdev);
int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog);
int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf);
+int netvsc_ndoxdp_xmit(struct net_device *ndev, int n,
+ struct xdp_frame **frames, u32 flags);
int rndis_set_subchannel(struct net_device *ndev,
struct netvsc_device *nvdev,
@@ -941,12 +946,21 @@ struct nvsc_rsc {
#define NVSC_RSC_CSUM_INFO BIT(1) /* valid/present bit for 'csum_info' */
#define NVSC_RSC_HASH_INFO BIT(2) /* valid/present bit for 'hash_info' */
-struct netvsc_stats {
+struct netvsc_stats_tx { /* per-channel transmit counters; embedded as netvsc_channel.tx_stats */
+ u64 packets;
+ u64 bytes; /* grows by packet->total_bytes in netvsc_send_tx_complete() */
+ u64 xdp_xmit; /* frames queued by netvsc_ndoxdp_xmit() on this channel */
+ struct u64_stats_sync syncp; /* writers bracket updates with u64_stats_update_begin()/_end() */
+};
+
+struct netvsc_stats_rx { /* per-channel receive counters; embedded as netvsc_channel.rx_stats */
u64 packets;
u64 bytes;
u64 broadcast;
u64 multicast;
u64 xdp_drop;
+ u64 xdp_redirect; /* bumped in netvsc_run_xdp() when xdp_do_redirect() succeeds */
+ u64 xdp_tx; /* bumped in netvsc_recv_callback() when the XDP verdict is XDP_TX */
struct u64_stats_sync syncp;
};
@@ -1037,7 +1051,8 @@ struct net_device_context {
u32 vf_alloc;
/* Serial number of the VF to team with */
u32 vf_serial;
-
+ /* completion variable to confirm vf association */
+ struct completion vf_add;
/* Is the current data path through the VF NIC? */
bool data_path_is_vf;
@@ -1045,6 +1060,55 @@ struct net_device_context {
struct netvsc_device_info *saved_netvsc_dev_info;
};
+/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
+ * packets. We can use ethtool to change UDP hash level when necessary.
+ */
+static inline u32 netvsc_get_hash(struct sk_buff *skb,
+				  const struct net_device_context *ndc)
+{
+	static u32 hashrnd __read_mostly;
+	struct flow_keys keys;
+	bool is_ipv4, is_ipv6;
+	u32 l4_type = 0;
+	u32 l3_hash;
+
+	/* Seed for the address-only hash, filled via net_get_random_once(). */
+	net_get_random_once(&hashrnd, sizeof(hashrnd));
+
+	/* Nothing to hash on if the flow dissector cannot parse the packet. */
+	if (!skb_flow_dissect_flow_keys(skb, &keys, 0))
+		return 0;
+
+	is_ipv4 = keys.basic.n_proto == htons(ETH_P_IP);
+	is_ipv6 = keys.basic.n_proto == htons(ETH_P_IPV6);
+
+	/* Classify the packet into one of the HV_*_L4HASH types, if any. */
+	if (keys.basic.ip_proto == IPPROTO_TCP) {
+		if (is_ipv4)
+			l4_type = HV_TCP4_L4HASH;
+		else if (is_ipv6)
+			l4_type = HV_TCP6_L4HASH;
+	} else if (keys.basic.ip_proto == IPPROTO_UDP) {
+		if (is_ipv4)
+			l4_type = HV_UDP4_L4HASH;
+		else if (is_ipv6)
+			l4_type = HV_UDP6_L4HASH;
+	}
+
+	/* L4 hashing is enabled for this type: defer to skb_get_hash(). */
+	if (l4_type & ndc->l4_hash)
+		return skb_get_hash(skb);
+
+	/* Otherwise hash only the dissected addresses: 2 u32 words for
+	 * IPv4, 8 for IPv6. Any other network protocol gets hash 0.
+	 */
+	if (is_ipv4)
+		l3_hash = jhash2((u32 *)&keys.addrs.v4addrs, 2, hashrnd);
+	else if (is_ipv6)
+		l3_hash = jhash2((u32 *)&keys.addrs.v6addrs, 8, hashrnd);
+	else
+		return 0;
+
+	/* Record the result on the skb as a software, non-L4 hash. */
+	__skb_set_sw_hash(skb, l3_hash, false);
+
+	return l3_hash;
+}
+
/* Per channel data */
struct netvsc_channel {
struct vmbus_channel *channel;
@@ -1059,9 +1123,10 @@ struct netvsc_channel {
struct bpf_prog __rcu *bpf_prog;
struct xdp_rxq_info xdp_rxq;
+ bool xdp_flush;
- struct netvsc_stats tx_stats;
- struct netvsc_stats rx_stats;
+ struct netvsc_stats_tx tx_stats;
+ struct netvsc_stats_rx rx_stats;
};
/* Per netvsc device */
@@ -1074,6 +1139,7 @@ struct netvsc_device {
/* Receive buffer allocated by us but manages by NetVSP */
void *recv_buf;
+ void *recv_original_buf;
u32 recv_buf_size; /* allocated bytes */
struct vmbus_gpadl recv_buf_gpadl_handle;
u32 recv_section_cnt;
@@ -1082,6 +1148,7 @@ struct netvsc_device {
/* Send buffer allocated by us */
void *send_buf;
+ void *send_original_buf;
u32 send_buf_size;
struct vmbus_gpadl send_buf_gpadl_handle;
u32 send_section_cnt;
@@ -1731,4 +1798,6 @@ struct rndis_message {
#define RETRY_US_HI 10000
#define RETRY_MAX 2000 /* >10 sec */
+void netvsc_dma_unmap(struct hv_device *hv_dev,
+ struct hv_netvsc_packet *packet);
#endif /* _HYPERV_NET_H */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 396bc1c204e6..9352dad58996 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -20,6 +20,7 @@
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>
+#include <linux/filter.h>
#include <asm/sync_bitops.h>
#include <asm/mshyperv.h>
@@ -153,9 +154,18 @@ static void free_netvsc_device(struct rcu_head *head)
int i;
kfree(nvdev->extension);
- vfree(nvdev->recv_buf);
- vfree(nvdev->send_buf);
- kfree(nvdev->send_section_map);
+
+ if (nvdev->recv_original_buf)
+ vfree(nvdev->recv_original_buf);
+ else
+ vfree(nvdev->recv_buf);
+
+ if (nvdev->send_original_buf)
+ vfree(nvdev->send_original_buf);
+ else
+ vfree(nvdev->send_buf);
+
+ bitmap_free(nvdev->send_section_map);
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
@@ -336,8 +346,8 @@ static int netvsc_init_buf(struct hv_device *device,
struct net_device *ndev = hv_get_drvdata(device);
struct nvsp_message *init_packet;
unsigned int buf_size;
- size_t map_words;
int i, ret = 0;
+ void *vaddr;
/* Get receive buffer area. */
buf_size = device_info->recv_sections * device_info->recv_section_size;
@@ -373,6 +383,17 @@ static int netvsc_init_buf(struct hv_device *device,
goto cleanup;
}
+ if (hv_isolation_type_snp()) {
+ vaddr = hv_map_memory(net_device->recv_buf, buf_size);
+ if (!vaddr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ net_device->recv_original_buf = net_device->recv_buf;
+ net_device->recv_buf = vaddr;
+ }
+
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -476,6 +497,17 @@ static int netvsc_init_buf(struct hv_device *device,
goto cleanup;
}
+ if (hv_isolation_type_snp()) {
+ vaddr = hv_map_memory(net_device->send_buf, buf_size);
+ if (!vaddr) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ net_device->send_original_buf = net_device->send_buf;
+ net_device->send_buf = vaddr;
+ }
+
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -528,10 +560,9 @@ static int netvsc_init_buf(struct hv_device *device,
net_device->send_section_size, net_device->send_section_cnt);
/* Setup state for managing the send buffer. */
- map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);
-
- net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
- if (net_device->send_section_map == NULL) {
+ net_device->send_section_map = bitmap_zalloc(net_device->send_section_cnt,
+ GFP_KERNEL);
+ if (!net_device->send_section_map) {
ret = -ENOMEM;
goto cleanup;
}
@@ -731,6 +762,12 @@ void netvsc_device_remove(struct hv_device *device)
netvsc_teardown_send_gpadl(device, net_device, ndev);
}
+ if (net_device->recv_original_buf)
+ hv_unmap_memory(net_device->recv_buf);
+
+ if (net_device->send_original_buf)
+ hv_unmap_memory(net_device->send_buf);
+
/* Release all resources */
free_netvsc_device_rcu(net_device);
}
@@ -756,9 +793,9 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
int queue_sends;
u64 cmd_rqst;
- cmd_rqst = channel->request_addr_callback(channel, (u64)desc->trans_id);
+ cmd_rqst = channel->request_addr_callback(channel, desc->trans_id);
if (cmd_rqst == VMBUS_RQST_ERROR) {
- netdev_err(ndev, "Incorrect transaction id\n");
+ netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
return;
}
@@ -766,10 +803,10 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
/* Notify the layer above us */
if (likely(skb)) {
- const struct hv_netvsc_packet *packet
+ struct hv_netvsc_packet *packet
= (struct hv_netvsc_packet *)skb->cb;
u32 send_index = packet->send_buf_index;
- struct netvsc_stats *tx_stats;
+ struct netvsc_stats_tx *tx_stats;
if (send_index != NETVSC_INVALID_INDEX)
netvsc_free_send_slot(net_device, send_index);
@@ -782,6 +819,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
tx_stats->bytes += packet->total_bytes;
u64_stats_update_end(&tx_stats->syncp);
+ netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
napi_consume_skb(skb, budget);
}
@@ -817,9 +855,9 @@ static void netvsc_send_completion(struct net_device *ndev,
/* First check if this is a VMBUS completion without data payload */
if (!msglen) {
cmd_rqst = incoming_channel->request_addr_callback(incoming_channel,
- (u64)desc->trans_id);
+ desc->trans_id);
if (cmd_rqst == VMBUS_RQST_ERROR) {
- netdev_err(ndev, "Invalid transaction id\n");
+ netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
return;
}
@@ -946,6 +984,88 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
memset(dest, 0, padding);
}
+/* netvsc_dma_unmap - Release the bounce-buffer mappings that
+ * netvsc_dma_map() recorded in packet->dma_range.
+ *
+ * Does nothing when isolation is not supported or when the packet
+ * carries no dma_range. One mapping is released for each of the
+ * packet->page_buf_cnt entries, matching what netvsc_dma_map()
+ * created. packet->dma_range is freed and reset to NULL so that a
+ * repeated call is harmless.
+ */
+void netvsc_dma_unmap(struct hv_device *hv_dev,
+		      struct hv_netvsc_packet *packet)
+{
+	u32 i;
+
+	if (!hv_is_isolation_supported())
+		return;
+
+	if (!packet->dma_range)
+		return;
+
+	for (i = 0; i < packet->page_buf_cnt; i++)
+		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
+				 packet->dma_range[i].mapping_size,
+				 DMA_TO_DEVICE);
+
+	kfree(packet->dma_range);
+	packet->dma_range = NULL;
+}
+
+/* netvsc_dma_map - Map the data pages of a packet sent by
+ * vmbus_sendpacket_pagebuffer() to swiotlb bounce buffers in an
+ * Isolation VM.
+ *
+ * In an Isolation VM the netvsc send buffer has been made visible to
+ * the host, so data copied into the send buffer needs no bounce
+ * buffer. Pages handed to vmbus_sendpacket_pagebuffer() are not
+ * copied into the send buffer and must therefore be mapped through
+ * swiotlb, with the pfn of each struct hv_page_buffer replaced by
+ * the pfn of its bounce buffer.
+ *
+ * @pb must point at the first entry that will be passed to
+ * vmbus_sendpacket_pagebuffer(), and all packet->page_buf_cnt
+ * entries starting there are mapped. The caller has already skipped
+ * the RNDIS entries of a partially copied packet, so no further
+ * cp_partial adjustment is made here; doing it again would leave the
+ * trailing entries unmapped.
+ *
+ * Entries are mapped one by one rather than with dma_map_sg():
+ * they are not necessarily full pages, each has its own offset and
+ * len (possibly non-zero even in the middle of the array), and they
+ * are not physically contiguous.
+ *
+ * Returns 0 on success (or when isolation is not supported) and
+ * -ENOMEM on failure. On failure every mapping made so far is
+ * released, packet->dma_range is NULL and @pb is left unmodified.
+ */
+static int netvsc_dma_map(struct hv_device *hv_dev,
+			  struct hv_netvsc_packet *packet,
+			  struct hv_page_buffer *pb)
+{
+	u32 page_count = packet->page_buf_cnt;
+	dma_addr_t dma;
+	u32 i;
+
+	if (!hv_is_isolation_supported())
+		return 0;
+
+	/* This is reached while sending packets, where the allocation
+	 * must not sleep, hence GFP_ATOMIC.
+	 */
+	packet->dma_range = kcalloc(page_count,
+				    sizeof(*packet->dma_range),
+				    GFP_ATOMIC);
+	if (!packet->dma_range)
+		return -ENOMEM;
+
+	for (i = 0; i < page_count; i++) {
+		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
+					 + pb[i].offset);
+		u32 len = pb[i].len;
+
+		dma = dma_map_single(&hv_dev->device, src, len,
+				     DMA_TO_DEVICE);
+		if (dma_mapping_error(&hv_dev->device, dma))
+			goto err_unmap;
+
+		packet->dma_range[i].dma = dma;
+		packet->dma_range[i].mapping_size = len;
+	}
+
+	/* Rewrite the pfns only once every mapping has succeeded, so a
+	 * failure above leaves pb[] untouched. pb[].offset and pb[].len
+	 * are not changed by the mapping and are not reassigned.
+	 */
+	for (i = 0; i < page_count; i++)
+		pb[i].pfn = packet->dma_range[i].dma >> HV_HYP_PAGE_SHIFT;
+
+	return 0;
+
+err_unmap:
+	/* Entry i failed; release entries 0..i-1 in reverse order. */
+	while (i--)
+		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
+				 packet->dma_range[i].mapping_size,
+				 DMA_TO_DEVICE);
+
+	kfree(packet->dma_range);
+	packet->dma_range = NULL;
+
+	return -ENOMEM;
+}
+
static inline int netvsc_send_pkt(
struct hv_device *device,
struct hv_netvsc_packet *packet,
@@ -986,14 +1106,24 @@ static inline int netvsc_send_pkt(
trace_nvsp_send_pkt(ndev, out_channel, rpkt);
+ packet->dma_range = NULL;
if (packet->page_buf_cnt) {
if (packet->cp_partial)
pb += packet->rmsg_pgcnt;
+ ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
+ if (ret) {
+ ret = -EAGAIN;
+ goto exit;
+ }
+
ret = vmbus_sendpacket_pagebuffer(out_channel,
pb, packet->page_buf_cnt,
&nvmsg, sizeof(nvmsg),
req_id);
+
+ if (ret)
+ netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
} else {
ret = vmbus_sendpacket(out_channel,
&nvmsg, sizeof(nvmsg),
@@ -1001,6 +1131,7 @@ static inline int netvsc_send_pkt(
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
}
+exit:
if (ret == 0) {
atomic_inc_return(&nvchan->queue_sends);
@@ -1449,6 +1580,10 @@ static void netvsc_send_vf(struct net_device *ndev,
net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+
+ if (net_device_ctx->vf_alloc)
+ complete(&net_device_ctx->vf_add);
+
netdev_info(ndev, "VF slot %u %s\n",
net_device_ctx->vf_serial,
net_device_ctx->vf_alloc ? "added" : "removed");
@@ -1500,7 +1635,6 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
case VM_PKT_DATA_USING_XFER_PAGES:
return netvsc_receive(ndev, net_device, nvchan, desc);
- break;
case VM_PKT_DATA_INBAND:
netvsc_receive_inband(ndev, net_device, desc);
@@ -1541,12 +1675,17 @@ int netvsc_poll(struct napi_struct *napi, int budget)
if (!nvchan->desc)
nvchan->desc = hv_pkt_iter_first(channel);
+ nvchan->xdp_flush = false;
+
while (nvchan->desc && work_done < budget) {
work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
ndev, nvchan->desc, budget);
nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
}
+ if (nvchan->xdp_flush)
+ xdp_do_flush();
+
/* Send any pending receive completions */
ret = send_recv_completions(ndev, net_device, nvchan);
@@ -1644,8 +1783,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
}
/* Enable NAPI handler before init callbacks */
- netif_napi_add(ndev, &net_device->chan_table[0].napi,
- netvsc_poll, NAPI_POLL_WEIGHT);
+ netif_napi_add(ndev, &net_device->chan_table[0].napi, netvsc_poll);
/* Open the channel */
device->channel->next_request_id_callback = vmbus_next_request_id;
@@ -1693,6 +1831,12 @@ cleanup:
netif_napi_del(&net_device->chan_table[0].napi);
cleanup2:
+ if (net_device->recv_original_buf)
+ hv_unmap_memory(net_device->recv_buf);
+
+ if (net_device->send_original_buf)
+ hv_unmap_memory(net_device->send_buf);
+
free_netvsc_device(&net_device->rcu);
return ERR_PTR(ret);
diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index aa877da113f8..4a9522689fa4 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -10,6 +10,7 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
+#include <linux/netpoll.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/kernel.h>
@@ -23,11 +24,13 @@
u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
struct xdp_buff *xdp)
{
+ struct netvsc_stats_rx *rx_stats = &nvchan->rx_stats;
void *data = nvchan->rsc.data[0];
u32 len = nvchan->rsc.len[0];
struct page *page = NULL;
struct bpf_prog *prog;
u32 act = XDP_PASS;
+ bool drop = true;
xdp->data_hard_start = NULL;
@@ -60,21 +63,46 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
switch (act) {
case XDP_PASS:
case XDP_TX:
+ drop = false;
+ break;
+
case XDP_DROP:
break;
+ case XDP_REDIRECT:
+ if (!xdp_do_redirect(ndev, xdp, prog)) {
+ nvchan->xdp_flush = true;
+ drop = false;
+
+ u64_stats_update_begin(&rx_stats->syncp);
+
+ rx_stats->xdp_redirect++;
+ rx_stats->packets++;
+ rx_stats->bytes += nvchan->rsc.pktlen;
+
+ u64_stats_update_end(&rx_stats->syncp);
+
+ break;
+ } else {
+ u64_stats_update_begin(&rx_stats->syncp);
+ rx_stats->xdp_drop++;
+ u64_stats_update_end(&rx_stats->syncp);
+ }
+
+ fallthrough;
+
case XDP_ABORTED:
trace_xdp_exception(ndev, prog, act);
break;
default:
- bpf_warn_invalid_xdp_action(act);
+ bpf_warn_invalid_xdp_action(ndev, prog, act);
}
out:
rcu_read_unlock();
- if (page && act != XDP_PASS && act != XDP_TX) {
+ if (page && drop) {
__free_page(page);
xdp->data_hard_start = NULL;
}
@@ -137,7 +165,6 @@ int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
{
struct netdev_bpf xdp;
- bpf_op_t ndo_bpf;
int ret;
ASSERT_RTNL();
@@ -145,8 +172,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
if (!vf_netdev)
return 0;
- ndo_bpf = vf_netdev->netdev_ops->ndo_bpf;
- if (!ndo_bpf)
+ if (!vf_netdev->netdev_ops->ndo_bpf)
return 0;
memset(&xdp, 0, sizeof(xdp));
@@ -157,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
xdp.command = XDP_SETUP_PROG;
xdp.prog = prog;
- ret = ndo_bpf(vf_netdev, &xdp);
+ ret = vf_netdev->netdev_ops->ndo_bpf(vf_netdev, &xdp);
if (ret && prog)
bpf_prog_put(prog);
@@ -199,3 +225,68 @@ int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
return -EINVAL;
}
}
+
+/* Convert one XDP frame into an skb and hand it to netvsc_xdp_xmit()
+ * on queue @q_idx. Returns 0 on success, or -ENOMEM when no skb could
+ * be built from the frame.
+ */
+static int netvsc_ndoxdp_xmit_fm(struct net_device *ndev,
+				 struct xdp_frame *frame, u16 q_idx)
+{
+	struct sk_buff *skb = xdp_build_skb_from_frame(frame, ndev);
+
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+	/* Called for its effect on the skb; the returned hash is unused. */
+	netvsc_get_hash(skb, netdev_priv(ndev));
+
+	/* The rx queue must be recorded before netvsc_xdp_xmit() runs. */
+	skb_record_rx_queue(skb, q_idx);
+	netvsc_xdp_xmit(skb, ndev);
+
+	return 0;
+}
+
+/* ndo_xdp_xmit handler: transmit up to @n XDP frames.
+ *
+ * Frames are passed on to the VF's own ndo_xdp_xmit when the VF data
+ * path is usable; otherwise each frame is converted and sent on the
+ * synthetic device. Returns the number of frames accepted, which is
+ * 0 when the netvsc device is gone or being destroyed.
+ */
+int netvsc_ndoxdp_xmit(struct net_device *ndev, int n,
+		       struct xdp_frame **frames, u32 flags)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(ndev);
+	const struct net_device_ops *vf_ops;
+	struct netvsc_stats_tx *tx_stats;
+	struct netvsc_device *nvsc_dev;
+	struct net_device *vf_netdev;
+	int count = 0;
+	u16 q_idx;
+
+	/* Don't transmit if netvsc_device is gone */
+	nvsc_dev = rcu_dereference_bh(ndev_ctx->nvdev);
+	if (unlikely(!nvsc_dev || nvsc_dev->destroy))
+		return 0;
+
+	/* If a VF is present and up, redirect the frames to it.
+	 * Skip the VF if it is marked down or has no carrier.
+	 * If netpoll is in use, the VF cannot be used either.
+	 */
+	vf_netdev = rcu_dereference_bh(ndev_ctx->vf_netdev);
+	if (vf_netdev) {
+		vf_ops = vf_netdev->netdev_ops;
+
+		if (netif_running(vf_netdev) &&
+		    netif_carrier_ok(vf_netdev) &&
+		    !netpoll_tx_running(ndev) &&
+		    vf_ops->ndo_xdp_xmit &&
+		    ndev_ctx->data_path_is_vf)
+			return vf_ops->ndo_xdp_xmit(vf_netdev, n, frames,
+						    flags);
+	}
+
+	/* Pick the tx queue from the CPU we are currently running on. */
+	q_idx = smp_processor_id() % ndev->real_num_tx_queues;
+
+	/* Stop at the first frame that cannot be turned into an skb. */
+	while (count < n &&
+	       !netvsc_ndoxdp_xmit_fm(ndev, frames[count], q_idx))
+		count++;
+
+	tx_stats = &nvsc_dev->chan_table[q_idx].tx_stats;
+
+	u64_stats_update_begin(&tx_stats->syncp);
+	tx_stats->xdp_xmit += count;
+	u64_stats_update_end(&tx_stats->syncp);
+
+	return count;
+}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 7e66ae1d2a59..89eb4f179a3c 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -242,56 +242,6 @@ static inline void *init_ppi_data(struct rndis_message *msg,
return ppi + 1;
}
-/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
- * packets. We can use ethtool to change UDP hash level when necessary.
- */
-static inline u32 netvsc_get_hash(
- struct sk_buff *skb,
- const struct net_device_context *ndc)
-{
- struct flow_keys flow;
- u32 hash, pkt_proto = 0;
- static u32 hashrnd __read_mostly;
-
- net_get_random_once(&hashrnd, sizeof(hashrnd));
-
- if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
- return 0;
-
- switch (flow.basic.ip_proto) {
- case IPPROTO_TCP:
- if (flow.basic.n_proto == htons(ETH_P_IP))
- pkt_proto = HV_TCP4_L4HASH;
- else if (flow.basic.n_proto == htons(ETH_P_IPV6))
- pkt_proto = HV_TCP6_L4HASH;
-
- break;
-
- case IPPROTO_UDP:
- if (flow.basic.n_proto == htons(ETH_P_IP))
- pkt_proto = HV_UDP4_L4HASH;
- else if (flow.basic.n_proto == htons(ETH_P_IPV6))
- pkt_proto = HV_UDP6_L4HASH;
-
- break;
- }
-
- if (pkt_proto & ndc->l4_hash) {
- return skb_get_hash(skb);
- } else {
- if (flow.basic.n_proto == htons(ETH_P_IP))
- hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd);
- else if (flow.basic.n_proto == htons(ETH_P_IPV6))
- hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd);
- else
- return 0;
-
- __skb_set_sw_hash(skb, hash, false);
- }
-
- return hash;
-}
-
static inline int netvsc_get_tx_queue(struct net_device *ndev,
struct sk_buff *skb, int old_idx)
{
@@ -804,7 +754,7 @@ void netvsc_linkstatus_callback(struct net_device *net,
}
/* This function should only be called after skb_record_rx_queue() */
-static void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev)
+void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev)
{
int rc;
@@ -925,7 +875,7 @@ int netvsc_recv_callback(struct net_device *net,
struct vmbus_channel *channel = nvchan->channel;
u16 q_idx = channel->offermsg.offer.sub_channel_index;
struct sk_buff *skb;
- struct netvsc_stats *rx_stats = &nvchan->rx_stats;
+ struct netvsc_stats_rx *rx_stats = &nvchan->rx_stats;
struct xdp_buff xdp;
u32 act;
@@ -934,6 +884,9 @@ int netvsc_recv_callback(struct net_device *net,
act = netvsc_run_xdp(net, nvchan, &xdp);
+ if (act == XDP_REDIRECT)
+ return NVSP_STAT_SUCCESS;
+
if (act != XDP_PASS && act != XDP_TX) {
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->xdp_drop++;
@@ -958,6 +911,9 @@ int netvsc_recv_callback(struct net_device *net,
* statistics will not work correctly.
*/
u64_stats_update_begin(&rx_stats->syncp);
+ if (act == XDP_TX)
+ rx_stats->xdp_tx++;
+
rx_stats->packets++;
rx_stats->bytes += nvchan->rsc.pktlen;
@@ -979,8 +935,8 @@ int netvsc_recv_callback(struct net_device *net,
static void netvsc_get_drvinfo(struct net_device *net,
struct ethtool_drvinfo *info)
{
- strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
- strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+ strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+ strscpy(info->fw_version, "N/A", sizeof(info->fw_version));
}
static void netvsc_get_channels(struct net_device *net,
@@ -1353,28 +1309,29 @@ static void netvsc_get_pcpu_stats(struct net_device *net,
/* fetch percpu stats of netvsc */
for (i = 0; i < nvdev->num_chn; i++) {
const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
- const struct netvsc_stats *stats;
+ const struct netvsc_stats_tx *tx_stats;
+ const struct netvsc_stats_rx *rx_stats;
struct netvsc_ethtool_pcpu_stats *this_tot =
&pcpu_tot[nvchan->channel->target_cpu];
u64 packets, bytes;
unsigned int start;
- stats = &nvchan->tx_stats;
+ tx_stats = &nvchan->tx_stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+ packets = tx_stats->packets;
+ bytes = tx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
this_tot->tx_bytes += bytes;
this_tot->tx_packets += packets;
- stats = &nvchan->rx_stats;
+ rx_stats = &nvchan->rx_stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+ packets = rx_stats->packets;
+ bytes = rx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
this_tot->rx_bytes += bytes;
this_tot->rx_packets += packets;
@@ -1406,27 +1363,28 @@ static void netvsc_get_stats64(struct net_device *net,
for (i = 0; i < nvdev->num_chn; i++) {
const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
- const struct netvsc_stats *stats;
+ const struct netvsc_stats_tx *tx_stats;
+ const struct netvsc_stats_rx *rx_stats;
u64 packets, bytes, multicast;
unsigned int start;
- stats = &nvchan->tx_stats;
+ tx_stats = &nvchan->tx_stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+ packets = tx_stats->packets;
+ bytes = tx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
t->tx_bytes += bytes;
t->tx_packets += packets;
- stats = &nvchan->rx_stats;
+ rx_stats = &nvchan->rx_stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- multicast = stats->multicast + stats->broadcast;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+ packets = rx_stats->packets;
+ bytes = rx_stats->bytes;
+ multicast = rx_stats->multicast + rx_stats->broadcast;
+ } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
t->rx_bytes += bytes;
t->rx_packets += packets;
@@ -1515,8 +1473,8 @@ static const struct {
/* statistics per queue (rx/tx packets/bytes) */
#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
-/* 5 statistics per queue (rx/tx packets/bytes, rx xdp_drop) */
-#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 5)
+/* 8 statistics per queue (rx/tx packets/bytes, XDP actions) */
+#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 8)
static int netvsc_get_sset_count(struct net_device *dev, int string_set)
{
@@ -1543,12 +1501,16 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
struct net_device_context *ndc = netdev_priv(dev);
struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
const void *nds = &ndc->eth_stats;
- const struct netvsc_stats *qstats;
+ const struct netvsc_stats_tx *tx_stats;
+ const struct netvsc_stats_rx *rx_stats;
struct netvsc_vf_pcpu_stats sum;
struct netvsc_ethtool_pcpu_stats *pcpu_sum;
unsigned int start;
u64 packets, bytes;
u64 xdp_drop;
+ u64 xdp_redirect;
+ u64 xdp_tx;
+ u64 xdp_xmit;
int i, j, cpu;
if (!nvdev)
@@ -1562,31 +1524,40 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
for (j = 0; j < nvdev->num_chn; j++) {
- qstats = &nvdev->chan_table[j].tx_stats;
+ tx_stats = &nvdev->chan_table[j].tx_stats;
do {
- start = u64_stats_fetch_begin_irq(&qstats->syncp);
- packets = qstats->packets;
- bytes = qstats->bytes;
- } while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+ packets = tx_stats->packets;
+ bytes = tx_stats->bytes;
+ xdp_xmit = tx_stats->xdp_xmit;
+ } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
+ data[i++] = xdp_xmit;
- qstats = &nvdev->chan_table[j].rx_stats;
+ rx_stats = &nvdev->chan_table[j].rx_stats;
do {
- start = u64_stats_fetch_begin_irq(&qstats->syncp);
- packets = qstats->packets;
- bytes = qstats->bytes;
- xdp_drop = qstats->xdp_drop;
- } while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+ packets = rx_stats->packets;
+ bytes = rx_stats->bytes;
+ xdp_drop = rx_stats->xdp_drop;
+ xdp_redirect = rx_stats->xdp_redirect;
+ xdp_tx = rx_stats->xdp_tx;
+ } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
data[i++] = xdp_drop;
+ data[i++] = xdp_redirect;
+ data[i++] = xdp_tx;
}
pcpu_sum = kvmalloc_array(num_possible_cpus(),
sizeof(struct netvsc_ethtool_pcpu_stats),
GFP_KERNEL);
+ if (!pcpu_sum)
+ return;
+
netvsc_get_pcpu_stats(dev, pcpu_sum);
for_each_present_cpu(cpu) {
struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
@@ -1619,9 +1590,12 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
for (i = 0; i < nvdev->num_chn; i++) {
ethtool_sprintf(&p, "tx_queue_%u_packets", i);
ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+ ethtool_sprintf(&p, "tx_queue_%u_xdp_xmit", i);
ethtool_sprintf(&p, "rx_queue_%u_packets", i);
ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
ethtool_sprintf(&p, "rx_queue_%u_xdp_drop", i);
+ ethtool_sprintf(&p, "rx_queue_%u_xdp_redirect", i);
+ ethtool_sprintf(&p, "rx_queue_%u_xdp_tx", i);
}
for_each_present_cpu(cpu) {
@@ -1858,7 +1832,9 @@ static void __netvsc_get_ringparam(struct netvsc_device *nvdev,
}
static void netvsc_get_ringparam(struct net_device *ndev,
- struct ethtool_ringparam *ring)
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *extack)
{
struct net_device_context *ndevctx = netdev_priv(ndev);
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
@@ -1870,7 +1846,9 @@ static void netvsc_get_ringparam(struct net_device *ndev,
}
static int netvsc_set_ringparam(struct net_device *ndev,
- struct ethtool_ringparam *ring)
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *extack)
{
struct net_device_context *ndevctx = netdev_priv(ndev);
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
@@ -2050,6 +2028,7 @@ static const struct net_device_ops device_ops = {
.ndo_select_queue = netvsc_select_queue,
.ndo_get_stats64 = netvsc_get_stats64,
.ndo_bpf = netvsc_bpf,
+ .ndo_xdp_xmit = netvsc_ndoxdp_xmit,
};
/*
@@ -2334,6 +2313,18 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
}
+ /* Fallback path to check synthetic vf with
+ * help of mac addr
+ */
+ list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
+ ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+ if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) {
+ netdev_notice(vf_netdev,
+ "falling back to mac addr based matching\n");
+ return ndev;
+ }
+ }
+
netdev_notice(vf_netdev,
"no netdev found for vf serial:%u\n", serial);
return NULL;
@@ -2430,6 +2421,11 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
if (net_device_ctx->data_path_is_vf == vf_is_up)
return NOTIFY_OK;
+ if (vf_is_up && !net_device_ctx->vf_alloc) {
+ netdev_info(ndev, "Waiting for the VF association from host\n");
+ wait_for_completion(&net_device_ctx->vf_add);
+ }
+
ret = netvsc_switch_datapath(ndev, vf_is_up);
if (ret) {
@@ -2461,6 +2457,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
netvsc_vf_setxdp(vf_netdev, NULL);
+ reinit_completion(&net_device_ctx->vf_add);
netdev_rx_handler_unregister(vf_netdev);
netdev_upper_dev_unlink(vf_netdev, ndev);
RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
@@ -2500,6 +2497,7 @@ static int netvsc_probe(struct hv_device *dev,
INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
+ init_completion(&net_device_ctx->vf_add);
spin_lock_init(&net_device_ctx->lock);
INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
@@ -2512,6 +2510,7 @@ static int netvsc_probe(struct hv_device *dev,
net->netdev_ops = &device_ops;
net->ethtool_ops = &ethtool_ops;
SET_NETDEV_DEV(net, &dev->device);
+ dma_set_min_align_mask(&dev->device, HV_HYP_PAGE_SIZE - 1);
/* We always need headroom for rndis header */
net->needed_headroom = RNDIS_AND_PPI_SIZE;
@@ -2663,7 +2662,10 @@ static int netvsc_suspend(struct hv_device *dev)
/* Save the current config info */
ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev);
-
+ if (!ndev_ctx->saved_netvsc_dev_info) {
+ ret = -ENOMEM;
+ goto out;
+ }
ret = netvsc_detach(net, nvdev);
out:
rtnl_unlock();
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index f6c9c2a670f9..eea777ec2541 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -20,6 +20,7 @@
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/ucs2_string.h>
+#include <linux/string.h>
#include "hyperv_net.h"
#include "netvsc_trace.h"
@@ -335,9 +336,10 @@ static void rndis_filter_receive_response(struct net_device *ndev,
if (resp->msg_len <=
sizeof(struct rndis_message) + RNDIS_EXT_LEN) {
memcpy(&request->response_msg, resp, RNDIS_HEADER_SIZE + sizeof(*req_id));
- memcpy((void *)&request->response_msg + RNDIS_HEADER_SIZE + sizeof(*req_id),
+ unsafe_memcpy((void *)&request->response_msg + RNDIS_HEADER_SIZE + sizeof(*req_id),
data + RNDIS_HEADER_SIZE + sizeof(*req_id),
- resp->msg_len - RNDIS_HEADER_SIZE - sizeof(*req_id));
+ resp->msg_len - RNDIS_HEADER_SIZE - sizeof(*req_id),
+ "request->response_msg is followed by a padding of RNDIS_EXT_LEN inside rndis_request");
if (request->request_msg.ndis_msg_type ==
RNDIS_MSG_QUERY && request->request_msg.msg.
query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS)
@@ -361,6 +363,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
}
}
+ netvsc_dma_unmap(((struct net_device_context *)
+ netdev_priv(ndev))->device_ctx, &request->pkt);
complete(&request->wait_event);
} else {
netdev_err(ndev,
@@ -1347,7 +1351,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
struct net_device_context *net_device_ctx = netdev_priv(net);
struct ndis_offload hwcaps;
struct ndis_offload_params offloads;
- unsigned int gso_max_size = GSO_MAX_SIZE;
+ unsigned int gso_max_size = GSO_LEGACY_MAX_SIZE;
int ret;
/* Find HW offload capabilities */
@@ -1429,7 +1433,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
*/
net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features;
- netif_set_gso_max_size(net, gso_max_size);
+ netif_set_tso_max_size(net, gso_max_size);
ret = rndis_filter_set_offload_params(net, nvdev, &offloads);
@@ -1573,7 +1577,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
for (i = 1; i < net_device->num_chn; i++)
netif_napi_add(net, &net_device->chan_table[i].napi,
- netvsc_poll, NAPI_POLL_WEIGHT);
+ netvsc_poll);
return net_device;