Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r--	drivers/net/hyperv/hyperv_net.h   |  77
-rw-r--r--	drivers/net/hyperv/netvsc.c       | 178
-rw-r--r--	drivers/net/hyperv/netvsc_bpf.c   | 103
-rw-r--r--	drivers/net/hyperv/netvsc_drv.c   | 190
-rw-r--r--	drivers/net/hyperv/rndis_filter.c |  14
5 files changed, 436 insertions, 126 deletions
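
The changes below add XDP_REDIRECT handling and an ndo_xdp_xmit implementation to the netvsc driver, split the per-queue statistics into separate tx/rx structures, and bounce-map packet pages through swiotlb on SNP Isolation VMs. A minimal XDP program along the following lines (a sketch, not part of this patch; the target ifindex is a placeholder) is enough to exercise the new redirect path once attached to the synthetic NIC:

// SPDX-License-Identifier: GPL-2.0
/* Sketch only: redirect every received frame to another interface so
 * the new rx_queue_*_xdp_redirect counters increment. The target
 * ifindex (2) is hypothetical; substitute a real device's ifindex. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int netvsc_redirect_test(struct xdp_md *ctx)
{
	return bpf_redirect(2 /* hypothetical target ifindex */, 0);
}

char _license[] SEC("license") = "GPL";
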
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 315278a7cf88..dd5919ec408b 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -15,6 +15,7 @@
 #include <linux/list.h>
 #include <linux/hyperv.h>
 #include <linux/rndis.h>
+#include <linux/jhash.h>

 /* RSS related */
 #define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */
@@ -164,6 +165,7 @@ struct hv_netvsc_packet {
 	u32 total_bytes;
 	u32 send_buf_index;
 	u32 total_data_buflen;
+	struct hv_dma_range *dma_range;
 };

 #define NETVSC_HASH_KEYLEN 40
@@ -236,6 +238,7 @@ int netvsc_recv_callback(struct net_device *net,
 void netvsc_channel_cb(void *context);
 int netvsc_poll(struct napi_struct *napi, int budget);

+void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev);
 u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
 		   struct xdp_buff *xdp);
 unsigned int netvsc_xdp_fraglen(unsigned int len);
@@ -245,6 +248,8 @@ int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 		   struct netvsc_device *nvdev);
 int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog);
 int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf);
+int netvsc_ndoxdp_xmit(struct net_device *ndev, int n,
+		       struct xdp_frame **frames, u32 flags);

 int rndis_set_subchannel(struct net_device *ndev,
 			 struct netvsc_device *nvdev,
@@ -941,12 +946,21 @@ struct nvsc_rsc {
 #define NVSC_RSC_CSUM_INFO	BIT(1)	/* valid/present bit for 'csum_info' */
 #define NVSC_RSC_HASH_INFO	BIT(2)	/* valid/present bit for 'hash_info' */

-struct netvsc_stats {
+struct netvsc_stats_tx {
+	u64 packets;
+	u64 bytes;
+	u64 xdp_xmit;
+	struct u64_stats_sync syncp;
+};
+
+struct netvsc_stats_rx {
 	u64 packets;
 	u64 bytes;
 	u64 broadcast;
 	u64 multicast;
 	u64 xdp_drop;
+	u64 xdp_redirect;
+	u64 xdp_tx;
 	struct u64_stats_sync syncp;
 };

@@ -1037,7 +1051,8 @@ struct net_device_context {
 	u32 vf_alloc;
 	/* Serial number of the VF to team with */
 	u32 vf_serial;
-
+	/* completion variable to confirm vf association */
+	struct completion vf_add;
 	/* Is the current data path through the VF NIC? */
 	bool data_path_is_vf;

@@ -1045,6 +1060,55 @@ struct net_device_context {
 	struct netvsc_device_info *saved_netvsc_dev_info;
 };

+/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
+ * packets. We can use ethtool to change UDP hash level when necessary.
+ */
+static inline u32 netvsc_get_hash(struct sk_buff *skb,
+				  const struct net_device_context *ndc)
+{
+	struct flow_keys flow;
+	u32 hash, pkt_proto = 0;
+	static u32 hashrnd __read_mostly;
+
+	net_get_random_once(&hashrnd, sizeof(hashrnd));
+
+	if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
+		return 0;
+
+	switch (flow.basic.ip_proto) {
+	case IPPROTO_TCP:
+		if (flow.basic.n_proto == htons(ETH_P_IP))
+			pkt_proto = HV_TCP4_L4HASH;
+		else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+			pkt_proto = HV_TCP6_L4HASH;
+
+		break;
+
+	case IPPROTO_UDP:
+		if (flow.basic.n_proto == htons(ETH_P_IP))
+			pkt_proto = HV_UDP4_L4HASH;
+		else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+			pkt_proto = HV_UDP6_L4HASH;
+
+		break;
+	}
+
+	if (pkt_proto & ndc->l4_hash) {
+		return skb_get_hash(skb);
+	} else {
+		if (flow.basic.n_proto == htons(ETH_P_IP))
+			hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd);
+		else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+			hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd);
+		else
+			return 0;
+
+		__skb_set_sw_hash(skb, hash, false);
+	}
+
+	return hash;
+}
+
 /* Per channel data */
 struct netvsc_channel {
 	struct vmbus_channel *channel;
@@ -1059,9 +1123,10 @@ struct netvsc_channel {

 	struct bpf_prog __rcu *bpf_prog;
 	struct xdp_rxq_info xdp_rxq;
+	bool xdp_flush;

-	struct netvsc_stats tx_stats;
-	struct netvsc_stats rx_stats;
+	struct netvsc_stats_tx tx_stats;
+	struct netvsc_stats_rx rx_stats;
 };

 /* Per netvsc device */
@@ -1074,6 +1139,7 @@ struct netvsc_device {

 	/* Receive buffer allocated by us but manages by NetVSP */
 	void *recv_buf;
+	void *recv_original_buf;
 	u32 recv_buf_size; /* allocated bytes */
 	struct vmbus_gpadl recv_buf_gpadl_handle;
 	u32 recv_section_cnt;
@@ -1082,6 +1148,7 @@ struct netvsc_device {

 	/* Send buffer allocated by us */
 	void *send_buf;
+	void *send_original_buf;
 	u32 send_buf_size;
 	struct vmbus_gpadl send_buf_gpadl_handle;
 	u32 send_section_cnt;
@@ -1731,4 +1798,6 @@ struct rndis_message {
 #define RETRY_US_HI	10000
 #define RETRY_MAX	2000	/* >10 sec */

+void netvsc_dma_unmap(struct hv_device *hv_dev,
+		      struct hv_netvsc_packet *packet);
 #endif /* _HYPERV_NET_H */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 396bc1c204e6..9352dad58996 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
 #include <linux/prefetch.h>
+#include <linux/filter.h>

 #include <asm/sync_bitops.h>
 #include <asm/mshyperv.h>
@@ -153,9 +154,18 @@ static void free_netvsc_device(struct rcu_head *head)
 	int i;

 	kfree(nvdev->extension);
-	vfree(nvdev->recv_buf);
-	vfree(nvdev->send_buf);
-	kfree(nvdev->send_section_map);
+
+	if (nvdev->recv_original_buf)
+		vfree(nvdev->recv_original_buf);
+	else
+		vfree(nvdev->recv_buf);
+
+	if (nvdev->send_original_buf)
+		vfree(nvdev->send_original_buf);
+	else
+		vfree(nvdev->send_buf);
+
+	bitmap_free(nvdev->send_section_map);

 	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
 		xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
@@ -336,8 +346,8 @@ static int netvsc_init_buf(struct hv_device *device,
 	struct net_device *ndev = hv_get_drvdata(device);
 	struct nvsp_message *init_packet;
 	unsigned int buf_size;
-	size_t map_words;
 	int i, ret = 0;
+	void *vaddr;

 	/* Get receive buffer area. */
 	buf_size = device_info->recv_sections * device_info->recv_section_size;
@@ -373,6 +383,17 @@ static int netvsc_init_buf(struct hv_device *device,
 		goto cleanup;
 	}

+	if (hv_isolation_type_snp()) {
+		vaddr = hv_map_memory(net_device->recv_buf, buf_size);
+		if (!vaddr) {
+			ret = -ENOMEM;
+			goto cleanup;
+		}
+
+		net_device->recv_original_buf = net_device->recv_buf;
+		net_device->recv_buf = vaddr;
+	}
+
 	/* Notify the NetVsp of the gpadl handle */
 	init_packet = &net_device->channel_init_pkt;
 	memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -476,6 +497,17 @@ static int netvsc_init_buf(struct hv_device *device,
 		goto cleanup;
 	}

+	if (hv_isolation_type_snp()) {
+		vaddr = hv_map_memory(net_device->send_buf, buf_size);
+		if (!vaddr) {
+			ret = -ENOMEM;
+			goto cleanup;
+		}
+
+		net_device->send_original_buf = net_device->send_buf;
+		net_device->send_buf = vaddr;
+	}
+
 	/* Notify the NetVsp of the gpadl handle */
 	init_packet = &net_device->channel_init_pkt;
 	memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -528,10 +560,9 @@ static int netvsc_init_buf(struct hv_device *device,
 		   net_device->send_section_size, net_device->send_section_cnt);

 	/* Setup state for managing the send buffer. */
-	map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);
-
-	net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
-	if (net_device->send_section_map == NULL) {
+	net_device->send_section_map = bitmap_zalloc(net_device->send_section_cnt,
+						     GFP_KERNEL);
+	if (!net_device->send_section_map) {
 		ret = -ENOMEM;
 		goto cleanup;
 	}
@@ -731,6 +762,12 @@ void netvsc_device_remove(struct hv_device *device)
 		netvsc_teardown_send_gpadl(device, net_device, ndev);
 	}

+	if (net_device->recv_original_buf)
+		hv_unmap_memory(net_device->recv_buf);
+
+	if (net_device->send_original_buf)
+		hv_unmap_memory(net_device->send_buf);
+
 	/* Release all resources */
 	free_netvsc_device_rcu(net_device);
 }
@@ -756,9 +793,9 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
 	int queue_sends;
 	u64 cmd_rqst;

-	cmd_rqst = channel->request_addr_callback(channel, (u64)desc->trans_id);
+	cmd_rqst = channel->request_addr_callback(channel, desc->trans_id);
 	if (cmd_rqst == VMBUS_RQST_ERROR) {
-		netdev_err(ndev, "Incorrect transaction id\n");
+		netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
 		return;
 	}

@@ -766,10 +803,10 @@ static void netvsc_send_tx_complete(struct net_device *ndev,

 	/* Notify the layer above us */
 	if (likely(skb)) {
-		const struct hv_netvsc_packet *packet
+		struct hv_netvsc_packet *packet
 			= (struct hv_netvsc_packet *)skb->cb;
 		u32 send_index = packet->send_buf_index;
-		struct netvsc_stats *tx_stats;
+		struct netvsc_stats_tx *tx_stats;

 		if (send_index != NETVSC_INVALID_INDEX)
 			netvsc_free_send_slot(net_device, send_index);
@@ -782,6 +819,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
 		tx_stats->bytes += packet->total_bytes;
 		u64_stats_update_end(&tx_stats->syncp);

+		netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
 		napi_consume_skb(skb, budget);
 	}

@@ -817,9 +855,9 @@ static void netvsc_send_completion(struct net_device *ndev,
 	/* First check if this is a VMBUS completion without data payload */
 	if (!msglen) {
 		cmd_rqst = incoming_channel->request_addr_callback(incoming_channel,
-								   (u64)desc->trans_id);
+								   desc->trans_id);
 		if (cmd_rqst == VMBUS_RQST_ERROR) {
-			netdev_err(ndev, "Invalid transaction id\n");
+			netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
 			return;
 		}

@@ -946,6 +984,88 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 		memset(dest, 0, padding);
 }

+void netvsc_dma_unmap(struct hv_device *hv_dev,
+		      struct hv_netvsc_packet *packet)
+{
+	u32 page_count = packet->cp_partial ?
+		packet->page_buf_cnt - packet->rmsg_pgcnt :
+		packet->page_buf_cnt;
+	int i;
+
+	if (!hv_is_isolation_supported())
+		return;
+
+	if (!packet->dma_range)
+		return;
+
+	for (i = 0; i < page_count; i++)
+		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
+				 packet->dma_range[i].mapping_size,
+				 DMA_TO_DEVICE);
+
+	kfree(packet->dma_range);
+}
+
+/* netvsc_dma_map - Map swiotlb bounce buffer with data page of
+ * packet sent by vmbus_sendpacket_pagebuffer() in the Isolation
+ * VM.
+ *
+ * In isolation VM, netvsc send buffer has been marked visible to
+ * host and so the data copied to send buffer doesn't need to use
+ * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer()
+ * may not be copied to send buffer and so these pages need to be
+ * mapped with swiotlb bounce buffer. netvsc_dma_map() does that.
+ * The pfns in the struct hv_page_buffer need to be converted
+ * to the bounce buffer's pfns. The loop here is necessary because the
+ * entries in the page buffer array are not necessarily full
+ * pages of data. Each entry in the array has a separate offset and
+ * len that may be non-zero, even for entries in the middle of the
+ * array. And the entries are not physically contiguous. So each
+ * entry must be individually mapped rather than as a contiguous unit.
+ * So don't use dma_map_sg() here.
+ */
+static int netvsc_dma_map(struct hv_device *hv_dev,
+			  struct hv_netvsc_packet *packet,
+			  struct hv_page_buffer *pb)
+{
+	u32 page_count = packet->cp_partial ?
+		packet->page_buf_cnt - packet->rmsg_pgcnt :
+		packet->page_buf_cnt;
+	dma_addr_t dma;
+	int i;
+
+	if (!hv_is_isolation_supported())
+		return 0;
+
+	packet->dma_range = kcalloc(page_count,
+				    sizeof(*packet->dma_range),
+				    GFP_KERNEL);
+	if (!packet->dma_range)
+		return -ENOMEM;
+
+	for (i = 0; i < page_count; i++) {
+		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
+					 + pb[i].offset);
+		u32 len = pb[i].len;
+
+		dma = dma_map_single(&hv_dev->device, src, len,
+				     DMA_TO_DEVICE);
+		if (dma_mapping_error(&hv_dev->device, dma)) {
+			kfree(packet->dma_range);
+			return -ENOMEM;
+		}
+
+		/* pb[].offset and pb[].len are not changed during dma mapping
+		 * and so are not reassigned.
+		 */
+		packet->dma_range[i].dma = dma;
+		packet->dma_range[i].mapping_size = len;
+		pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
+	}
+
+	return 0;
+}
+
 static inline int netvsc_send_pkt(
 	struct hv_device *device,
 	struct hv_netvsc_packet *packet,
@@ -986,14 +1106,24 @@ static inline int netvsc_send_pkt(

 	trace_nvsp_send_pkt(ndev, out_channel, rpkt);

+	packet->dma_range = NULL;
 	if (packet->page_buf_cnt) {
 		if (packet->cp_partial)
 			pb += packet->rmsg_pgcnt;

+		ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
+		if (ret) {
+			ret = -EAGAIN;
+			goto exit;
+		}
+
 		ret = vmbus_sendpacket_pagebuffer(out_channel,
 						  pb, packet->page_buf_cnt,
 						  &nvmsg, sizeof(nvmsg),
 						  req_id);
+
+		if (ret)
+			netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
 	} else {
 		ret = vmbus_sendpacket(out_channel,
 				       &nvmsg, sizeof(nvmsg),
@@ -1001,6 +1131,7 @@ static inline int netvsc_send_pkt(
 				       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	}

+exit:
 	if (ret == 0) {
 		atomic_inc_return(&nvchan->queue_sends);

@@ -1449,6 +1580,10 @@ static void netvsc_send_vf(struct net_device *ndev,

 	net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
 	net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+
+	if (net_device_ctx->vf_alloc)
+		complete(&net_device_ctx->vf_add);
+
 	netdev_info(ndev, "VF slot %u %s\n",
 		    net_device_ctx->vf_serial,
 		    net_device_ctx->vf_alloc ? "added" : "removed");
@@ -1500,7 +1635,6 @@ static int netvsc_process_raw_pkt(struct hv_device *device,

 	case VM_PKT_DATA_USING_XFER_PAGES:
 		return netvsc_receive(ndev, net_device, nvchan, desc);
-		break;

 	case VM_PKT_DATA_INBAND:
 		netvsc_receive_inband(ndev, net_device, desc);
@@ -1541,12 +1675,17 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 	if (!nvchan->desc)
 		nvchan->desc = hv_pkt_iter_first(channel);

+	nvchan->xdp_flush = false;
+
 	while (nvchan->desc && work_done < budget) {
 		work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
 						    ndev, nvchan->desc, budget);
 		nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
 	}

+	if (nvchan->xdp_flush)
+		xdp_do_flush();
+
 	/* Send any pending receive completions */
 	ret = send_recv_completions(ndev, net_device, nvchan);

@@ -1644,8 +1783,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
 	}

 	/* Enable NAPI handler before init callbacks */
-	netif_napi_add(ndev, &net_device->chan_table[0].napi,
-		       netvsc_poll, NAPI_POLL_WEIGHT);
+	netif_napi_add(ndev, &net_device->chan_table[0].napi, netvsc_poll);

 	/* Open the channel */
 	device->channel->next_request_id_callback = vmbus_next_request_id;
@@ -1693,6 +1831,12 @@ cleanup:
 	netif_napi_del(&net_device->chan_table[0].napi);

 cleanup2:
+	if (net_device->recv_original_buf)
+		hv_unmap_memory(net_device->recv_buf);
+
+	if (net_device->send_original_buf)
+		hv_unmap_memory(net_device->send_buf);
+
 	free_netvsc_device(&net_device->rcu);

 	return ERR_PTR(ret);
diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index aa877da113f8..4a9522689fa4 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -10,6 +10,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
+#include <linux/netpoll.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/kernel.h>
@@ -23,11 +24,13 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
 		   struct xdp_buff *xdp)
 {
+	struct netvsc_stats_rx *rx_stats = &nvchan->rx_stats;
 	void *data = nvchan->rsc.data[0];
 	u32 len = nvchan->rsc.len[0];
 	struct page *page = NULL;
 	struct bpf_prog *prog;
 	u32 act = XDP_PASS;
+	bool drop = true;

 	xdp->data_hard_start = NULL;
@@ -60,21 +63,46 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
 	switch (act) {
 	case XDP_PASS:
 	case XDP_TX:
+		drop = false;
+		break;
+
 	case XDP_DROP:
 		break;

+	case XDP_REDIRECT:
+		if (!xdp_do_redirect(ndev, xdp, prog)) {
+			nvchan->xdp_flush = true;
+			drop = false;
+
+			u64_stats_update_begin(&rx_stats->syncp);
+
+			rx_stats->xdp_redirect++;
+			rx_stats->packets++;
+			rx_stats->bytes += nvchan->rsc.pktlen;
+
+			u64_stats_update_end(&rx_stats->syncp);
+
+			break;
+		} else {
+			u64_stats_update_begin(&rx_stats->syncp);
+			rx_stats->xdp_drop++;
+			u64_stats_update_end(&rx_stats->syncp);
+		}
+
+		fallthrough;
+
 	case XDP_ABORTED:
 		trace_xdp_exception(ndev, prog, act);
 		break;

 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(ndev, prog, act);
 	}

out:
 	rcu_read_unlock();

-	if (page && act != XDP_PASS && act != XDP_TX) {
+	if (page && drop) {
 		__free_page(page);
 		xdp->data_hard_start = NULL;
 	}
@@ -137,7 +165,6 @@ int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
 {
 	struct netdev_bpf xdp;
-	bpf_op_t ndo_bpf;
 	int ret;

 	ASSERT_RTNL();
@@ -145,8 +172,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
 	if (!vf_netdev)
 		return 0;

-	ndo_bpf = vf_netdev->netdev_ops->ndo_bpf;
-	if (!ndo_bpf)
+	if (!vf_netdev->netdev_ops->ndo_bpf)
 		return 0;

 	memset(&xdp, 0, sizeof(xdp));
@@ -157,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
 	xdp.command = XDP_SETUP_PROG;
 	xdp.prog = prog;

-	ret = ndo_bpf(vf_netdev, &xdp);
+	ret = vf_netdev->netdev_ops->ndo_bpf(vf_netdev, &xdp);

 	if (ret && prog)
 		bpf_prog_put(prog);
@@ -199,3 +225,68 @@ int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 		return -EINVAL;
 	}
 }
+
+static int netvsc_ndoxdp_xmit_fm(struct net_device *ndev,
+				 struct xdp_frame *frame, u16 q_idx)
+{
+	struct sk_buff *skb;
+
+	skb = xdp_build_skb_from_frame(frame, ndev);
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+	netvsc_get_hash(skb, netdev_priv(ndev));
+
+	skb_record_rx_queue(skb, q_idx);
+
+	netvsc_xdp_xmit(skb, ndev);
+
+	return 0;
+}
+
+int netvsc_ndoxdp_xmit(struct net_device *ndev, int n,
+		       struct xdp_frame **frames, u32 flags)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(ndev);
+	const struct net_device_ops *vf_ops;
+	struct netvsc_stats_tx *tx_stats;
+	struct netvsc_device *nvsc_dev;
+	struct net_device *vf_netdev;
+	int i, count = 0;
+	u16 q_idx;
+
+	/* Don't transmit if netvsc_device is gone */
+	nvsc_dev = rcu_dereference_bh(ndev_ctx->nvdev);
+	if (unlikely(!nvsc_dev || nvsc_dev->destroy))
+		return 0;
+
+	/* If VF is present and up then redirect packets to it.
+	 * Skip the VF if it is marked down or has no carrier.
+	 * If netpoll is in use, then the VF cannot be used either.
+	 */
+	vf_netdev = rcu_dereference_bh(ndev_ctx->vf_netdev);
+	if (vf_netdev && netif_running(vf_netdev) &&
+	    netif_carrier_ok(vf_netdev) && !netpoll_tx_running(ndev) &&
+	    vf_netdev->netdev_ops->ndo_xdp_xmit &&
+	    ndev_ctx->data_path_is_vf) {
+		vf_ops = vf_netdev->netdev_ops;
+		return vf_ops->ndo_xdp_xmit(vf_netdev, n, frames, flags);
+	}
+
+	q_idx = smp_processor_id() % ndev->real_num_tx_queues;
+
+	for (i = 0; i < n; i++) {
+		if (netvsc_ndoxdp_xmit_fm(ndev, frames[i], q_idx))
+			break;
+
+		count++;
+	}
+
+	tx_stats = &nvsc_dev->chan_table[q_idx].tx_stats;
+
+	u64_stats_update_begin(&tx_stats->syncp);
+	tx_stats->xdp_xmit += count;
+	u64_stats_update_end(&tx_stats->syncp);
+
+	return count;
+}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 7e66ae1d2a59..89eb4f179a3c 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -242,56 +242,6 @@ static inline void *init_ppi_data(struct rndis_message *msg,
 	return ppi + 1;
 }

-/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
- * packets. We can use ethtool to change UDP hash level when necessary.
- */
-static inline u32 netvsc_get_hash(
-	struct sk_buff *skb,
-	const struct net_device_context *ndc)
-{
-	struct flow_keys flow;
-	u32 hash, pkt_proto = 0;
-	static u32 hashrnd __read_mostly;
-
-	net_get_random_once(&hashrnd, sizeof(hashrnd));
-
-	if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
-		return 0;
-
-	switch (flow.basic.ip_proto) {
-	case IPPROTO_TCP:
-		if (flow.basic.n_proto == htons(ETH_P_IP))
-			pkt_proto = HV_TCP4_L4HASH;
-		else if (flow.basic.n_proto == htons(ETH_P_IPV6))
-			pkt_proto = HV_TCP6_L4HASH;
-
-		break;
-
-	case IPPROTO_UDP:
-		if (flow.basic.n_proto == htons(ETH_P_IP))
-			pkt_proto = HV_UDP4_L4HASH;
-		else if (flow.basic.n_proto == htons(ETH_P_IPV6))
-			pkt_proto = HV_UDP6_L4HASH;
-
-		break;
-	}
-
-	if (pkt_proto & ndc->l4_hash) {
-		return skb_get_hash(skb);
-	} else {
-		if (flow.basic.n_proto == htons(ETH_P_IP))
-			hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd);
-		else if (flow.basic.n_proto == htons(ETH_P_IPV6))
-			hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd);
-		else
-			return 0;
-
-		__skb_set_sw_hash(skb, hash, false);
-	}
-
-	return hash;
-}
-
 static inline int netvsc_get_tx_queue(struct net_device *ndev,
 				      struct sk_buff *skb, int old_idx)
 {
@@ -804,7 +754,7 @@ void netvsc_linkstatus_callback(struct net_device *net,
 }

 /* This function should only be called after skb_record_rx_queue() */
-static void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev)
+void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	int rc;

@@ -925,7 +875,7 @@ int netvsc_recv_callback(struct net_device *net,
 	struct vmbus_channel *channel = nvchan->channel;
 	u16 q_idx = channel->offermsg.offer.sub_channel_index;
 	struct sk_buff *skb;
-	struct netvsc_stats *rx_stats = &nvchan->rx_stats;
+	struct netvsc_stats_rx *rx_stats = &nvchan->rx_stats;
 	struct xdp_buff xdp;
 	u32 act;

@@ -934,6 +884,9 @@ int netvsc_recv_callback(struct net_device *net,

 	act = netvsc_run_xdp(net, nvchan, &xdp);

+	if (act == XDP_REDIRECT)
+		return NVSP_STAT_SUCCESS;
+
 	if (act != XDP_PASS && act != XDP_TX) {
 		u64_stats_update_begin(&rx_stats->syncp);
 		rx_stats->xdp_drop++;
@@ -958,6 +911,9 @@ int netvsc_recv_callback(struct net_device *net,
 	 * statistics will not work correctly.
 	 */
 	u64_stats_update_begin(&rx_stats->syncp);
+	if (act == XDP_TX)
+		rx_stats->xdp_tx++;
+
 	rx_stats->packets++;
 	rx_stats->bytes += nvchan->rsc.pktlen;

@@ -979,8 +935,8 @@ int netvsc_recv_callback(struct net_device *net,
 static void netvsc_get_drvinfo(struct net_device *net,
 			       struct ethtool_drvinfo *info)
 {
-	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
-	strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+	strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+	strscpy(info->fw_version, "N/A", sizeof(info->fw_version));
 }

 static void netvsc_get_channels(struct net_device *net,
@@ -1353,28 +1309,29 @@ static void netvsc_get_pcpu_stats(struct net_device *net,
 	/* fetch percpu stats of netvsc */
 	for (i = 0; i < nvdev->num_chn; i++) {
 		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
-		const struct netvsc_stats *stats;
+		const struct netvsc_stats_tx *tx_stats;
+		const struct netvsc_stats_rx *rx_stats;
 		struct netvsc_ethtool_pcpu_stats *this_tot =
 			&pcpu_tot[nvchan->channel->target_cpu];
 		u64 packets, bytes;
 		unsigned int start;

-		stats = &nvchan->tx_stats;
+		tx_stats = &nvchan->tx_stats;
 		do {
-			start = u64_stats_fetch_begin_irq(&stats->syncp);
-			packets = stats->packets;
-			bytes = stats->bytes;
-		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+			start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+			packets = tx_stats->packets;
+			bytes = tx_stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));

 		this_tot->tx_bytes	+= bytes;
 		this_tot->tx_packets	+= packets;

-		stats = &nvchan->rx_stats;
+		rx_stats = &nvchan->rx_stats;
 		do {
-			start = u64_stats_fetch_begin_irq(&stats->syncp);
-			packets = stats->packets;
-			bytes = stats->bytes;
-		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+			start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+			packets = rx_stats->packets;
+			bytes = rx_stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));

 		this_tot->rx_bytes	+= bytes;
 		this_tot->rx_packets	+= packets;
@@ -1406,27 +1363,28 @@ static void netvsc_get_stats64(struct net_device *net,

 	for (i = 0; i < nvdev->num_chn; i++) {
 		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
-		const struct netvsc_stats *stats;
+		const struct netvsc_stats_tx *tx_stats;
+		const struct netvsc_stats_rx *rx_stats;
 		u64 packets, bytes, multicast;
 		unsigned int start;

-		stats = &nvchan->tx_stats;
+		tx_stats = &nvchan->tx_stats;
 		do {
-			start = u64_stats_fetch_begin_irq(&stats->syncp);
-			packets = stats->packets;
-			bytes = stats->bytes;
-		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+			start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+			packets = tx_stats->packets;
+			bytes = tx_stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));

 		t->tx_bytes	+= bytes;
 		t->tx_packets	+= packets;

-		stats = &nvchan->rx_stats;
+		rx_stats = &nvchan->rx_stats;
 		do {
-			start = u64_stats_fetch_begin_irq(&stats->syncp);
-			packets = stats->packets;
-			bytes = stats->bytes;
-			multicast = stats->multicast + stats->broadcast;
-		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+			start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+			packets = rx_stats->packets;
+			bytes = rx_stats->bytes;
+			multicast = rx_stats->multicast + rx_stats->broadcast;
+		} while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));

 		t->rx_bytes	+= bytes;
 		t->rx_packets	+= packets;
@@ -1515,8 +1473,8 @@ static const struct {
 /* statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))

-/* 5 statistics per queue (rx/tx packets/bytes, rx xdp_drop) */
-#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 5)
+/* 8 statistics per queue (rx/tx packets/bytes, XDP actions) */
+#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 8)

 static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 {
@@ -1543,12 +1501,16 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	struct net_device_context *ndc = netdev_priv(dev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 	const void *nds = &ndc->eth_stats;
-	const struct netvsc_stats *qstats;
+	const struct netvsc_stats_tx *tx_stats;
+	const struct netvsc_stats_rx *rx_stats;
 	struct netvsc_vf_pcpu_stats sum;
 	struct netvsc_ethtool_pcpu_stats *pcpu_sum;
 	unsigned int start;
 	u64 packets, bytes;
 	u64 xdp_drop;
+	u64 xdp_redirect;
+	u64 xdp_tx;
+	u64 xdp_xmit;
 	int i, j, cpu;

 	if (!nvdev)
@@ -1562,31 +1524,40 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 		data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);

 	for (j = 0; j < nvdev->num_chn; j++) {
-		qstats = &nvdev->chan_table[j].tx_stats;
+		tx_stats = &nvdev->chan_table[j].tx_stats;

 		do {
-			start = u64_stats_fetch_begin_irq(&qstats->syncp);
-			packets = qstats->packets;
-			bytes = qstats->bytes;
-		} while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
+			start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+			packets = tx_stats->packets;
+			bytes = tx_stats->bytes;
+			xdp_xmit = tx_stats->xdp_xmit;
+		} while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
 		data[i++] = packets;
 		data[i++] = bytes;
+		data[i++] = xdp_xmit;

-		qstats = &nvdev->chan_table[j].rx_stats;
+		rx_stats = &nvdev->chan_table[j].rx_stats;
 		do {
-			start = u64_stats_fetch_begin_irq(&qstats->syncp);
-			packets = qstats->packets;
-			bytes = qstats->bytes;
-			xdp_drop = qstats->xdp_drop;
-		} while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
+			start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+			packets = rx_stats->packets;
+			bytes = rx_stats->bytes;
+			xdp_drop = rx_stats->xdp_drop;
+			xdp_redirect = rx_stats->xdp_redirect;
+			xdp_tx = rx_stats->xdp_tx;
+		} while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
 		data[i++] = packets;
 		data[i++] = bytes;
 		data[i++] = xdp_drop;
+		data[i++] = xdp_redirect;
+		data[i++] = xdp_tx;
 	}

 	pcpu_sum = kvmalloc_array(num_possible_cpus(),
 				  sizeof(struct netvsc_ethtool_pcpu_stats),
 				  GFP_KERNEL);
+	if (!pcpu_sum)
+		return;
+
 	netvsc_get_pcpu_stats(dev, pcpu_sum);
 	for_each_present_cpu(cpu) {
 		struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
@@ -1619,9 +1590,12 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 		for (i = 0; i < nvdev->num_chn; i++) {
 			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
 			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "tx_queue_%u_xdp_xmit", i);
 			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
 			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
 			ethtool_sprintf(&p, "rx_queue_%u_xdp_drop", i);
+			ethtool_sprintf(&p, "rx_queue_%u_xdp_redirect", i);
+			ethtool_sprintf(&p, "rx_queue_%u_xdp_tx", i);
 		}

 		for_each_present_cpu(cpu) {
@@ -1858,7 +1832,9 @@ static void __netvsc_get_ringparam(struct netvsc_device *nvdev,
 }

 static void netvsc_get_ringparam(struct net_device *ndev,
-				 struct ethtool_ringparam *ring)
+				 struct ethtool_ringparam *ring,
+				 struct kernel_ethtool_ringparam *kernel_ring,
+				 struct netlink_ext_ack *extack)
 {
 	struct net_device_context *ndevctx = netdev_priv(ndev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
@@ -1870,7 +1846,9 @@ static void netvsc_get_ringparam(struct net_device *ndev,
 }

 static int netvsc_set_ringparam(struct net_device *ndev,
-				struct ethtool_ringparam *ring)
+				struct ethtool_ringparam *ring,
+				struct kernel_ethtool_ringparam *kernel_ring,
+				struct netlink_ext_ack *extack)
 {
 	struct net_device_context *ndevctx = netdev_priv(ndev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
@@ -2050,6 +2028,7 @@ static const struct net_device_ops device_ops = {
 	.ndo_select_queue =		netvsc_select_queue,
 	.ndo_get_stats64 =		netvsc_get_stats64,
 	.ndo_bpf =			netvsc_bpf,
+	.ndo_xdp_xmit =			netvsc_ndoxdp_xmit,
 };

 /*
@@ -2334,6 +2313,18 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
 	}

+	/* Fallback path: match the synthetic NIC with
+	 * the help of the MAC address.
+	 */
+	list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
+		ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+		if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) {
+			netdev_notice(vf_netdev,
+				      "falling back to mac addr based matching\n");
+			return ndev;
+		}
+	}
+
 	netdev_notice(vf_netdev,
 		      "no netdev found for vf serial:%u\n", serial);
 	return NULL;
@@ -2430,6 +2421,11 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
 	if (net_device_ctx->data_path_is_vf == vf_is_up)
 		return NOTIFY_OK;

+	if (vf_is_up && !net_device_ctx->vf_alloc) {
+		netdev_info(ndev, "Waiting for the VF association from host\n");
+		wait_for_completion(&net_device_ctx->vf_add);
+	}
+
 	ret = netvsc_switch_datapath(ndev, vf_is_up);

 	if (ret) {
@@ -2461,6 +2457,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)

 	netvsc_vf_setxdp(vf_netdev, NULL);

+	reinit_completion(&net_device_ctx->vf_add);
 	netdev_rx_handler_unregister(vf_netdev);
 	netdev_upper_dev_unlink(vf_netdev, ndev);
 	RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
@@ -2500,6 +2497,7 @@ static int netvsc_probe(struct hv_device *dev,

 	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);

+	init_completion(&net_device_ctx->vf_add);
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
 	INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
@@ -2512,6 +2510,7 @@ static int netvsc_probe(struct hv_device *dev,
 	net->netdev_ops = &device_ops;
 	net->ethtool_ops = &ethtool_ops;
 	SET_NETDEV_DEV(net, &dev->device);
+	dma_set_min_align_mask(&dev->device, HV_HYP_PAGE_SIZE - 1);

 	/* We always need headroom for rndis header */
 	net->needed_headroom = RNDIS_AND_PPI_SIZE;
@@ -2663,7 +2662,10 @@ static int netvsc_suspend(struct hv_device *dev)

 	/* Save the current config info */
 	ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev);
-
+	if (!ndev_ctx->saved_netvsc_dev_info) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	ret = netvsc_detach(net, nvdev);
out:
 	rtnl_unlock();
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index f6c9c2a670f9..eea777ec2541 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
 #include <linux/ucs2_string.h>
+#include <linux/string.h>

 #include "hyperv_net.h"
 #include "netvsc_trace.h"
@@ -335,9 +336,10 @@ static void rndis_filter_receive_response(struct net_device *ndev,
 		if (resp->msg_len <=
 		    sizeof(struct rndis_message) + RNDIS_EXT_LEN) {
 			memcpy(&request->response_msg, resp, RNDIS_HEADER_SIZE + sizeof(*req_id));
-			memcpy((void *)&request->response_msg + RNDIS_HEADER_SIZE + sizeof(*req_id),
+			unsafe_memcpy((void *)&request->response_msg + RNDIS_HEADER_SIZE + sizeof(*req_id),
 			       data + RNDIS_HEADER_SIZE + sizeof(*req_id),
-			       resp->msg_len - RNDIS_HEADER_SIZE - sizeof(*req_id));
+			       resp->msg_len - RNDIS_HEADER_SIZE - sizeof(*req_id),
+			       "request->response_msg is followed by a padding of RNDIS_EXT_LEN inside rndis_request");

 			if (request->request_msg.ndis_msg_type ==
 			    RNDIS_MSG_QUERY && request->request_msg.msg.
 			    query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS)
@@ -361,6 +363,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
 			}
 		}

+		netvsc_dma_unmap(((struct net_device_context *)
+			netdev_priv(ndev))->device_ctx, &request->pkt);
 		complete(&request->wait_event);
 	} else {
 		netdev_err(ndev,
@@ -1347,7 +1351,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
 	struct net_device_context *net_device_ctx = netdev_priv(net);
 	struct ndis_offload hwcaps;
 	struct ndis_offload_params offloads;
-	unsigned int gso_max_size = GSO_MAX_SIZE;
+	unsigned int gso_max_size = GSO_LEGACY_MAX_SIZE;
 	int ret;

 	/* Find HW offload capabilities */
@@ -1429,7 +1433,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
 	 */
 	net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features;

-	netif_set_gso_max_size(net, gso_max_size);
+	netif_set_tso_max_size(net, gso_max_size);

 	ret = rndis_filter_set_offload_params(net, nvdev, &offloads);

@@ -1573,7 +1577,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,

 	for (i = 1; i < net_device->num_chn; i++)
 		netif_napi_add(net, &net_device->chan_table[i].napi,
-			       netvsc_poll, NAPI_POLL_WEIGHT);
+			       netvsc_poll);

 	return net_device;
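
Once applied, the new per-queue XDP counters are visible through standard tooling. A hypothetical session (interface name, object file, and counter values are illustrative only, assuming the test program above was compiled to xdp_prog.o):

# ip link set dev eth0 xdp obj xdp_prog.o sec xdp
# ethtool -S eth0 | grep xdp
     tx_queue_0_xdp_xmit: 0
     rx_queue_0_xdp_drop: 0
     rx_queue_0_xdp_redirect: 4096
     rx_queue_0_xdp_tx: 0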