Diffstat (limited to 'drivers/net/ethernet/microsoft/mana/mana_en.c')
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/mana_en.c | 354
1 file changed, 316 insertions(+), 38 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 72cbf45c42d8..9259a74eca40 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1,9 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /* Copyright (c) 2021, Microsoft Corporation. */
 
+#include <uapi/linux/bpf.h>
+
 #include <linux/inetdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
+#include <linux/filter.h>
 #include <linux/mm.h>
 
 #include <net/checksum.h>
@@ -125,7 +128,7 @@ frag_err:
 	return -ENOMEM;
 }
 
-static int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
 	struct mana_port_context *apc = netdev_priv(ndev);
@@ -134,7 +137,7 @@ static int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	bool ipv4 = false, ipv6 = false;
 	struct mana_tx_package pkg = {};
 	struct netdev_queue *net_txq;
-	struct mana_stats *tx_stats;
+	struct mana_stats_tx *tx_stats;
 	struct gdma_queue *gdma_sq;
 	unsigned int csum_type;
 	struct mana_txq *txq;
@@ -297,7 +300,8 @@ static void mana_get_stats64(struct net_device *ndev,
 {
 	struct mana_port_context *apc = netdev_priv(ndev);
 	unsigned int num_queues = apc->num_queues;
-	struct mana_stats *stats;
+	struct mana_stats_rx *rx_stats;
+	struct mana_stats_tx *tx_stats;
 	unsigned int start;
 	u64 packets, bytes;
 	int q;
@@ -308,26 +312,26 @@
 	netdev_stats_to_stats64(st, &ndev->stats);
 
 	for (q = 0; q < num_queues; q++) {
-		stats = &apc->rxqs[q]->stats;
+		rx_stats = &apc->rxqs[q]->stats;
 
 		do {
-			start = u64_stats_fetch_begin_irq(&stats->syncp);
-			packets = stats->packets;
-			bytes = stats->bytes;
-		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+			start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+			packets = rx_stats->packets;
+			bytes = rx_stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
 
 		st->rx_packets += packets;
 		st->rx_bytes += bytes;
 	}
 
 	for (q = 0; q < num_queues; q++) {
-		stats = &apc->tx_qp[q].txq.stats;
+		tx_stats = &apc->tx_qp[q].txq.stats;
 
 		do {
-			start = u64_stats_fetch_begin_irq(&stats->syncp);
-			packets = stats->packets;
-			bytes = stats->bytes;
-		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+			start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+			packets = tx_stats->packets;
+			bytes = tx_stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
 
 		st->tx_packets += packets;
 		st->tx_bytes += bytes;
@@ -378,6 +382,8 @@ static const struct net_device_ops mana_devops = {
 	.ndo_start_xmit		= mana_start_xmit,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_get_stats64	= mana_get_stats64,
+	.ndo_bpf		= mana_bpf,
+	.ndo_xdp_xmit		= mana_xdp_xmit,
 };
 
 static void mana_cleanup_port_context(struct mana_port_context *apc)
@@ -442,6 +448,119 @@ static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr,
 	return 0;
 }
 
+static int mana_pf_register_hw_vport(struct mana_port_context *apc)
+{
+	struct mana_register_hw_vport_resp resp = {};
+	struct mana_register_hw_vport_req req = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_HW_PORT,
+			     sizeof(req), sizeof(resp));
+	req.attached_gfid = 1;
+	req.is_pf_default_vport = 1;
+	req.allow_all_ether_types = 1;
+
+	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+				sizeof(resp));
+	if (err) {
+		netdev_err(apc->ndev, "Failed to register hw vPort: %d\n", err);
+		return err;
+	}
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_HW_PORT,
+				   sizeof(resp));
+	if (err || resp.hdr.status) {
+		netdev_err(apc->ndev, "Failed to register hw vPort: %d, 0x%x\n",
+			   err, resp.hdr.status);
+		return err ? err : -EPROTO;
+	}
+
+	apc->port_handle = resp.hw_vport_handle;
+	return 0;
+}
+
+static void mana_pf_deregister_hw_vport(struct mana_port_context *apc)
+{
+	struct mana_deregister_hw_vport_resp resp = {};
+	struct mana_deregister_hw_vport_req req = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_HW_PORT,
+			     sizeof(req), sizeof(resp));
+	req.hw_vport_handle = apc->port_handle;
+
+	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+				sizeof(resp));
+	if (err) {
+		netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
+			   err);
+		return;
+	}
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_HW_PORT,
+				   sizeof(resp));
+	if (err || resp.hdr.status)
+		netdev_err(apc->ndev,
+			   "Failed to deregister hw vPort: %d, 0x%x\n",
+			   err, resp.hdr.status);
+}
+
+static int mana_pf_register_filter(struct mana_port_context *apc)
+{
+	struct mana_register_filter_resp resp = {};
+	struct mana_register_filter_req req = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_FILTER,
+			     sizeof(req), sizeof(resp));
+	req.vport = apc->port_handle;
+	memcpy(req.mac_addr, apc->mac_addr, ETH_ALEN);
+
+	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+				sizeof(resp));
+	if (err) {
+		netdev_err(apc->ndev, "Failed to register filter: %d\n", err);
+		return err;
+	}
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_FILTER,
+				   sizeof(resp));
+	if (err || resp.hdr.status) {
+		netdev_err(apc->ndev, "Failed to register filter: %d, 0x%x\n",
+			   err, resp.hdr.status);
+		return err ? err : -EPROTO;
+	}
+
+	apc->pf_filter_handle = resp.filter_handle;
+	return 0;
+}
+
+static void mana_pf_deregister_filter(struct mana_port_context *apc)
+{
+	struct mana_deregister_filter_resp resp = {};
+	struct mana_deregister_filter_req req = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_FILTER,
+			     sizeof(req), sizeof(resp));
+	req.filter_handle = apc->pf_filter_handle;
+
+	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+				sizeof(resp));
+	if (err) {
+		netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
+			   err);
+		return;
+	}
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_FILTER,
+				   sizeof(resp));
+	if (err || resp.hdr.status)
+		netdev_err(apc->ndev,
+			   "Failed to deregister filter: %d, 0x%x\n",
+			   err, resp.hdr.status);
+}
+
 static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
 				 u32 proto_minor_ver, u32 proto_micro_ver,
 				 u16 *max_num_vports)
@@ -749,6 +868,61 @@ out:
 	return err;
 }
 
+static int mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq)
+{
+	struct mana_fence_rq_resp resp = {};
+	struct mana_fence_rq_req req = {};
+	int err;
+
+	init_completion(&rxq->fence_event);
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ,
+			     sizeof(req), sizeof(resp));
+	req.wq_obj_handle = rxq->rxobj;
+
+	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+				sizeof(resp));
+	if (err) {
+		netdev_err(apc->ndev, "Failed to fence RQ %u: %d\n",
+			   rxq->rxq_idx, err);
+		return err;
+	}
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp));
+	if (err || resp.hdr.status) {
+		netdev_err(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n",
+			   rxq->rxq_idx, err, resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+
+		return err;
+	}
+
+	if (wait_for_completion_timeout(&rxq->fence_event, 10 * HZ) == 0) {
+		netdev_err(apc->ndev, "Failed to fence RQ %u: timed out\n",
+			   rxq->rxq_idx);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static void mana_fence_rqs(struct mana_port_context *apc)
+{
+	unsigned int rxq_idx;
+	struct mana_rxq *rxq;
+	int err;
+
+	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
+		rxq = apc->rxqs[rxq_idx];
+		err = mana_fence_rq(apc, rxq);
+
+		/* In case of any error, use sleep instead. */
+		if (err)
+			msleep(100);
+	}
+}
+
 static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
 {
 	u32 used_space_old;
@@ -906,16 +1080,37 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq)
 	WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
 }
 
+static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
+				      struct xdp_buff *xdp)
+{
+	struct sk_buff *skb = build_skb(buf_va, PAGE_SIZE);
+
+	if (!skb)
+		return NULL;
+
+	if (xdp->data_hard_start) {
+		skb_reserve(skb, xdp->data - xdp->data_hard_start);
+		skb_put(skb, xdp->data_end - xdp->data);
+	} else {
+		skb_reserve(skb, XDP_PACKET_HEADROOM);
+		skb_put(skb, pkt_len);
+	}
+
+	return skb;
+}
+
 static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
 			struct mana_rxq *rxq)
 {
-	struct mana_stats *rx_stats = &rxq->stats;
+	struct mana_stats_rx *rx_stats = &rxq->stats;
 	struct net_device *ndev = rxq->ndev;
 	uint pkt_len = cqe->ppi[0].pkt_len;
 	u16 rxq_idx = rxq->rxq_idx;
 	struct napi_struct *napi;
+	struct xdp_buff xdp = {};
 	struct sk_buff *skb;
 	u32 hash_value;
+	u32 act;
 
 	rxq->rx_cq.work_done++;
 	napi = &rxq->rx_cq.napi;
@@ -925,15 +1120,19 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
 		return;
 	}
 
-	skb = build_skb(buf_va, PAGE_SIZE);
+	act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);
 
-	if (!skb) {
-		free_page((unsigned long)buf_va);
-		++ndev->stats.rx_dropped;
+	if (act == XDP_REDIRECT && !rxq->xdp_rc)
 		return;
-	}
 
-	skb_put(skb, pkt_len);
+	if (act != XDP_PASS && act != XDP_TX)
+		goto drop_xdp;
+
+	skb = mana_build_skb(buf_va, pkt_len, &xdp);
+
+	if (!skb)
+		goto drop;
+
 	skb->dev = napi->dev;
 
 	skb->protocol = eth_type_trans(skb, ndev);
@@ -954,12 +1153,36 @@
 		skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3);
 	}
 
-	napi_gro_receive(napi, skb);
-
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->packets++;
 	rx_stats->bytes += pkt_len;
+
+	if (act == XDP_TX)
+		rx_stats->xdp_tx++;
+	u64_stats_update_end(&rx_stats->syncp);
+
+	if (act == XDP_TX) {
+		skb_set_queue_mapping(skb, rxq_idx);
+		mana_xdp_tx(skb, ndev);
+		return;
+	}
+
+	napi_gro_receive(napi, skb);
+
+	return;
+
+drop_xdp:
+	u64_stats_update_begin(&rx_stats->syncp);
+	rx_stats->xdp_drop++;
 	u64_stats_update_end(&rx_stats->syncp);
+
+drop:
+	WARN_ON_ONCE(rxq->xdp_save_page);
+	rxq->xdp_save_page = virt_to_page(buf_va);
+
+	++ndev->stats.rx_dropped;
+
+	return;
 }
 
 static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
@@ -980,15 +1203,17 @@
 		break;
 
 	case CQE_RX_TRUNCATED:
-		netdev_err(ndev, "Dropped a truncated packet\n");
-		return;
+		++ndev->stats.rx_dropped;
+		rxbuf_oob = &rxq->rx_oobs[rxq->buf_index];
+		netdev_warn_once(ndev, "Dropped a truncated packet\n");
+		goto drop;
 
 	case CQE_RX_COALESCED_4:
 		netdev_err(ndev, "RX coalescing is unsupported\n");
 		return;
 
 	case CQE_RX_OBJECT_FENCE:
-		netdev_err(ndev, "RX Fencing is unsupported\n");
+		complete(&rxq->fence_event);
 		return;
 
 	default:
@@ -997,9 +1222,6 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
 		return;
 	}
 
-	if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY)
-		return;
-
 	pktlen = oob->ppi[0].pkt_len;
 
 	if (pktlen == 0) {
@@ -1013,10 +1235,16 @@
 	rxbuf_oob = &rxq->rx_oobs[curr];
 	WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
 
-	new_page = alloc_page(GFP_ATOMIC);
+	/* Reuse XDP dropped page if available */
+	if (rxq->xdp_save_page) {
+		new_page = rxq->xdp_save_page;
+		rxq->xdp_save_page = NULL;
+	} else {
+		new_page = alloc_page(GFP_ATOMIC);
+	}
 
 	if (new_page) {
-		da = dma_map_page(dev, new_page, 0, rxq->datasize,
+		da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
 				  DMA_FROM_DEVICE);
 
 		if (dma_mapping_error(dev, da)) {
@@ -1043,6 +1271,7 @@
 
 	mana_rx_skb(old_buf, oob, rxq);
 
+drop:
 	mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
 
 	mana_post_pkt_rxq(rxq);
@@ -1051,11 +1280,14 @@
 static void mana_poll_rx_cq(struct mana_cq *cq)
 {
 	struct gdma_comp *comp = cq->gdma_comp_buf;
+	struct mana_rxq *rxq = cq->rxq;
 	int comp_read, i;
 
 	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
 	WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
 
+	rxq->xdp_flush = false;
+
 	for (i = 0; i < comp_read; i++) {
 		if (WARN_ON_ONCE(comp[i].is_sq))
 			return;
@@ -1064,8 +1296,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
 		if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id))
 			return;
 
-		mana_process_rx_cqe(cq->rxq, cq, &comp[i]);
+		mana_process_rx_cqe(rxq, cq, &comp[i]);
 	}
+
+	if (rxq->xdp_flush)
+		xdp_do_flush();
 }
 
 static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
@@ -1260,7 +1495,7 @@ static int mana_create_txq(struct mana_port_context *apc,
 
 	gc->cq_table[cq->gdma_id] = cq->gdma_cq;
 
-	netif_tx_napi_add(net, &cq->napi, mana_poll, NAPI_POLL_WEIGHT);
+	netif_napi_add_tx(net, &cq->napi, mana_poll);
 	napi_enable(&cq->napi);
 
 	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
@@ -1291,12 +1526,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 
 	napi_synchronize(napi);
 
 	napi_disable(napi);
+
+	xdp_rxq_info_unreg(&rxq->xdp_rxq);
+
 	netif_napi_del(napi);
 
 	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
 
 	mana_deinit_cq(apc, &rxq->rx_cq);
 
+	if (rxq->xdp_save_page)
+		__free_page(rxq->xdp_save_page);
+
 	for (i = 0; i < rxq->num_rx_buf; i++) {
 		rx_oob = &rxq->rx_oobs[i];
@@ -1342,7 +1583,8 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc,
 		if (!page)
 			return -ENOMEM;
 
-		da = dma_map_page(dev, page, 0, rxq->datasize, DMA_FROM_DEVICE);
+		da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize,
+				  DMA_FROM_DEVICE);
 
 		if (dma_mapping_error(dev, da)) {
 			__free_page(page);
@@ -1484,7 +1726,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
 	gc->cq_table[cq->gdma_id] = cq->gdma_cq;
 
-	netif_napi_add(ndev, &cq->napi, mana_poll, 1);
+	netif_napi_add_weight(ndev, &cq->napi, mana_poll, 1);
+
+	WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
+				 cq->napi.napi_id));
+	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
+					   MEM_TYPE_PAGE_SHARED, NULL));
+
 	napi_enable(&cq->napi);
 
 	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
@@ -1529,6 +1777,7 @@ out:
 
 static void mana_destroy_vport(struct mana_port_context *apc)
 {
+	struct gdma_dev *gd = apc->ac->gdma_dev;
 	struct mana_rxq *rxq;
 	u32 rxq_idx;
 
@@ -1542,6 +1791,9 @@ static void mana_destroy_vport(struct mana_port_context *apc)
 	}
 
 	mana_destroy_txq(apc);
+
+	if (gd->gdma_context->is_pf)
+		mana_pf_deregister_hw_vport(apc);
 }
 
 static int mana_create_vport(struct mana_port_context *apc,
@@ -1552,6 +1804,12 @@ static int mana_create_vport(struct mana_port_context *apc,
 
 	apc->default_rxobj = INVALID_MANA_HANDLE;
 
+	if (gd->gdma_context->is_pf) {
+		err = mana_pf_register_hw_vport(apc);
+		if (err)
+			return err;
+	}
+
 	err = mana_cfg_vport(apc, gd->pdid, gd->doorbell);
 	if (err)
 		return err;
@@ -1572,6 +1830,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
 		    bool update_hash, bool update_tab)
 {
 	u32 queue_idx;
+	int err;
 	int i;
 
 	if (update_tab) {
@@ -1581,7 +1840,13 @@
 		}
 	}
 
-	return mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
+	err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
+	if (err)
+		return err;
+
+	mana_fence_rqs(apc);
+
+	return 0;
 }
 
 static int mana_init_port(struct net_device *ndev)
@@ -1624,6 +1889,7 @@ reset_apc:
 int mana_alloc_queues(struct net_device *ndev)
 {
 	struct mana_port_context *apc = netdev_priv(ndev);
+	struct gdma_dev *gd = apc->ac->gdma_dev;
 	int err;
 
 	err = mana_create_vport(apc, ndev);
@@ -1650,6 +1916,14 @@ int mana_alloc_queues(struct net_device *ndev)
 	if (err)
 		goto destroy_vport;
 
+	if (gd->gdma_context->is_pf) {
+		err = mana_pf_register_filter(apc);
+		if (err)
+			goto destroy_vport;
+	}
+
+	mana_chn_setxdp(apc, mana_xdp_get(apc));
+
 	return 0;
 
 destroy_vport:
@@ -1692,12 +1966,18 @@ int mana_attach(struct net_device *ndev)
 static int mana_dealloc_queues(struct net_device *ndev)
 {
 	struct mana_port_context *apc = netdev_priv(ndev);
+	struct gdma_dev *gd = apc->ac->gdma_dev;
 	struct mana_txq *txq;
 	int i, err;
 
 	if (apc->port_is_up)
 		return -EINVAL;
 
+	mana_chn_setxdp(apc, NULL);
+
+	if (gd->gdma_context->is_pf)
+		mana_pf_deregister_filter(apc);
+
 	/* No packet can be transmitted now since apc->port_is_up is false.
 	 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
 	 * a txq because it may not timely see apc->port_is_up being cleared
@@ -1724,9 +2004,6 @@ static int mana_dealloc_queues(struct net_device *ndev)
 		return err;
 	}
 
-	/* TODO: Implement RX fencing */
-	ssleep(1);
-
 	mana_destroy_vport(apc);
 
 	return 0;
@@ -1783,6 +2060,7 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
 	apc->max_queues = gc->max_num_queues;
 	apc->num_queues = gc->max_num_queues;
 	apc->port_handle = INVALID_MANA_HANDLE;
+	apc->pf_filter_handle = INVALID_MANA_HANDLE;
 	apc->port_idx = port_idx;
 
 	ndev->netdev_ops = &mana_devops;
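Note: the new .ndo_bpf / .ndo_xdp_xmit hooks added above are driven by the kernel's generic XDP attach path, not by anything mana-specific. As a rough user-space illustration (not part of this patch; the object file "xdp_prog.o", program name "xdp_main", and interface "eth0" are hypothetical placeholders), a libbpf program could attach an XDP object to the resulting netdev like this:

	/* Hedged sketch: exercising the driver's new .ndo_bpf hook via libbpf.
	 * All file/program/interface names below are assumptions for the example.
	 */
	#include <bpf/libbpf.h>
	#include <net/if.h>
	#include <stdio.h>

	int main(void)
	{
		int ifindex = if_nametoindex("eth0");	/* hypothetical NIC name */
		struct bpf_object *obj;
		struct bpf_program *prog;

		obj = bpf_object__open_file("xdp_prog.o", NULL);
		if (!obj || bpf_object__load(obj))
			return 1;

		prog = bpf_object__find_program_by_name(obj, "xdp_main");
		if (!prog)
			return 1;

		/* The kernel routes this request to the device's .ndo_bpf
		 * (mana_bpf after this patch) to install the program.
		 */
		if (bpf_xdp_attach(ifindex, bpf_program__fd(prog), 0, NULL))
			return 1;

		printf("XDP program attached to ifindex %d\n", ifindex);
		return 0;
	}

Once attached, XDP_TX verdicts exercise the mana_xdp_tx() path added in mana_rx_skb(), and XDP_DROP verdicts feed the xdp_save_page reuse added in mana_process_rx_cqe().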