aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/microsoft/mana/mana_en.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/microsoft/mana/mana_en.c')
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c354
1 files changed, 316 insertions, 38 deletions
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 72cbf45c42d8..9259a74eca40 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1,9 +1,12 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2021, Microsoft Corporation. */
+#include <uapi/linux/bpf.h>
+
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
+#include <linux/filter.h>
#include <linux/mm.h>
#include <net/checksum.h>
@@ -125,7 +128,7 @@ frag_err:
return -ENOMEM;
}
-static int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
struct mana_port_context *apc = netdev_priv(ndev);
@@ -134,7 +137,7 @@ static int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
bool ipv4 = false, ipv6 = false;
struct mana_tx_package pkg = {};
struct netdev_queue *net_txq;
- struct mana_stats *tx_stats;
+ struct mana_stats_tx *tx_stats;
struct gdma_queue *gdma_sq;
unsigned int csum_type;
struct mana_txq *txq;
@@ -297,7 +300,8 @@ static void mana_get_stats64(struct net_device *ndev,
{
struct mana_port_context *apc = netdev_priv(ndev);
unsigned int num_queues = apc->num_queues;
- struct mana_stats *stats;
+ struct mana_stats_rx *rx_stats;
+ struct mana_stats_tx *tx_stats;
unsigned int start;
u64 packets, bytes;
int q;
@@ -308,26 +312,26 @@ static void mana_get_stats64(struct net_device *ndev,
netdev_stats_to_stats64(st, &ndev->stats);
for (q = 0; q < num_queues; q++) {
- stats = &apc->rxqs[q]->stats;
+ rx_stats = &apc->rxqs[q]->stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+ packets = rx_stats->packets;
+ bytes = rx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
st->rx_packets += packets;
st->rx_bytes += bytes;
}
for (q = 0; q < num_queues; q++) {
- stats = &apc->tx_qp[q].txq.stats;
+ tx_stats = &apc->tx_qp[q].txq.stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+ packets = tx_stats->packets;
+ bytes = tx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
st->tx_packets += packets;
st->tx_bytes += bytes;
@@ -378,6 +382,8 @@ static const struct net_device_ops mana_devops = {
.ndo_start_xmit = mana_start_xmit,
.ndo_validate_addr = eth_validate_addr,
.ndo_get_stats64 = mana_get_stats64,
+ .ndo_bpf = mana_bpf,
+ .ndo_xdp_xmit = mana_xdp_xmit,
};
static void mana_cleanup_port_context(struct mana_port_context *apc)
@@ -442,6 +448,119 @@ static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr,
return 0;
}
+static int mana_pf_register_hw_vport(struct mana_port_context *apc)
+{
+ struct mana_register_hw_vport_resp resp = {};
+ struct mana_register_hw_vport_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_HW_PORT,
+ sizeof(req), sizeof(resp));
+ req.attached_gfid = 1;
+ req.is_pf_default_vport = 1;
+ req.allow_all_ether_types = 1;
+
+ err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+ sizeof(resp));
+ if (err) {
+ netdev_err(apc->ndev, "Failed to register hw vPort: %d\n", err);
+ return err;
+ }
+
+ err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_HW_PORT,
+ sizeof(resp));
+ if (err || resp.hdr.status) {
+ netdev_err(apc->ndev, "Failed to register hw vPort: %d, 0x%x\n",
+ err, resp.hdr.status);
+ return err ? err : -EPROTO;
+ }
+
+ apc->port_handle = resp.hw_vport_handle;
+ return 0;
+}
+
+static void mana_pf_deregister_hw_vport(struct mana_port_context *apc)
+{
+ struct mana_deregister_hw_vport_resp resp = {};
+ struct mana_deregister_hw_vport_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_HW_PORT,
+ sizeof(req), sizeof(resp));
+ req.hw_vport_handle = apc->port_handle;
+
+ err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+ sizeof(resp));
+ if (err) {
+ netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
+ err);
+ return;
+ }
+
+ err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_HW_PORT,
+ sizeof(resp));
+ if (err || resp.hdr.status)
+ netdev_err(apc->ndev,
+ "Failed to deregister hw vPort: %d, 0x%x\n",
+ err, resp.hdr.status);
+}
+
+static int mana_pf_register_filter(struct mana_port_context *apc)
+{
+ struct mana_register_filter_resp resp = {};
+ struct mana_register_filter_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_FILTER,
+ sizeof(req), sizeof(resp));
+ req.vport = apc->port_handle;
+ memcpy(req.mac_addr, apc->mac_addr, ETH_ALEN);
+
+ err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+ sizeof(resp));
+ if (err) {
+ netdev_err(apc->ndev, "Failed to register filter: %d\n", err);
+ return err;
+ }
+
+ err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_FILTER,
+ sizeof(resp));
+ if (err || resp.hdr.status) {
+ netdev_err(apc->ndev, "Failed to register filter: %d, 0x%x\n",
+ err, resp.hdr.status);
+ return err ? err : -EPROTO;
+ }
+
+ apc->pf_filter_handle = resp.filter_handle;
+ return 0;
+}
+
+static void mana_pf_deregister_filter(struct mana_port_context *apc)
+{
+ struct mana_deregister_filter_resp resp = {};
+ struct mana_deregister_filter_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_FILTER,
+ sizeof(req), sizeof(resp));
+ req.filter_handle = apc->pf_filter_handle;
+
+ err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+ sizeof(resp));
+ if (err) {
+ netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
+ err);
+ return;
+ }
+
+ err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_FILTER,
+ sizeof(resp));
+ if (err || resp.hdr.status)
+ netdev_err(apc->ndev,
+ "Failed to deregister filter: %d, 0x%x\n",
+ err, resp.hdr.status);
+}
+
static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
u32 proto_minor_ver, u32 proto_micro_ver,
u16 *max_num_vports)
@@ -749,6 +868,61 @@ out:
return err;
}
+static int mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq)
+{
+ struct mana_fence_rq_resp resp = {};
+ struct mana_fence_rq_req req = {};
+ int err;
+
+ init_completion(&rxq->fence_event);
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ,
+ sizeof(req), sizeof(resp));
+ req.wq_obj_handle = rxq->rxobj;
+
+ err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+ sizeof(resp));
+ if (err) {
+ netdev_err(apc->ndev, "Failed to fence RQ %u: %d\n",
+ rxq->rxq_idx, err);
+ return err;
+ }
+
+ err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp));
+ if (err || resp.hdr.status) {
+ netdev_err(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n",
+ rxq->rxq_idx, err, resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+
+ return err;
+ }
+
+ if (wait_for_completion_timeout(&rxq->fence_event, 10 * HZ) == 0) {
+ netdev_err(apc->ndev, "Failed to fence RQ %u: timed out\n",
+ rxq->rxq_idx);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static void mana_fence_rqs(struct mana_port_context *apc)
+{
+ unsigned int rxq_idx;
+ struct mana_rxq *rxq;
+ int err;
+
+ for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
+ rxq = apc->rxqs[rxq_idx];
+ err = mana_fence_rq(apc, rxq);
+
+ /* In case of any error, use sleep instead. */
+ if (err)
+ msleep(100);
+ }
+}
+
static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
{
u32 used_space_old;
@@ -906,16 +1080,37 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq)
WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
}
+static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
+ struct xdp_buff *xdp)
+{
+ struct sk_buff *skb = build_skb(buf_va, PAGE_SIZE);
+
+ if (!skb)
+ return NULL;
+
+ if (xdp->data_hard_start) {
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ skb_put(skb, xdp->data_end - xdp->data);
+ } else {
+ skb_reserve(skb, XDP_PACKET_HEADROOM);
+ skb_put(skb, pkt_len);
+ }
+
+ return skb;
+}
+
static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
struct mana_rxq *rxq)
{
- struct mana_stats *rx_stats = &rxq->stats;
+ struct mana_stats_rx *rx_stats = &rxq->stats;
struct net_device *ndev = rxq->ndev;
uint pkt_len = cqe->ppi[0].pkt_len;
u16 rxq_idx = rxq->rxq_idx;
struct napi_struct *napi;
+ struct xdp_buff xdp = {};
struct sk_buff *skb;
u32 hash_value;
+ u32 act;
rxq->rx_cq.work_done++;
napi = &rxq->rx_cq.napi;
@@ -925,15 +1120,19 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
return;
}
- skb = build_skb(buf_va, PAGE_SIZE);
+ act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);
- if (!skb) {
- free_page((unsigned long)buf_va);
- ++ndev->stats.rx_dropped;
+ if (act == XDP_REDIRECT && !rxq->xdp_rc)
return;
- }
- skb_put(skb, pkt_len);
+ if (act != XDP_PASS && act != XDP_TX)
+ goto drop_xdp;
+
+ skb = mana_build_skb(buf_va, pkt_len, &xdp);
+
+ if (!skb)
+ goto drop;
+
skb->dev = napi->dev;
skb->protocol = eth_type_trans(skb, ndev);
@@ -954,12 +1153,36 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3);
}
- napi_gro_receive(napi, skb);
-
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->packets++;
rx_stats->bytes += pkt_len;
+
+ if (act == XDP_TX)
+ rx_stats->xdp_tx++;
+ u64_stats_update_end(&rx_stats->syncp);
+
+ if (act == XDP_TX) {
+ skb_set_queue_mapping(skb, rxq_idx);
+ mana_xdp_tx(skb, ndev);
+ return;
+ }
+
+ napi_gro_receive(napi, skb);
+
+ return;
+
+drop_xdp:
+ u64_stats_update_begin(&rx_stats->syncp);
+ rx_stats->xdp_drop++;
u64_stats_update_end(&rx_stats->syncp);
+
+drop:
+ WARN_ON_ONCE(rxq->xdp_save_page);
+ rxq->xdp_save_page = virt_to_page(buf_va);
+
+ ++ndev->stats.rx_dropped;
+
+ return;
}
static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
@@ -980,15 +1203,17 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
break;
case CQE_RX_TRUNCATED:
- netdev_err(ndev, "Dropped a truncated packet\n");
- return;
+ ++ndev->stats.rx_dropped;
+ rxbuf_oob = &rxq->rx_oobs[rxq->buf_index];
+ netdev_warn_once(ndev, "Dropped a truncated packet\n");
+ goto drop;
case CQE_RX_COALESCED_4:
netdev_err(ndev, "RX coalescing is unsupported\n");
return;
case CQE_RX_OBJECT_FENCE:
- netdev_err(ndev, "RX Fencing is unsupported\n");
+ complete(&rxq->fence_event);
return;
default:
@@ -997,9 +1222,6 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
return;
}
- if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY)
- return;
-
pktlen = oob->ppi[0].pkt_len;
if (pktlen == 0) {
@@ -1013,10 +1235,16 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
rxbuf_oob = &rxq->rx_oobs[curr];
WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
- new_page = alloc_page(GFP_ATOMIC);
+ /* Reuse XDP dropped page if available */
+ if (rxq->xdp_save_page) {
+ new_page = rxq->xdp_save_page;
+ rxq->xdp_save_page = NULL;
+ } else {
+ new_page = alloc_page(GFP_ATOMIC);
+ }
if (new_page) {
- da = dma_map_page(dev, new_page, 0, rxq->datasize,
+ da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
DMA_FROM_DEVICE);
if (dma_mapping_error(dev, da)) {
@@ -1043,6 +1271,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
mana_rx_skb(old_buf, oob, rxq);
+drop:
mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
mana_post_pkt_rxq(rxq);
@@ -1051,11 +1280,14 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
static void mana_poll_rx_cq(struct mana_cq *cq)
{
struct gdma_comp *comp = cq->gdma_comp_buf;
+ struct mana_rxq *rxq = cq->rxq;
int comp_read, i;
comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
+ rxq->xdp_flush = false;
+
for (i = 0; i < comp_read; i++) {
if (WARN_ON_ONCE(comp[i].is_sq))
return;
@@ -1064,8 +1296,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id))
return;
- mana_process_rx_cqe(cq->rxq, cq, &comp[i]);
+ mana_process_rx_cqe(rxq, cq, &comp[i]);
}
+
+ if (rxq->xdp_flush)
+ xdp_do_flush();
}
static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
@@ -1260,7 +1495,7 @@ static int mana_create_txq(struct mana_port_context *apc,
gc->cq_table[cq->gdma_id] = cq->gdma_cq;
- netif_tx_napi_add(net, &cq->napi, mana_poll, NAPI_POLL_WEIGHT);
+ netif_napi_add_tx(net, &cq->napi, mana_poll);
napi_enable(&cq->napi);
mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
@@ -1291,12 +1526,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
napi_synchronize(napi);
napi_disable(napi);
+
+ xdp_rxq_info_unreg(&rxq->xdp_rxq);
+
netif_napi_del(napi);
mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
mana_deinit_cq(apc, &rxq->rx_cq);
+ if (rxq->xdp_save_page)
+ __free_page(rxq->xdp_save_page);
+
for (i = 0; i < rxq->num_rx_buf; i++) {
rx_oob = &rxq->rx_oobs[i];
@@ -1342,7 +1583,8 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc,
if (!page)
return -ENOMEM;
- da = dma_map_page(dev, page, 0, rxq->datasize, DMA_FROM_DEVICE);
+ da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize,
+ DMA_FROM_DEVICE);
if (dma_mapping_error(dev, da)) {
__free_page(page);
@@ -1484,7 +1726,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
gc->cq_table[cq->gdma_id] = cq->gdma_cq;
- netif_napi_add(ndev, &cq->napi, mana_poll, 1);
+ netif_napi_add_weight(ndev, &cq->napi, mana_poll, 1);
+
+ WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
+ cq->napi.napi_id));
+ WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL));
+
napi_enable(&cq->napi);
mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
@@ -1529,6 +1777,7 @@ out:
static void mana_destroy_vport(struct mana_port_context *apc)
{
+ struct gdma_dev *gd = apc->ac->gdma_dev;
struct mana_rxq *rxq;
u32 rxq_idx;
@@ -1542,6 +1791,9 @@ static void mana_destroy_vport(struct mana_port_context *apc)
}
mana_destroy_txq(apc);
+
+ if (gd->gdma_context->is_pf)
+ mana_pf_deregister_hw_vport(apc);
}
static int mana_create_vport(struct mana_port_context *apc,
@@ -1552,6 +1804,12 @@ static int mana_create_vport(struct mana_port_context *apc,
apc->default_rxobj = INVALID_MANA_HANDLE;
+ if (gd->gdma_context->is_pf) {
+ err = mana_pf_register_hw_vport(apc);
+ if (err)
+ return err;
+ }
+
err = mana_cfg_vport(apc, gd->pdid, gd->doorbell);
if (err)
return err;
@@ -1572,6 +1830,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
bool update_hash, bool update_tab)
{
u32 queue_idx;
+ int err;
int i;
if (update_tab) {
@@ -1581,7 +1840,13 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
}
}
- return mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
+ err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
+ if (err)
+ return err;
+
+ mana_fence_rqs(apc);
+
+ return 0;
}
static int mana_init_port(struct net_device *ndev)
@@ -1624,6 +1889,7 @@ reset_apc:
int mana_alloc_queues(struct net_device *ndev)
{
struct mana_port_context *apc = netdev_priv(ndev);
+ struct gdma_dev *gd = apc->ac->gdma_dev;
int err;
err = mana_create_vport(apc, ndev);
@@ -1650,6 +1916,14 @@ int mana_alloc_queues(struct net_device *ndev)
if (err)
goto destroy_vport;
+ if (gd->gdma_context->is_pf) {
+ err = mana_pf_register_filter(apc);
+ if (err)
+ goto destroy_vport;
+ }
+
+ mana_chn_setxdp(apc, mana_xdp_get(apc));
+
return 0;
destroy_vport:
@@ -1692,12 +1966,18 @@ int mana_attach(struct net_device *ndev)
static int mana_dealloc_queues(struct net_device *ndev)
{
struct mana_port_context *apc = netdev_priv(ndev);
+ struct gdma_dev *gd = apc->ac->gdma_dev;
struct mana_txq *txq;
int i, err;
if (apc->port_is_up)
return -EINVAL;
+ mana_chn_setxdp(apc, NULL);
+
+ if (gd->gdma_context->is_pf)
+ mana_pf_deregister_filter(apc);
+
/* No packet can be transmitted now since apc->port_is_up is false.
* There is still a tiny chance that mana_poll_tx_cq() can re-enable
* a txq because it may not timely see apc->port_is_up being cleared
@@ -1724,9 +2004,6 @@ static int mana_dealloc_queues(struct net_device *ndev)
return err;
}
- /* TODO: Implement RX fencing */
- ssleep(1);
-
mana_destroy_vport(apc);
return 0;
@@ -1783,6 +2060,7 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
apc->max_queues = gc->max_num_queues;
apc->num_queues = gc->max_num_queues;
apc->port_handle = INVALID_MANA_HANDLE;
+ apc->pf_filter_handle = INVALID_MANA_HANDLE;
apc->port_idx = port_idx;
ndev->netdev_ops = &mana_devops;