aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/intel/ice/Makefile3
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h59
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c857
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.h31
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c65
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c984
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h49
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c355
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c573
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h140
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.c273
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.h59
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c1181
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.h72
16 files changed, 3553 insertions, 1150 deletions
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 9edde960b4f2..df5a9699276a 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -13,9 +13,12 @@ ice-y := ice_main.o \
ice_nvm.o \
ice_switch.o \
ice_sched.o \
+ ice_base.o \
ice_lib.o \
+ ice_txrx_lib.o \
ice_txrx.o \
ice_flex_pipe.o \
ice_ethtool.o
ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o
+ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 45e100666049..f552a67467aa 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -29,10 +29,13 @@
#include <linux/ip.h>
#include <linux/sctp.h>
#include <linux/ipv6.h>
+#include <linux/pkt_sched.h>
#include <linux/if_bridge.h>
#include <linux/ctype.h>
+#include <linux/bpf.h>
#include <linux/avf/virtchnl.h>
#include <net/ipv6.h>
+#include <net/xdp_sock.h>
#include "ice_devids.h"
#include "ice_type.h"
#include "ice_txrx.h"
@@ -42,6 +45,7 @@
#include "ice_sched.h"
#include "ice_virtchnl_pf.h"
#include "ice_sriov.h"
+#include "ice_xsk.h"
extern const char ice_drv_ver[];
#define ICE_BAR0 0
@@ -78,8 +82,7 @@ extern const char ice_drv_ver[];
#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
-#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \
- (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)))
+#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
#define ICE_UP_TABLE_TRANSLATE(val, i) \
(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
@@ -127,6 +130,14 @@ extern const char ice_drv_ver[];
ICE_PROMISC_VLAN_TX | \
ICE_PROMISC_VLAN_RX)
+struct ice_txq_meta {
+ u32 q_teid; /* Tx-scheduler element identifier */
+ u16 q_id; /* Entry in VSI's txq_map bitmap */
+ u16 q_handle; /* Relative index of Tx queue within TC */
+ u16 vsi_idx; /* VSI index that Tx queue belongs to */
+ u8 tc; /* TC number that Tx queue belongs to */
+};
+
struct ice_tc_info {
u16 qoffset;
u16 qcount_tx;
@@ -274,6 +285,13 @@ struct ice_vsi {
u16 num_rx_desc;
u16 num_tx_desc;
struct ice_tc_cfg tc_cfg;
+ struct bpf_prog *xdp_prog;
+ struct ice_ring **xdp_rings; /* XDP ring array */
+ u16 num_xdp_txq; /* Used XDP queues */
+ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+ struct xdp_umem **xsk_umems;
+ u16 num_xsk_umems_used;
+ u16 num_xsk_umems;
} ____cacheline_internodealigned_in_smp;
/* struct that defines an interrupt vector */
@@ -313,6 +331,7 @@ enum ice_pf_flags {
ICE_FLAG_NO_MEDIA,
ICE_FLAG_FW_LLDP_AGENT,
ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */
+ ICE_FLAG_LEGACY_RX,
ICE_PF_FLAGS_NBITS /* must be last */
};
@@ -417,6 +436,37 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
return np->vsi->back;
}
+static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
+{
+ return !!vsi->xdp_prog;
+}
+
+static inline void ice_set_ring_xdp(struct ice_ring *ring)
+{
+ ring->flags |= ICE_TX_FLAGS_RING_XDP;
+}
+
+/**
+ * ice_xsk_umem - get XDP UMEM bound to a ring
+ * @ring - ring to use
+ *
+ * Returns a pointer to xdp_umem structure if there is an UMEM present,
+ * NULL otherwise.
+ */
+static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
+{
+ struct xdp_umem **umems = ring->vsi->xsk_umems;
+ int qid = ring->q_index;
+
+ if (ice_ring_is_xdp(ring))
+ qid -= ring->vsi->num_xdp_txq;
+
+ if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi))
+ return NULL;
+
+ return umems[qid];
+}
+
/**
* ice_get_main_vsi - Get the PF VSI
* @pf: PF instance
@@ -443,6 +493,11 @@ int ice_up(struct ice_vsi *vsi);
int ice_down(struct ice_vsi *vsi);
int ice_vsi_cfg(struct ice_vsi *vsi);
struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
+int ice_destroy_xdp_rings(struct ice_vsi *vsi);
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
new file mode 100644
index 000000000000..69d2da14fe5c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -0,0 +1,857 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_base.h"
+#include "ice_dcb_lib.h"
+
+/**
+ * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
+ * @qs_cfg: gathered variables needed for PF->VSI queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
+{
+ int offset, i;
+
+ mutex_lock(qs_cfg->qs_mutex);
+ offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
+ 0, qs_cfg->q_count, 0);
+ if (offset >= qs_cfg->pf_map_size) {
+ mutex_unlock(qs_cfg->qs_mutex);
+ return -ENOMEM;
+ }
+
+ bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
+ for (i = 0; i < qs_cfg->q_count; i++)
+ qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
+ mutex_unlock(qs_cfg->qs_mutex);
+
+ return 0;
+}
+
+/**
+ * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
+{
+ int i, index = 0;
+
+ mutex_lock(qs_cfg->qs_mutex);
+ for (i = 0; i < qs_cfg->q_count; i++) {
+ index = find_next_zero_bit(qs_cfg->pf_map,
+ qs_cfg->pf_map_size, index);
+ if (index >= qs_cfg->pf_map_size)
+ goto err_scatter;
+ set_bit(index, qs_cfg->pf_map);
+ qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
+ }
+ mutex_unlock(qs_cfg->qs_mutex);
+
+ return 0;
+err_scatter:
+ for (index = 0; index < i; index++) {
+ clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
+ qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
+ }
+ mutex_unlock(qs_cfg->qs_mutex);
+
+ return -ENOMEM;
+}
+
+/**
+ * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @ena: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT in case of failing to reach the requested state after
+ * multiple retries; else will return 0 in case of success.
+ */
+static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
+{
+ int i;
+
+ for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
+ if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
+ QRX_CTRL_QENA_STAT_M))
+ return 0;
+
+ usleep_range(20, 40);
+ }
+
+ return -ETIMEDOUT;
+}
+
+/**
+ * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the VSI struct
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_q_vector *q_vector;
+
+ /* allocate q_vector */
+ q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
+ if (!q_vector)
+ return -ENOMEM;
+
+ q_vector->vsi = vsi;
+ q_vector->v_idx = v_idx;
+ if (vsi->type == ICE_VSI_VF)
+ goto out;
+ /* only set affinity_mask if the CPU is online */
+ if (cpu_online(v_idx))
+ cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+
+ /* This will not be called in the driver load path because the netdev
+ * will not be created yet. All other cases with register the NAPI
+ * handler here (i.e. resume, reset/rebuild, etc.)
+ */
+ if (vsi->netdev)
+ netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
+ NAPI_POLL_WEIGHT);
+
+out:
+ /* tie q_vector and VSI together */
+ vsi->q_vectors[v_idx] = q_vector;
+
+ return 0;
+}
+
+/**
+ * ice_free_q_vector - Free memory allocated for a specific interrupt vector
+ * @vsi: VSI having the memory freed
+ * @v_idx: index of the vector to be freed
+ */
+static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+ struct ice_q_vector *q_vector;
+ struct ice_pf *pf = vsi->back;
+ struct ice_ring *ring;
+
+ if (!vsi->q_vectors[v_idx]) {
+ dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n",
+ v_idx);
+ return;
+ }
+ q_vector = vsi->q_vectors[v_idx];
+
+ ice_for_each_ring(ring, q_vector->tx)
+ ring->q_vector = NULL;
+ ice_for_each_ring(ring, q_vector->rx)
+ ring->q_vector = NULL;
+
+ /* only VSI with an associated netdev is set up with NAPI */
+ if (vsi->netdev)
+ netif_napi_del(&q_vector->napi);
+
+ devm_kfree(&pf->pdev->dev, q_vector);
+ vsi->q_vectors[v_idx] = NULL;
+}
+
+/**
+ * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
+ * @hw: board specific structure
+ */
+static void ice_cfg_itr_gran(struct ice_hw *hw)
+{
+ u32 regval = rd32(hw, GLINT_CTL);
+
+ /* no need to update global register if ITR gran is already set */
+ if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
+ (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
+ GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
+ (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
+ GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
+ (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
+ GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
+ (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
+ GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
+ return;
+
+ regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
+ GLINT_CTL_ITR_GRAN_200_M) |
+ ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
+ GLINT_CTL_ITR_GRAN_100_M) |
+ ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
+ GLINT_CTL_ITR_GRAN_50_M) |
+ ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
+ GLINT_CTL_ITR_GRAN_25_M);
+ wr32(hw, GLINT_CTL, regval);
+}
+
+/**
+ * ice_calc_q_handle - calculate the queue handle
+ * @vsi: VSI that ring belongs to
+ * @ring: ring to get the absolute queue index
+ * @tc: traffic class number
+ */
+static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc)
+{
+ WARN_ONCE(ice_ring_is_xdp(ring) && tc,
+ "XDP ring can't belong to TC other than 0");
+
+ /* Idea here for calculation is that we subtract the number of queue
+ * count from TC that ring belongs to from it's absolute queue index
+ * and as a result we get the queue's index within TC.
+ */
+ return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset;
+}
+
+/**
+ * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
+ * @ring: The Tx ring to configure
+ * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
+ * @pf_q: queue index in the PF space
+ *
+ * Configure the Tx descriptor ring in TLAN context.
+ */
+static void
+ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+{
+ struct ice_vsi *vsi = ring->vsi;
+ struct ice_hw *hw = &vsi->back->hw;
+
+ tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
+
+ tlan_ctx->port_num = vsi->port_info->lport;
+
+ /* Transmit Queue Length */
+ tlan_ctx->qlen = ring->count;
+
+ ice_set_cgd_num(tlan_ctx, ring);
+
+ /* PF number */
+ tlan_ctx->pf_num = hw->pf_id;
+
+ /* queue belongs to a specific VSI type
+ * VF / VM index should be programmed per vmvf_type setting:
+ * for vmvf_type = VF, it is VF number between 0-256
+ * for vmvf_type = VM, it is VM number between 0-767
+ * for PF or EMP this field should be set to zero
+ */
+ switch (vsi->type) {
+ case ICE_VSI_LB:
+ /* fall through */
+ case ICE_VSI_PF:
+ tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+ break;
+ case ICE_VSI_VF:
+ /* Firmware expects vmvf_num to be absolute VF ID */
+ tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
+ tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
+ break;
+ default:
+ return;
+ }
+
+ /* make sure the context is associated with the right VSI */
+ tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
+
+ tlan_ctx->tso_ena = ICE_TX_LEGACY;
+ tlan_ctx->tso_qnum = pf_q;
+
+ /* Legacy or Advanced Host Interface:
+ * 0: Advanced Host Interface
+ * 1: Legacy Host Interface
+ */
+ tlan_ctx->legacy_int = ICE_TX_LEGACY;
+}
+
+/**
+ * ice_setup_rx_ctx - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in RLAN context.
+ */
+int ice_setup_rx_ctx(struct ice_ring *ring)
+{
+ int chain_len = ICE_MAX_CHAINED_RX_BUFS;
+ struct ice_vsi *vsi = ring->vsi;
+ u32 rxdid = ICE_RXDID_FLEX_NIC;
+ struct ice_rlan_ctx rlan_ctx;
+ struct ice_hw *hw;
+ u32 regval;
+ u16 pf_q;
+ int err;
+
+ hw = &vsi->back->hw;
+
+ /* what is Rx queue number in global space of 2K Rx queues */
+ pf_q = vsi->rxq_map[ring->q_index];
+
+ /* clear the context structure first */
+ memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+
+ ring->rx_buf_len = vsi->rx_buf_len;
+
+ if (ring->vsi->type == ICE_VSI_PF) {
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+ xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->q_index);
+
+ ring->xsk_umem = ice_xsk_umem(ring);
+ if (ring->xsk_umem) {
+ xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+ ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
+ XDP_PACKET_HEADROOM;
+ /* For AF_XDP ZC, we disallow packets to span on
+ * multiple buffers, thus letting us skip that
+ * handling in the fast-path.
+ */
+ chain_len = 1;
+ ring->zca.free = ice_zca_free;
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_ZERO_COPY,
+ &ring->zca);
+ if (err)
+ return err;
+
+ dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+ ring->q_index);
+ } else {
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+ xdp_rxq_info_reg(&ring->xdp_rxq,
+ ring->netdev,
+ ring->q_index);
+
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (err)
+ return err;
+ }
+ }
+ /* Receive Queue Base Address.
+ * Indicates the starting address of the descriptor queue defined in
+ * 128 Byte units.
+ */
+ rlan_ctx.base = ring->dma >> 7;
+
+ rlan_ctx.qlen = ring->count;
+
+ /* Receive Packet Data Buffer Size.
+ * The Packet Data Buffer Size is defined in 128 byte units.
+ */
+ rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
+
+ /* use 32 byte descriptors */
+ rlan_ctx.dsize = 1;
+
+ /* Strip the Ethernet CRC bytes before the packet is posted to host
+ * memory.
+ */
+ rlan_ctx.crcstrip = 1;
+
+ /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
+ rlan_ctx.l2tsel = 1;
+
+ rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+ rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+ rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
+
+ /* This controls whether VLAN is stripped from inner headers
+ * The VLAN in the inner L2 header is stripped to the receive
+ * descriptor if enabled by this flag.
+ */
+ rlan_ctx.showiv = 0;
+
+ /* Max packet size for this queue - must not be set to a larger value
+ * than 5 x DBUF
+ */
+ rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
+ chain_len * ring->rx_buf_len);
+
+ /* Rx queue threshold in units of 64 */
+ rlan_ctx.lrxqthresh = 1;
+
+ /* Enable Flexible Descriptors in the queue context which
+ * allows this driver to select a specific receive descriptor format
+ */
+ if (vsi->type != ICE_VSI_VF) {
+ regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+ regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+ QRXFLXP_CNTXT_RXDID_IDX_M;
+
+ /* increasing context priority to pick up profile ID;
+ * default is 0x01; setting to 0x03 to ensure profile
+ * is programming if prev context is of same priority
+ */
+ regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+ QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+ wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+ }
+
+ /* Absolute queue number out of 2K needs to be passed */
+ err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
+ if (err) {
+ dev_err(&vsi->back->pdev->dev,
+ "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+ pf_q, err);
+ return -EIO;
+ }
+
+ if (vsi->type == ICE_VSI_VF)
+ return 0;
+
+ /* configure Rx buffer alignment */
+ if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+ ice_clear_ring_build_skb_ena(ring);
+ else
+ ice_set_ring_build_skb_ena(ring);
+
+ /* init queue specific tail register */
+ ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
+ writel(0, ring->tail);
+
+ err = ring->xsk_umem ?
+ ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
+ ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
+ if (err)
+ dev_info(&vsi->back->pdev->dev,
+ "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
+ ring->xsk_umem ? "UMEM enabled " : "",
+ ring->q_index, pf_q);
+
+ return 0;
+}
+
+/**
+ * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * This function first tries to find contiguous space. If it is not successful,
+ * it tries with the scatter approach.
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
+{
+ int ret = 0;
+
+ ret = __ice_vsi_get_qs_contig(qs_cfg);
+ if (ret) {
+ /* contig failed, so try with scatter approach */
+ qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
+ qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
+ qs_cfg->scatter_count);
+ ret = __ice_vsi_get_qs_sc(qs_cfg);
+ }
+ return ret;
+}
+
+/**
+ * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx rings
+ * @rxq_idx: Rx queue index
+ */
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
+{
+ int pf_q = vsi->rxq_map[rxq_idx];
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int ret = 0;
+ u32 rx_reg;
+
+ rx_reg = rd32(hw, QRX_CTRL(pf_q));
+
+ /* Skip if the queue is already in the requested state */
+ if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+ return 0;
+
+ /* turn on/off the queue */
+ if (ena)
+ rx_reg |= QRX_CTRL_QENA_REQ_M;
+ else
+ rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+ wr32(hw, QRX_CTRL(pf_q), rx_reg);
+
+ /* wait for the change to finish */
+ ret = ice_pf_rxq_wait(pf, pf_q, ena);
+ if (ret)
+ dev_err(&pf->pdev->dev,
+ "VSI idx %d Rx ring %d %sable timeout\n",
+ vsi->idx, pf_q, (ena ? "en" : "dis"));
+
+ return ret;
+}
+
+/**
+ * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int v_idx = 0, num_q_vectors;
+ int err;
+
+ if (vsi->q_vectors[0]) {
+ dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
+ vsi->vsi_num);
+ return -EEXIST;
+ }
+
+ num_q_vectors = vsi->num_q_vectors;
+
+ for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
+ err = ice_vsi_alloc_q_vector(vsi, v_idx);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ while (v_idx--)
+ ice_free_q_vector(vsi, v_idx);
+
+ dev_err(&pf->pdev->dev,
+ "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
+ vsi->num_q_vectors, vsi->vsi_num, err);
+ vsi->num_q_vectors = 0;
+ return err;
+}
+
+/**
+ * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors allotted
+ * through the MSI-X enabling code. On a constrained vector budget, we map Tx
+ * and Rx rings to the vector as "efficiently" as possible.
+ */
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+{
+ int q_vectors = vsi->num_q_vectors;
+ int tx_rings_rem, rx_rings_rem;
+ int v_id;
+
+ /* initially assigning remaining rings count to VSIs num queue value */
+ tx_rings_rem = vsi->num_txq;
+ rx_rings_rem = vsi->num_rxq;
+
+ for (v_id = 0; v_id < q_vectors; v_id++) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
+ int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
+
+ /* Tx rings mapping to vector */
+ tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
+ q_vector->num_ring_tx = tx_rings_per_v;
+ q_vector->tx.ring = NULL;
+ q_vector->tx.itr_idx = ICE_TX_ITR;
+ q_base = vsi->num_txq - tx_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
+ struct ice_ring *tx_ring = vsi->tx_rings[q_id];
+
+ tx_ring->q_vector = q_vector;
+ tx_ring->next = q_vector->tx.ring;
+ q_vector->tx.ring = tx_ring;
+ }
+ tx_rings_rem -= tx_rings_per_v;
+
+ /* Rx rings mapping to vector */
+ rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
+ q_vector->num_ring_rx = rx_rings_per_v;
+ q_vector->rx.ring = NULL;
+ q_vector->rx.itr_idx = ICE_RX_ITR;
+ q_base = vsi->num_rxq - rx_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
+ struct ice_ring *rx_ring = vsi->rx_rings[q_id];
+
+ rx_ring->q_vector = q_vector;
+ rx_ring->next = q_vector->rx.ring;
+ q_vector->rx.ring = rx_ring;
+ }
+ rx_rings_rem -= rx_rings_per_v;
+ }
+}
+
+/**
+ * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI having memory freed
+ */
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+{
+ int v_idx;
+
+ ice_for_each_q_vector(vsi, v_idx)
+ ice_free_q_vector(vsi, v_idx);
+}
+
+/**
+ * ice_vsi_cfg_txq - Configure single Tx queue
+ * @vsi: the VSI that queue belongs to
+ * @ring: Tx ring to be configured
+ * @qg_buf: queue group buffer
+ */
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+ struct ice_aqc_add_tx_qgrp *qg_buf)
+{
+ struct ice_tlan_ctx tlan_ctx = { 0 };
+ struct ice_aqc_add_txqs_perq *txq;
+ struct ice_pf *pf = vsi->back;
+ u8 buf_len = sizeof(*qg_buf);
+ enum ice_status status;
+ u16 pf_q;
+ u8 tc;
+
+ pf_q = ring->reg_idx;
+ ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
+ /* copy context contents into the qg_buf */
+ qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+ ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+ ice_tlan_ctx_info);
+
+ /* init queue specific tail reg. It is referred as
+ * transmit comm scheduler queue doorbell.
+ */
+ ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
+
+ if (IS_ENABLED(CONFIG_DCB))
+ tc = ring->dcb_tc;
+ else
+ tc = 0;
+
+ /* Add unique software queue handle of the Tx queue per
+ * TC into the VSI Tx ring
+ */
+ ring->q_handle = ice_calc_q_handle(vsi, ring, tc);
+
+ status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
+ 1, qg_buf, buf_len, NULL);
+ if (status) {
+ dev_err(&pf->pdev->dev,
+ "Failed to set LAN Tx queue context, error: %d\n",
+ status);
+ return -ENODEV;
+ }
+
+ /* Add Tx Queue TEID into the VSI Tx ring from the
+ * response. This will complete configuring and
+ * enabling the queue.
+ */
+ txq = &qg_buf->txqs[0];
+ if (pf_q == le16_to_cpu(txq->txq_id))
+ ring->txq_teid = le32_to_cpu(txq->q_teid);
+
+ return 0;
+}
+
+/**
+ * ice_cfg_itr - configure the initial interrupt throttle values
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector that's being configured
+ *
+ * Configure interrupt throttling values for the ring containers that are
+ * associated with the interrupt vector passed in.
+ */
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+ ice_cfg_itr_gran(hw);
+
+ if (q_vector->num_ring_rx) {
+ struct ice_ring_container *rc = &q_vector->rx;
+
+ /* if this value is set then don't overwrite with default */
+ if (!rc->itr_setting)
+ rc->itr_setting = ICE_DFLT_RX_ITR;
+
+ rc->target_itr = ITR_TO_REG(rc->itr_setting);
+ rc->next_update = jiffies + 1;
+ rc->current_itr = rc->target_itr;
+ wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+ ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+ }
+
+ if (q_vector->num_ring_tx) {
+ struct ice_ring_container *rc = &q_vector->tx;
+
+ /* if this value is set then don't overwrite with default */
+ if (!rc->itr_setting)
+ rc->itr_setting = ICE_DFLT_TX_ITR;
+
+ rc->target_itr = ITR_TO_REG(rc->itr_setting);
+ rc->next_update = jiffies + 1;
+ rc->current_itr = rc->target_itr;
+ wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+ ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+ }
+}
+
+/**
+ * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * @vsi: the VSI being configured
+ * @txq: Tx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+
+ itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+
+ val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
+ ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]),
+ val);
+ }
+ ice_flush(hw);
+}
+
+/**
+ * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
+ * @vsi: the VSI being configured
+ * @rxq: Rx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+
+ itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
+
+ val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
+ ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
+
+ wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+ wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
+ (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
+ GLINT_DYN_CTL_SWINT_TRIG_M |
+ GLINT_DYN_CTL_INTENA_M);
+}
+
+/**
+ * ice_vsi_stop_tx_ring - Disable single Tx ring
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @ring: Tx ring to be stopped
+ * @txq_meta: Meta data of Tx ring to be stopped
+ */
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ u16 rel_vmvf_num, struct ice_ring *ring,
+ struct ice_txq_meta *txq_meta)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_q_vector *q_vector;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status status;
+ u32 val;
+
+ /* clear cause_ena bit for disabled queues */
+ val = rd32(hw, QINT_TQCTL(ring->reg_idx));
+ val &= ~QINT_TQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_TQCTL(ring->reg_idx), val);
+
+ /* software is expected to wait for 100 ns */
+ ndelay(100);
+
+ /* trigger a software interrupt for the vector
+ * associated to the queue to schedule NAPI handler
+ */
+ q_vector = ring->q_vector;
+ if (q_vector)
+ ice_trigger_sw_intr(hw, q_vector);
+
+ status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
+ txq_meta->tc, 1, &txq_meta->q_handle,
+ &txq_meta->q_id, &txq_meta->q_teid, rst_src,
+ rel_vmvf_num, NULL);
+
+ /* if the disable queue command was exercised during an
+ * active reset flow, ICE_ERR_RESET_ONGOING is returned.
+ * This is not an error as the reset operation disables
+ * queues at the hardware level anyway.
+ */
+ if (status == ICE_ERR_RESET_ONGOING) {
+ dev_dbg(&vsi->back->pdev->dev,
+ "Reset in progress. LAN Tx queues already disabled\n");
+ } else if (status == ICE_ERR_DOES_NOT_EXIST) {
+ dev_dbg(&vsi->back->pdev->dev,
+ "LAN Tx queues do not exist, nothing to disable\n");
+ } else if (status) {
+ dev_err(&vsi->back->pdev->dev,
+ "Failed to disable LAN Tx queues, error: %d\n", status);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_fill_txq_meta - Prepare the Tx queue's meta data
+ * @vsi: VSI that ring belongs to
+ * @ring: ring that txq_meta will be based on
+ * @txq_meta: a helper struct that wraps Tx queue's information
+ *
+ * Set up a helper struct that will contain all the necessary fields that
+ * are needed for stopping Tx queue
+ */
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+ struct ice_txq_meta *txq_meta)
+{
+ u8 tc;
+
+ if (IS_ENABLED(CONFIG_DCB))
+ tc = ring->dcb_tc;
+ else
+ tc = 0;
+
+ txq_meta->q_id = ring->reg_idx;
+ txq_meta->q_teid = ring->txq_teid;
+ txq_meta->q_handle = ring->q_handle;
+ txq_meta->vsi_idx = vsi->idx;
+ txq_meta->tc = tc;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h
new file mode 100644
index 000000000000..407995e8e944
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_BASE_H_
+#define _ICE_BASE_H_
+
+#include "ice.h"
+
+int ice_setup_rx_ctx(struct ice_ring *ring);
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg);
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi);
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+ struct ice_aqc_add_tx_qgrp *qg_buf);
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ u16 rel_vmvf_num, struct ice_ring *ring,
+ struct ice_txq_meta *txq_meta);
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+ struct ice_txq_meta *txq_meta);
+#endif /* _ICE_BASE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
index 661a6f7bca64..d11a0aab01ac 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -5,6 +5,7 @@
#define _ICE_DCB_LIB_H_
#include "ice.h"
+#include "ice_base.h"
#include "ice_lib.h"
#ifdef CONFIG_DCB
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 7e23034df955..7e779060069c 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -156,6 +156,7 @@ struct ice_priv_flag {
static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
+ ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
};
#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags)
@@ -623,7 +624,7 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
continue;
rx_buf = &rx_ring->rx_buf[i];
- received_buf = page_address(rx_buf->page);
+ received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
if (ice_lbtest_check_frame(received_buf))
valid_frames++;
@@ -1256,6 +1257,11 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
"Fail to enable MIB change events\n");
}
}
+ if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
+ /* down and up VSI so that changes of Rx cfg are reflected. */
+ ice_down(vsi);
+ ice_up(vsi);
+ }
clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
return ret;
}
@@ -2577,6 +2583,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
{
struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_ring *xdp_rings = NULL;
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
int i, timeout = 50, err = 0;
@@ -2611,6 +2618,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
return 0;
}
+ /* If there is a AF_XDP UMEM attached to any of Rx rings,
+ * disallow changing the number of descriptors -- regardless
+ * if the netdev is running or not.
+ */
+ if (ice_xsk_any_rx_ring_ena(vsi))
+ return -EBUSY;
+
while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
timeout--;
if (!timeout)
@@ -2624,6 +2638,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
vsi->tx_rings[i]->count = new_tx_cnt;
for (i = 0; i < vsi->alloc_rxq; i++)
vsi->rx_rings[i]->count = new_rx_cnt;
+ if (ice_is_xdp_ena_vsi(vsi))
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ vsi->xdp_rings[i]->count = new_tx_cnt;
+ vsi->num_tx_desc = new_tx_cnt;
+ vsi->num_rx_desc = new_rx_cnt;
netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
goto done;
}
@@ -2650,15 +2669,43 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
tx_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&tx_rings[i]);
if (err) {
- while (i) {
- i--;
+ while (i--)
ice_clean_tx_ring(&tx_rings[i]);
- }
devm_kfree(&pf->pdev->dev, tx_rings);
goto done;
}
}
+ if (!ice_is_xdp_ena_vsi(vsi))
+ goto process_rx;
+
+ /* alloc updated XDP resources */
+ netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
+ vsi->xdp_rings[0]->count, new_tx_cnt);
+
+ xdp_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_xdp_txq,
+ sizeof(*xdp_rings), GFP_KERNEL);
+ if (!xdp_rings) {
+ err = -ENOMEM;
+ goto free_tx;
+ }
+
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ /* clone ring and setup updated count */
+ xdp_rings[i] = *vsi->xdp_rings[i];
+ xdp_rings[i].count = new_tx_cnt;
+ xdp_rings[i].desc = NULL;
+ xdp_rings[i].tx_buf = NULL;
+ err = ice_setup_tx_ring(&xdp_rings[i]);
+ if (err) {
+ while (i--)
+ ice_clean_tx_ring(&xdp_rings[i]);
+ devm_kfree(&pf->pdev->dev, xdp_rings);
+ goto free_tx;
+ }
+ ice_set_ring_xdp(&xdp_rings[i]);
+ }
+
process_rx:
if (new_rx_cnt == vsi->rx_rings[0]->count)
goto process_link;
@@ -2737,6 +2784,16 @@ process_link:
devm_kfree(&pf->pdev->dev, rx_rings);
}
+ if (xdp_rings) {
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ *vsi->xdp_rings[i] = xdp_rings[i];
+ }
+ devm_kfree(&pf->pdev->dev, xdp_rings);
+ }
+
+ vsi->num_tx_desc = new_tx_cnt;
+ vsi->num_rx_desc = new_rx_cnt;
ice_up(vsi);
}
goto done;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index cc755382df25..b1e96cac5b1f 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2,235 +2,11 @@
/* Copyright (c) 2018, Intel Corporation. */
#include "ice.h"
+#include "ice_base.h"
#include "ice_lib.h"
#include "ice_dcb_lib.h"
/**
- * ice_setup_rx_ctx - Configure a receive ring context
- * @ring: The Rx ring to configure
- *
- * Configure the Rx descriptor ring in RLAN context.
- */
-static int ice_setup_rx_ctx(struct ice_ring *ring)
-{
- struct ice_vsi *vsi = ring->vsi;
- struct ice_hw *hw = &vsi->back->hw;
- u32 rxdid = ICE_RXDID_FLEX_NIC;
- struct ice_rlan_ctx rlan_ctx;
- u32 regval;
- u16 pf_q;
- int err;
-
- /* what is Rx queue number in global space of 2K Rx queues */
- pf_q = vsi->rxq_map[ring->q_index];
-
- /* clear the context structure first */
- memset(&rlan_ctx, 0, sizeof(rlan_ctx));
-
- rlan_ctx.base = ring->dma >> 7;
-
- rlan_ctx.qlen = ring->count;
-
- /* Receive Packet Data Buffer Size.
- * The Packet Data Buffer Size is defined in 128 byte units.
- */
- rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
-
- /* use 32 byte descriptors */
- rlan_ctx.dsize = 1;
-
- /* Strip the Ethernet CRC bytes before the packet is posted to host
- * memory.
- */
- rlan_ctx.crcstrip = 1;
-
- /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
- rlan_ctx.l2tsel = 1;
-
- rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
- rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
- rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
-
- /* This controls whether VLAN is stripped from inner headers
- * The VLAN in the inner L2 header is stripped to the receive
- * descriptor if enabled by this flag.
- */
- rlan_ctx.showiv = 0;
-
- /* Max packet size for this queue - must not be set to a larger value
- * than 5 x DBUF
- */
- rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
- ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
-
- /* Rx queue threshold in units of 64 */
- rlan_ctx.lrxqthresh = 1;
-
- /* Enable Flexible Descriptors in the queue context which
- * allows this driver to select a specific receive descriptor format
- */
- if (vsi->type != ICE_VSI_VF) {
- regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
- regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
- QRXFLXP_CNTXT_RXDID_IDX_M;
-
- /* increasing context priority to pick up profile ID;
- * default is 0x01; setting to 0x03 to ensure profile
- * is programming if prev context is of same priority
- */
- regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
- QRXFLXP_CNTXT_RXDID_PRIO_M;
-
- wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
- }
-
- /* Absolute queue number out of 2K needs to be passed */
- err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
- if (err) {
- dev_err(&vsi->back->pdev->dev,
- "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
- pf_q, err);
- return -EIO;
- }
-
- if (vsi->type == ICE_VSI_VF)
- return 0;
-
- /* init queue specific tail register */
- ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
- writel(0, ring->tail);
- ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
-
- return 0;
-}
-
-/**
- * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
- * @ring: The Tx ring to configure
- * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
- * @pf_q: queue index in the PF space
- *
- * Configure the Tx descriptor ring in TLAN context.
- */
-static void
-ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
-{
- struct ice_vsi *vsi = ring->vsi;
- struct ice_hw *hw = &vsi->back->hw;
-
- tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
-
- tlan_ctx->port_num = vsi->port_info->lport;
-
- /* Transmit Queue Length */
- tlan_ctx->qlen = ring->count;
-
- ice_set_cgd_num(tlan_ctx, ring);
-
- /* PF number */
- tlan_ctx->pf_num = hw->pf_id;
-
- /* queue belongs to a specific VSI type
- * VF / VM index should be programmed per vmvf_type setting:
- * for vmvf_type = VF, it is VF number between 0-256
- * for vmvf_type = VM, it is VM number between 0-767
- * for PF or EMP this field should be set to zero
- */
- switch (vsi->type) {
- case ICE_VSI_LB:
- /* fall through */
- case ICE_VSI_PF:
- tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
- break;
- case ICE_VSI_VF:
- /* Firmware expects vmvf_num to be absolute VF ID */
- tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
- tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
- break;
- default:
- return;
- }
-
- /* make sure the context is associated with the right VSI */
- tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
-
- tlan_ctx->tso_ena = ICE_TX_LEGACY;
- tlan_ctx->tso_qnum = pf_q;
-
- /* Legacy or Advanced Host Interface:
- * 0: Advanced Host Interface
- * 1: Legacy Host Interface
- */
- tlan_ctx->legacy_int = ICE_TX_LEGACY;
-}
-
-/**
- * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
- * @pf: the PF being configured
- * @pf_q: the PF queue
- * @ena: enable or disable state of the queue
- *
- * This routine will wait for the given Rx queue of the PF to reach the
- * enabled or disabled state.
- * Returns -ETIMEDOUT in case of failing to reach the requested state after
- * multiple retries; else will return 0 in case of success.
- */
-static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
-{
- int i;
-
- for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
- if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
- QRX_CTRL_QENA_STAT_M))
- return 0;
-
- usleep_range(20, 40);
- }
-
- return -ETIMEDOUT;
-}
-
-/**
- * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
- * @vsi: the VSI being configured
- * @ena: start or stop the Rx rings
- * @rxq_idx: Rx queue index
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
-{
- int pf_q = vsi->rxq_map[rxq_idx];
- struct ice_pf *pf = vsi->back;
- struct ice_hw *hw = &pf->hw;
- int ret = 0;
- u32 rx_reg;
-
- rx_reg = rd32(hw, QRX_CTRL(pf_q));
-
- /* Skip if the queue is already in the requested state */
- if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
- return 0;
-
- /* turn on/off the queue */
- if (ena)
- rx_reg |= QRX_CTRL_QENA_REQ_M;
- else
- rx_reg &= ~QRX_CTRL_QENA_REQ_M;
- wr32(hw, QRX_CTRL(pf_q), rx_reg);
-
- /* wait for the change to finish */
- ret = ice_pf_rxq_wait(pf, pf_q, ena);
- if (ret)
- dev_err(&pf->pdev->dev,
- "VSI idx %d Rx ring %d %sable timeout\n",
- vsi->idx, pf_q, (ena ? "en" : "dis"));
-
- return ret;
-}
-
-/**
* ice_vsi_ctrl_rx_rings - Start or stop a VSI's Rx rings
* @vsi: the VSI being configured
* @ena: start or stop the Rx rings
@@ -270,7 +46,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
if (!vsi->rx_rings)
goto err_rings;
- vsi->txq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+ /* XDP will have vsi->alloc_txq Tx queues as well, so double the size */
+ vsi->txq_map = devm_kcalloc(&pf->pdev->dev, (2 * vsi->alloc_txq),
sizeof(*vsi->txq_map), GFP_KERNEL);
if (!vsi->txq_map)
@@ -281,7 +58,6 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
if (!vsi->rxq_map)
goto err_rxq_map;
-
/* There is no need to allocate q_vectors for a loopback VSI. */
if (vsi->type == ICE_VSI_LB)
return 0;
@@ -606,88 +382,6 @@ unlock_pf:
}
/**
- * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
- * @qs_cfg: gathered variables needed for PF->VSI queues assignment
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
-{
- int offset, i;
-
- mutex_lock(qs_cfg->qs_mutex);
- offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
- 0, qs_cfg->q_count, 0);
- if (offset >= qs_cfg->pf_map_size) {
- mutex_unlock(qs_cfg->qs_mutex);
- return -ENOMEM;
- }
-
- bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
- for (i = 0; i < qs_cfg->q_count; i++)
- qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
- mutex_unlock(qs_cfg->qs_mutex);
-
- return 0;
-}
-
-/**
- * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
- * @qs_cfg: gathered variables needed for pf->vsi queues assignment
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
-{
- int i, index = 0;
-
- mutex_lock(qs_cfg->qs_mutex);
- for (i = 0; i < qs_cfg->q_count; i++) {
- index = find_next_zero_bit(qs_cfg->pf_map,
- qs_cfg->pf_map_size, index);
- if (index >= qs_cfg->pf_map_size)
- goto err_scatter;
- set_bit(index, qs_cfg->pf_map);
- qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
- }
- mutex_unlock(qs_cfg->qs_mutex);
-
- return 0;
-err_scatter:
- for (index = 0; index < i; index++) {
- clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
- qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
- }
- mutex_unlock(qs_cfg->qs_mutex);
-
- return -ENOMEM;
-}
-
-/**
- * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
- * @qs_cfg: gathered variables needed for pf->vsi queues assignment
- *
- * This function first tries to find contiguous space. If it is not successful,
- * it tries with the scatter approach.
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
-{
- int ret = 0;
-
- ret = __ice_vsi_get_qs_contig(qs_cfg);
- if (ret) {
- /* contig failed, so try with scatter approach */
- qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
- qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
- qs_cfg->scatter_count);
- ret = __ice_vsi_get_qs_sc(qs_cfg);
- }
- return ret;
-}
-
-/**
* ice_vsi_get_qs - Assign queues from PF to VSI
* @vsi: the VSI to assign queues to
*
@@ -1098,129 +792,6 @@ static int ice_vsi_init(struct ice_vsi *vsi)
}
/**
- * ice_free_q_vector - Free memory allocated for a specific interrupt vector
- * @vsi: VSI having the memory freed
- * @v_idx: index of the vector to be freed
- */
-static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
-{
- struct ice_q_vector *q_vector;
- struct ice_pf *pf = vsi->back;
- struct ice_ring *ring;
-
- if (!vsi->q_vectors[v_idx]) {
- dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n",
- v_idx);
- return;
- }
- q_vector = vsi->q_vectors[v_idx];
-
- ice_for_each_ring(ring, q_vector->tx)
- ring->q_vector = NULL;
- ice_for_each_ring(ring, q_vector->rx)
- ring->q_vector = NULL;
-
- /* only VSI with an associated netdev is set up with NAPI */
- if (vsi->netdev)
- netif_napi_del(&q_vector->napi);
-
- devm_kfree(&pf->pdev->dev, q_vector);
- vsi->q_vectors[v_idx] = NULL;
-}
-
-/**
- * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
- * @vsi: the VSI having memory freed
- */
-void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
-{
- int v_idx;
-
- ice_for_each_q_vector(vsi, v_idx)
- ice_free_q_vector(vsi, v_idx);
-}
-
-/**
- * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
- * @vsi: the VSI being configured
- * @v_idx: index of the vector in the VSI struct
- *
- * We allocate one q_vector. If allocation fails we return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
-{
- struct ice_pf *pf = vsi->back;
- struct ice_q_vector *q_vector;
-
- /* allocate q_vector */
- q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
- if (!q_vector)
- return -ENOMEM;
-
- q_vector->vsi = vsi;
- q_vector->v_idx = v_idx;
- if (vsi->type == ICE_VSI_VF)
- goto out;
- /* only set affinity_mask if the CPU is online */
- if (cpu_online(v_idx))
- cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
-
- /* This will not be called in the driver load path because the netdev
- * will not be created yet. All other cases with register the NAPI
- * handler here (i.e. resume, reset/rebuild, etc.)
- */
- if (vsi->netdev)
- netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
- NAPI_POLL_WEIGHT);
-
-out:
- /* tie q_vector and VSI together */
- vsi->q_vectors[v_idx] = q_vector;
-
- return 0;
-}
-
-/**
- * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
- * @vsi: the VSI being configured
- *
- * We allocate one q_vector per queue interrupt. If allocation fails we
- * return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
-{
- struct ice_pf *pf = vsi->back;
- int v_idx = 0, num_q_vectors;
- int err;
-
- if (vsi->q_vectors[0]) {
- dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
- vsi->vsi_num);
- return -EEXIST;
- }
-
- num_q_vectors = vsi->num_q_vectors;
-
- for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
- err = ice_vsi_alloc_q_vector(vsi, v_idx);
- if (err)
- goto err_out;
- }
-
- return 0;
-
-err_out:
- while (v_idx--)
- ice_free_q_vector(vsi, v_idx);
-
- dev_err(&pf->pdev->dev,
- "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
- vsi->num_q_vectors, vsi->vsi_num, err);
- vsi->num_q_vectors = 0;
- return err;
-}
-
-/**
* ice_vsi_setup_vector_base - Set up the base vector for the given VSI
* @vsi: ptr to the VSI
*
@@ -1341,66 +912,6 @@ err_out:
}
/**
- * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
- * @vsi: the VSI being configured
- *
- * This function maps descriptor rings to the queue-specific vectors allotted
- * through the MSI-X enabling code. On a constrained vector budget, we map Tx
- * and Rx rings to the vector as "efficiently" as possible.
- */
-#ifdef CONFIG_DCB
-void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
-#else
-static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
-#endif /* CONFIG_DCB */
-{
- int q_vectors = vsi->num_q_vectors;
- int tx_rings_rem, rx_rings_rem;
- int v_id;
-
- /* initially assigning remaining rings count to VSIs num queue value */
- tx_rings_rem = vsi->num_txq;
- rx_rings_rem = vsi->num_rxq;
-
- for (v_id = 0; v_id < q_vectors; v_id++) {
- struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
- int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
-
- /* Tx rings mapping to vector */
- tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
- q_vector->num_ring_tx = tx_rings_per_v;
- q_vector->tx.ring = NULL;
- q_vector->tx.itr_idx = ICE_TX_ITR;
- q_base = vsi->num_txq - tx_rings_rem;
-
- for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
- struct ice_ring *tx_ring = vsi->tx_rings[q_id];
-
- tx_ring->q_vector = q_vector;
- tx_ring->next = q_vector->tx.ring;
- q_vector->tx.ring = tx_ring;
- }
- tx_rings_rem -= tx_rings_per_v;
-
- /* Rx rings mapping to vector */
- rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
- q_vector->num_ring_rx = rx_rings_per_v;
- q_vector->rx.ring = NULL;
- q_vector->rx.itr_idx = ICE_RX_ITR;
- q_base = vsi->num_rxq - rx_rings_rem;
-
- for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
- struct ice_ring *rx_ring = vsi->rx_rings[q_id];
-
- rx_ring->q_vector = q_vector;
- rx_ring->next = q_vector->rx.ring;
- q_vector->rx.ring = rx_ring;
- }
- rx_rings_rem -= rx_rings_per_v;
- }
-}
-
-/**
* ice_vsi_manage_rss_lut - disable/enable RSS
* @vsi: the VSI being changed
* @ena: boolean value indicating if this is an enable or disable request
@@ -1674,6 +1185,31 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
}
/**
+ * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
+ * @vsi: VSI
+ */
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
+{
+ if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
+ vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+ vsi->rx_buf_len = ICE_RXBUF_2048;
+#if (PAGE_SIZE < 8192)
+ } else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
+ (vsi->netdev->mtu <= ETH_DATA_LEN)) {
+ vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
+ vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
+#endif
+ } else {
+ vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+#if (PAGE_SIZE < 8192)
+ vsi->rx_buf_len = ICE_RXBUF_3072;
+#else
+ vsi->rx_buf_len = ICE_RXBUF_2048;
+#endif
+ }
+}
+
+/**
* ice_vsi_cfg_rxqs - Configure the VSI for Rx
* @vsi: the VSI being configured
*
@@ -1687,13 +1223,7 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
if (vsi->type == ICE_VSI_VF)
goto setup_rings;
- if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN)
- vsi->max_frame = vsi->netdev->mtu +
- ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
- else
- vsi->max_frame = ICE_RXBUF_2048;
-
- vsi->rx_buf_len = ICE_RXBUF_2048;
+ ice_vsi_cfg_frame_size(vsi);
setup_rings:
/* set up individual rings */
for (i = 0; i < vsi->num_rxq; i++) {
@@ -1712,101 +1242,34 @@ setup_rings:
}
/**
- * ice_vsi_cfg_txq - Configure single Tx queue
- * @vsi: the VSI that queue belongs to
- * @ring: Tx ring to be configured
- * @tc_q_idx: queue index within given TC
- * @qg_buf: queue group buffer
- * @tc: TC that Tx ring belongs to
- */
-static int
-ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, u16 tc_q_idx,
- struct ice_aqc_add_tx_qgrp *qg_buf, u8 tc)
-{
- struct ice_tlan_ctx tlan_ctx = { 0 };
- struct ice_aqc_add_txqs_perq *txq;
- struct ice_pf *pf = vsi->back;
- u8 buf_len = sizeof(*qg_buf);
- enum ice_status status;
- u16 pf_q;
-
- pf_q = ring->reg_idx;
- ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
- /* copy context contents into the qg_buf */
- qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
- ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
- ice_tlan_ctx_info);
-
- /* init queue specific tail reg. It is referred as
- * transmit comm scheduler queue doorbell.
- */
- ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
-
- /* Add unique software queue handle of the Tx queue per
- * TC into the VSI Tx ring
- */
- ring->q_handle = tc_q_idx;
-
- status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
- 1, qg_buf, buf_len, NULL);
- if (status) {
- dev_err(&pf->pdev->dev,
- "Failed to set LAN Tx queue context, error: %d\n",
- status);
- return -ENODEV;
- }
-
- /* Add Tx Queue TEID into the VSI Tx ring from the
- * response. This will complete configuring and
- * enabling the queue.
- */
- txq = &qg_buf->txqs[0];
- if (pf_q == le16_to_cpu(txq->txq_id))
- ring->txq_teid = le32_to_cpu(txq->q_teid);
-
- return 0;
-}
-
-/**
* ice_vsi_cfg_txqs - Configure the VSI for Tx
* @vsi: the VSI being configured
* @rings: Tx ring array to be configured
- * @offset: offset within vsi->txq_map
*
* Return 0 on success and a negative value on error
* Configure the Tx VSI for operation.
*/
static int
-ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset)
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
{
struct ice_aqc_add_tx_qgrp *qg_buf;
- struct ice_pf *pf = vsi->back;
- u16 q_idx = 0, i;
+ u16 q_idx = 0;
int err = 0;
- u8 tc;
- qg_buf = devm_kzalloc(&pf->pdev->dev, sizeof(*qg_buf), GFP_KERNEL);
+ qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL);
if (!qg_buf)
return -ENOMEM;
qg_buf->num_txqs = 1;
- /* set up and configure the Tx queues for each enabled TC */
- ice_for_each_traffic_class(tc) {
- if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
- break;
-
- for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
- err = ice_vsi_cfg_txq(vsi, rings[q_idx], i + offset,
- qg_buf, tc);
- if (err)
- goto err_cfg_txqs;
-
- q_idx++;
- }
+ for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+ err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
+ if (err)
+ goto err_cfg_txqs;
}
+
err_cfg_txqs:
- devm_kfree(&pf->pdev->dev, qg_buf);
+ kfree(qg_buf);
return err;
}
@@ -1819,159 +1282,46 @@ err_cfg_txqs:
*/
int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
{
- return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, 0);
-}
-
-/**
- * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
- * @intrl: interrupt rate limit in usecs
- * @gran: interrupt rate limit granularity in usecs
- *
- * This function converts a decimal interrupt rate limit in usecs to the format
- * expected by firmware.
- */
-u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
-{
- u32 val = intrl / gran;
-
- if (val)
- return val | GLINT_RATE_INTRL_ENA_M;
- return 0;
-}
-
-/**
- * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
- * @hw: board specific structure
- */
-static void ice_cfg_itr_gran(struct ice_hw *hw)
-{
- u32 regval = rd32(hw, GLINT_CTL);
-
- /* no need to update global register if ITR gran is already set */
- if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
- (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
- GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
- (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
- GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
- (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
- GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
- (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
- GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
- return;
-
- regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
- GLINT_CTL_ITR_GRAN_200_M) |
- ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
- GLINT_CTL_ITR_GRAN_100_M) |
- ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
- GLINT_CTL_ITR_GRAN_50_M) |
- ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
- GLINT_CTL_ITR_GRAN_25_M);
- wr32(hw, GLINT_CTL, regval);
+ return ice_vsi_cfg_txqs(vsi, vsi->tx_rings);
}
/**
- * ice_cfg_itr - configure the initial interrupt throttle values
- * @hw: pointer to the HW structure
- * @q_vector: interrupt vector that's being configured
- *
- * Configure interrupt throttling values for the ring containers that are
- * associated with the interrupt vector passed in.
- */
-static void
-ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
-{
- ice_cfg_itr_gran(hw);
-
- if (q_vector->num_ring_rx) {
- struct ice_ring_container *rc = &q_vector->rx;
-
- /* if this value is set then don't overwrite with default */
- if (!rc->itr_setting)
- rc->itr_setting = ICE_DFLT_RX_ITR;
-
- rc->target_itr = ITR_TO_REG(rc->itr_setting);
- rc->next_update = jiffies + 1;
- rc->current_itr = rc->target_itr;
- wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
- ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
- }
-
- if (q_vector->num_ring_tx) {
- struct ice_ring_container *rc = &q_vector->tx;
-
- /* if this value is set then don't overwrite with default */
- if (!rc->itr_setting)
- rc->itr_setting = ICE_DFLT_TX_ITR;
-
- rc->target_itr = ITR_TO_REG(rc->itr_setting);
- rc->next_update = jiffies + 1;
- rc->current_itr = rc->target_itr;
- wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
- ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
- }
-}
-
-/**
- * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
* @vsi: the VSI being configured
- * @txq: Tx queue being mapped to MSI-X vector
- * @msix_idx: MSI-X vector index within the function
- * @itr_idx: ITR index of the interrupt cause
*
- * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
- * within the function space.
+ * Return 0 on success and a negative value on error
+ * Configure the Tx queues dedicated for XDP in given VSI for operation.
*/
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
-#else
-static void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
-#endif /* CONFIG_PCI_IOV */
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
{
- struct ice_pf *pf = vsi->back;
- struct ice_hw *hw = &pf->hw;
- u32 val;
+ int ret;
+ int i;
- itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+ ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
+ if (ret)
+ return ret;
- val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
- ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]);
- wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+ return ret;
}
/**
- * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
- * @vsi: the VSI being configured
- * @rxq: Rx queue being mapped to MSI-X vector
- * @msix_idx: MSI-X vector index within the function
- * @itr_idx: ITR index of the interrupt cause
+ * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
+ * @intrl: interrupt rate limit in usecs
+ * @gran: interrupt rate limit granularity in usecs
*
- * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
- * within the function space.
+ * This function converts a decimal interrupt rate limit in usecs to the format
+ * expected by firmware.
*/
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
-#else
-static void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
-#endif /* CONFIG_PCI_IOV */
+u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
{
- struct ice_pf *pf = vsi->back;
- struct ice_hw *hw = &pf->hw;
- u32 val;
-
- itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
-
- val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
- ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
-
- wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+ u32 val = intrl / gran;
- ice_flush(hw);
+ if (val)
+ return val | GLINT_RATE_INTRL_ENA_M;
+ return 0;
}
/**
@@ -2134,109 +1484,6 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
}
/**
- * ice_trigger_sw_intr - trigger a software interrupt
- * @hw: pointer to the HW structure
- * @q_vector: interrupt vector to trigger the software interrupt for
- */
-void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
-{
- wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
- (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
- GLINT_DYN_CTL_SWINT_TRIG_M |
- GLINT_DYN_CTL_INTENA_M);
-}
-
-/**
- * ice_vsi_stop_tx_ring - Disable single Tx ring
- * @vsi: the VSI being configured
- * @rst_src: reset source
- * @rel_vmvf_num: Relative ID of VF/VM
- * @ring: Tx ring to be stopped
- * @txq_meta: Meta data of Tx ring to be stopped
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-int
-ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
- u16 rel_vmvf_num, struct ice_ring *ring,
- struct ice_txq_meta *txq_meta)
-{
- struct ice_pf *pf = vsi->back;
- struct ice_q_vector *q_vector;
- struct ice_hw *hw = &pf->hw;
- enum ice_status status;
- u32 val;
-
- /* clear cause_ena bit for disabled queues */
- val = rd32(hw, QINT_TQCTL(ring->reg_idx));
- val &= ~QINT_TQCTL_CAUSE_ENA_M;
- wr32(hw, QINT_TQCTL(ring->reg_idx), val);
-
- /* software is expected to wait for 100 ns */
- ndelay(100);
-
- /* trigger a software interrupt for the vector
- * associated to the queue to schedule NAPI handler
- */
- q_vector = ring->q_vector;
- if (q_vector)
- ice_trigger_sw_intr(hw, q_vector);
-
- status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
- txq_meta->tc, 1, &txq_meta->q_handle,
- &txq_meta->q_id, &txq_meta->q_teid, rst_src,
- rel_vmvf_num, NULL);
-
- /* if the disable queue command was exercised during an
- * active reset flow, ICE_ERR_RESET_ONGOING is returned.
- * This is not an error as the reset operation disables
- * queues at the hardware level anyway.
- */
- if (status == ICE_ERR_RESET_ONGOING) {
- dev_dbg(&vsi->back->pdev->dev,
- "Reset in progress. LAN Tx queues already disabled\n");
- } else if (status == ICE_ERR_DOES_NOT_EXIST) {
- dev_dbg(&vsi->back->pdev->dev,
- "LAN Tx queues do not exist, nothing to disable\n");
- } else if (status) {
- dev_err(&vsi->back->pdev->dev,
- "Failed to disable LAN Tx queues, error: %d\n", status);
- return -ENODEV;
- }
-
- return 0;
-}
-
-/**
- * ice_fill_txq_meta - Prepare the Tx queue's meta data
- * @vsi: VSI that ring belongs to
- * @ring: ring that txq_meta will be based on
- * @txq_meta: a helper struct that wraps Tx queue's information
- *
- * Set up a helper struct that will contain all the necessary fields that
- * are needed for stopping Tx queue
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-void
-ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
- struct ice_txq_meta *txq_meta)
-{
- u8 tc = 0;
-
-#ifdef CONFIG_DCB
- tc = ring->dcb_tc;
-#endif /* CONFIG_DCB */
- txq_meta->q_id = ring->reg_idx;
- txq_meta->q_teid = ring->txq_teid;
- txq_meta->q_handle = ring->q_handle;
- txq_meta->vsi_idx = vsi->idx;
- txq_meta->tc = tc;
-}
-
-/**
* ice_vsi_stop_tx_rings - Disable Tx rings
* @vsi: the VSI being configured
* @rst_src: reset source
@@ -2247,34 +1494,24 @@ static int
ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
u16 rel_vmvf_num, struct ice_ring **rings)
{
- u16 i, q_idx = 0;
- int status;
- u8 tc;
+ u16 q_idx;
if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
return -EINVAL;
- /* set up the Tx queue list to be disabled for each enabled TC */
- ice_for_each_traffic_class(tc) {
- if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
- break;
-
- for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
- struct ice_txq_meta txq_meta = { };
-
- if (!rings || !rings[q_idx])
- return -EINVAL;
+ for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+ struct ice_txq_meta txq_meta = { };
+ int status;
- ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
- status = ice_vsi_stop_tx_ring(vsi, rst_src,
- rel_vmvf_num,
- rings[q_idx], &txq_meta);
+ if (!rings || !rings[q_idx])
+ return -EINVAL;
- if (status)
- return status;
+ ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
+ status = ice_vsi_stop_tx_ring(vsi, rst_src, rel_vmvf_num,
+ rings[q_idx], &txq_meta);
- q_idx++;
- }
+ if (status)
+ return status;
}
return 0;
@@ -2294,6 +1531,15 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
}
/**
+ * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings
+ * @vsi: the VSI being configured
+ */
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
+{
+ return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings);
+}
+
+/**
* ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
* @vsi: VSI to enable or disable VLAN pruning on
* @ena: set to true to enable VLAN pruning and false to disable it
@@ -2690,6 +1936,11 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0);
for (q = 0; q < q_vector->num_ring_tx; q++) {
wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0);
+ }
txq++;
}
@@ -3064,6 +2315,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
vsi->base_vector = 0;
}
+ if (ice_is_xdp_ena_vsi(vsi))
+ /* return value check can be skipped here, it always returns
+ * 0 if reset is in progress
+ */
+ ice_destroy_xdp_rings(vsi);
ice_vsi_put_qs(vsi);
ice_vsi_clear_rings(vsi);
ice_vsi_free_arrays(vsi);
@@ -3085,7 +2341,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
if (ret < 0)
goto err_vsi;
-
switch (vsi->type) {
case ICE_VSI_PF:
ret = ice_vsi_alloc_q_vectors(vsi);
@@ -3105,6 +2360,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
goto err_vectors;
ice_vsi_map_rings_to_vectors(vsi);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ vsi->num_xdp_txq = vsi->alloc_txq;
+ ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
+ if (ret)
+ goto err_vectors;
+ }
/* Do not exit if configuring RSS had an issue, at least
* receive traffic on first queue. Hence no need to capture
* return value
@@ -3131,9 +2392,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
}
/* configure VSI nodes based on number of queues and TC's */
- for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ for (i = 0; i < vsi->tc_cfg.numtc; i++) {
max_txqs[i] = vsi->alloc_txq;
+ if (ice_is_xdp_ena_vsi(vsi))
+ max_txqs[i] += vsi->num_xdp_txq;
+ }
+
status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
max_txqs);
if (status) {
@@ -3271,6 +2536,51 @@ char *ice_nvm_version_str(struct ice_hw *hw)
}
/**
+ * ice_update_ring_stats - Update ring statistics
+ * @ring: ring to update
+ * @cont: used to increment per-vector counters
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ *
+ * This function assumes that caller has acquired a u64_stats_sync lock.
+ */
+static void
+ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont,
+ u64 pkts, u64 bytes)
+{
+ ring->stats.bytes += bytes;
+ ring->stats.pkts += pkts;
+ cont->total_bytes += bytes;
+ cont->total_pkts += pkts;
+}
+
+/**
+ * ice_update_tx_ring_stats - Update Tx ring specific counters
+ * @tx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes)
+{
+ u64_stats_update_begin(&tx_ring->syncp);
+ ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes);
+ u64_stats_update_end(&tx_ring->syncp);
+}
+
+/**
+ * ice_update_rx_ring_stats - Update Rx ring specific counters
+ * @rx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
+{
+ u64_stats_update_begin(&rx_ring->syncp);
+ ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes);
+ u64_stats_update_end(&rx_ring->syncp);
+}
+
+/**
* ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI
* @vsi: the VSI being configured MAC filter
* @macaddr: the MAC address to be added.
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 47bc033fff20..8d5a7978e066 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -6,19 +6,6 @@
#include "ice.h"
-struct ice_txq_meta {
- /* Tx-scheduler element identifier */
- u32 q_teid;
- /* Entry in VSI's txq_map bitmap */
- u16 q_id;
- /* Relative index of Tx queue within TC */
- u16 q_handle;
- /* VSI index that Tx queue belongs to */
- u16 vsi_idx;
- /* TC number that Tx queue belongs to */
- u8 tc;
-};
-
int
ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
const u8 *macaddr);
@@ -33,24 +20,6 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
void ice_vsi_cfg_msix(struct ice_vsi *vsi);
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
-
-void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
-
-int
-ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
- u16 rel_vmvf_num, struct ice_ring *ring,
- struct ice_txq_meta *txq_meta);
-
-void ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
- struct ice_txq_meta *txq_meta);
-
-int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
-#endif /* CONFIG_PCI_IOV */
-
int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid);
int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
@@ -67,6 +36,10 @@ int
ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
u16 rel_vmvf_num);
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
+
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
+
int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
@@ -98,16 +71,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi);
bool ice_is_reset_in_progress(unsigned long *state);
-void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
-
-void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
-
void ice_vsi_put_qs(struct ice_vsi *vsi);
-#ifdef CONFIG_DCB
-void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
-#endif /* CONFIG_DCB */
-
void ice_vsi_dis_irq(struct ice_vsi *vsi);
void ice_vsi_free_irq(struct ice_vsi *vsi);
@@ -118,6 +83,12 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
+void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
+
u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
char *ice_nvm_version_str(struct ice_hw *hw);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 214cd6eca405..363b284e8aa1 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include "ice.h"
+#include "ice_base.h"
#include "ice_lib.h"
#include "ice_dcb_lib.h"
@@ -1661,6 +1662,324 @@ free_q_irqs:
}
/**
+ * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
+ * @vsi: VSI to setup Tx rings used by XDP
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
+{
+ struct device *dev = &vsi->back->pdev->dev;
+ int i;
+
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ u16 xdp_q_idx = vsi->alloc_txq + i;
+ struct ice_ring *xdp_ring;
+
+ xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
+
+ if (!xdp_ring)
+ goto free_xdp_rings;
+
+ xdp_ring->q_index = xdp_q_idx;
+ xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
+ xdp_ring->ring_active = false;
+ xdp_ring->vsi = vsi;
+ xdp_ring->netdev = NULL;
+ xdp_ring->dev = dev;
+ xdp_ring->count = vsi->num_tx_desc;
+ vsi->xdp_rings[i] = xdp_ring;
+ if (ice_setup_tx_ring(xdp_ring))
+ goto free_xdp_rings;
+ ice_set_ring_xdp(xdp_ring);
+ xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+ }
+
+ return 0;
+
+free_xdp_rings:
+ for (; i >= 0; i--)
+ if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ return -ENOMEM;
+}
+
+/**
+ * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
+ * @vsi: VSI to set the bpf prog on
+ * @prog: the bpf prog pointer
+ */
+static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+ struct bpf_prog *old_prog;
+ int i;
+
+ old_prog = xchg(&vsi->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ ice_for_each_rxq(vsi, i)
+ WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+}
+
+/**
+ * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
+ * @vsi: VSI to bring up Tx rings used by XDP
+ * @prog: bpf program that will be assigned to VSI
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ int xdp_rings_rem = vsi->num_xdp_txq;
+ struct ice_pf *pf = vsi->back;
+ struct ice_qs_cfg xdp_qs_cfg = {
+ .qs_mutex = &pf->avail_q_mutex,
+ .pf_map = pf->avail_txqs,
+ .pf_map_size = pf->max_pf_txqs,
+ .q_count = vsi->num_xdp_txq,
+ .scatter_count = ICE_MAX_SCATTER_TXQS,
+ .vsi_map = vsi->txq_map,
+ .vsi_map_offset = vsi->alloc_txq,
+ .mapping_mode = ICE_VSI_MAP_CONTIG
+ };
+ enum ice_status status;
+ int i, v_idx;
+
+ vsi->xdp_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_xdp_txq,
+ sizeof(*vsi->xdp_rings), GFP_KERNEL);
+ if (!vsi->xdp_rings)
+ return -ENOMEM;
+
+ vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
+ if (__ice_vsi_get_qs(&xdp_qs_cfg))
+ goto err_map_xdp;
+
+ if (ice_xdp_alloc_setup_rings(vsi))
+ goto clear_xdp_rings;
+
+ /* follow the logic from ice_vsi_map_rings_to_vectors */
+ ice_for_each_q_vector(vsi, v_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+ int xdp_rings_per_v, q_id, q_base;
+
+ xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+ vsi->num_q_vectors - v_idx);
+ q_base = vsi->num_xdp_txq - xdp_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+ struct ice_ring *xdp_ring = vsi->xdp_rings[q_id];
+
+ xdp_ring->q_vector = q_vector;
+ xdp_ring->next = q_vector->tx.ring;
+ q_vector->tx.ring = xdp_ring;
+ }
+ xdp_rings_rem -= xdp_rings_per_v;
+ }
+
+ /* omit the scheduler update if in reset path; XDP queues will be
+ * taken into account at the end of ice_vsi_rebuild, where
+ * ice_cfg_vsi_lan is being called
+ */
+ if (ice_is_reset_in_progress(pf->state))
+ return 0;
+
+ /* tell the Tx scheduler that right now we have
+ * additional queues
+ */
+ for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
+
+ status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_txqs);
+ if (status) {
+ dev_err(&pf->pdev->dev,
+ "Failed VSI LAN queue config for XDP, error:%d\n",
+ status);
+ goto clear_xdp_rings;
+ }
+ ice_vsi_assign_bpf_prog(vsi, prog);
+
+ return 0;
+clear_xdp_rings:
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ if (vsi->xdp_rings[i]) {
+ kfree_rcu(vsi->xdp_rings[i], rcu);
+ vsi->xdp_rings[i] = NULL;
+ }
+
+err_map_xdp:
+ mutex_lock(&pf->avail_q_mutex);
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+ vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+ }
+ mutex_unlock(&pf->avail_q_mutex);
+
+ devm_kfree(&pf->pdev->dev, vsi->xdp_rings);
+ return -ENOMEM;
+}
+
+/**
+ * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
+ * @vsi: VSI to remove XDP rings
+ *
+ * Detach XDP rings from irq vectors, clean up the PF bitmap and free
+ * resources
+ */
+int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ struct ice_pf *pf = vsi->back;
+ int i, v_idx;
+
+ /* q_vectors are freed in reset path so there's no point in detaching
+ * rings; in case of rebuild being triggered not from reset reset bits
+ * in pf->state won't be set, so additionally check first q_vector
+ * against NULL
+ */
+ if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+ goto free_qmap;
+
+ ice_for_each_q_vector(vsi, v_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+ struct ice_ring *ring;
+
+ ice_for_each_ring(ring, q_vector->tx)
+ if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+ break;
+
+ /* restore the value of last node prior to XDP setup */
+ q_vector->tx.ring = ring;
+ }
+
+free_qmap:
+ mutex_lock(&pf->avail_q_mutex);
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+ vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+ }
+ mutex_unlock(&pf->avail_q_mutex);
+
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ if (vsi->xdp_rings[i]) {
+ if (vsi->xdp_rings[i]->desc)
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ kfree_rcu(vsi->xdp_rings[i], rcu);
+ vsi->xdp_rings[i] = NULL;
+ }
+
+ devm_kfree(&pf->pdev->dev, vsi->xdp_rings);
+ vsi->xdp_rings = NULL;
+
+ if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+ return 0;
+
+ ice_vsi_assign_bpf_prog(vsi, NULL);
+
+ /* notify Tx scheduler that we destroyed XDP queues and bring
+ * back the old number of child nodes
+ */
+ for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ max_txqs[i] = vsi->num_txq;
+
+ return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_txqs);
+}
+
+/**
+ * ice_xdp_setup_prog - Add or remove XDP eBPF program
+ * @vsi: VSI to setup XDP for
+ * @prog: XDP program
+ * @extack: netlink extended ack
+ */
+static int
+ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+ bool if_running = netif_running(vsi->netdev);
+ int ret = 0, xdp_ring_err = 0;
+
+ if (frame_size > vsi->rx_buf_len) {
+ NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
+ return -EOPNOTSUPP;
+ }
+
+ /* need to stop netdev while setting up the program for Rx rings */
+ if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
+ ret = ice_down(vsi);
+ if (ret) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Preparing device for XDP attach failed");
+ return ret;
+ }
+ }
+
+ if (!ice_is_xdp_ena_vsi(vsi) && prog) {
+ vsi->num_xdp_txq = vsi->alloc_txq;
+ xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Setting up XDP Tx resources failed");
+ } else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+ xdp_ring_err = ice_destroy_xdp_rings(vsi);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Freeing XDP Tx resources failed");
+ } else {
+ ice_vsi_assign_bpf_prog(vsi, prog);
+ }
+
+ if (if_running)
+ ret = ice_up(vsi);
+
+ if (!ret && prog && vsi->xsk_umems) {
+ int i;
+
+ ice_for_each_rxq(vsi, i) {
+ struct ice_ring *rx_ring = vsi->rx_rings[i];
+
+ if (rx_ring->xsk_umem)
+ napi_schedule(&rx_ring->q_vector->napi);
+ }
+ }
+
+ return (ret || xdp_ring_err) ? -ENOMEM : 0;
+}
+
+/**
+ * ice_xdp - implements XDP handler
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (vsi->type != ICE_VSI_PF) {
+ NL_SET_ERR_MSG_MOD(xdp->extack,
+ "XDP can be loaded only on PF VSI");
+ return -EINVAL;
+ }
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
+ return 0;
+ case XDP_SETUP_XSK_UMEM:
+ return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
+ xdp->xsk.queue_id);
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
* ice_ena_misc_vector - enable the non-queue interrupts
* @pf: board private structure
*/
@@ -2219,6 +2538,8 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
status = -ENODEV;
goto unroll_vsi_setup;
}
+ /* netdev has to be configured before setting frame size */
+ ice_vsi_cfg_frame_size(vsi);
/* registering the NAPI handler requires both the queues and
* netdev to be created, which are done in ice_pf_vsi_setup()
@@ -3505,6 +3826,8 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
ice_vsi_cfg_dcb_rings(vsi);
err = ice_vsi_cfg_lan_txqs(vsi);
+ if (!err && ice_is_xdp_ena_vsi(vsi))
+ err = ice_vsi_cfg_xdp_txqs(vsi);
if (!err)
err = ice_vsi_cfg_rxqs(vsi);
@@ -3920,6 +4243,13 @@ int ice_down(struct ice_vsi *vsi)
netdev_err(vsi->netdev,
"Failed stop Tx rings, VSI %d error %d\n",
vsi->vsi_num, tx_err);
+ if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
+ tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
+ if (tx_err)
+ netdev_err(vsi->netdev,
+ "Failed stop XDP rings, VSI %d error %d\n",
+ vsi->vsi_num, tx_err);
+ }
rx_err = ice_vsi_stop_rx_rings(vsi);
if (rx_err)
@@ -4329,6 +4659,18 @@ clear_recovery:
}
/**
+ * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: Pointer to VSI structure
+ */
+static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
+{
+ if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+ return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
+ else
+ return ICE_RXBUF_3072;
+}
+
+/**
* ice_change_mtu - NDO callback to change the MTU
* @netdev: network interface device structure
* @new_mtu: new value for maximum frame size
@@ -4347,6 +4689,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
return 0;
}
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ int frame_size = ice_max_xdp_frame_size(vsi);
+
+ if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
+ netdev_err(netdev, "max MTU for XDP usage is %d\n",
+ frame_size - ICE_ETH_PKT_HDR_PAD);
+ return -EINVAL;
+ }
+ }
+
if (new_mtu < netdev->min_mtu) {
netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
netdev->min_mtu);
@@ -4878,4 +5230,7 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_fdb_add = ice_fdb_add,
.ndo_fdb_del = ice_fdb_del,
.ndo_tx_timeout = ice_tx_timeout,
+ .ndo_bpf = ice_xdp,
+ .ndo_xdp_xmit = ice_xdp_xmit,
+ .ndo_xsk_wakeup = ice_xsk_wakeup,
};
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 33dd103035dc..40a29b9d3034 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -5,8 +5,13 @@
#include <linux/prefetch.h>
#include <linux/mm.h>
+#include <linux/bpf_trace.h>
+#include <net/xdp.h>
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
#include "ice.h"
#include "ice_dcb_lib.h"
+#include "ice_xsk.h"
#define ICE_RX_HDR_SIZE 256
@@ -19,7 +24,10 @@ static void
ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
{
if (tx_buf->skb) {
- dev_kfree_skb_any(tx_buf->skb);
+ if (ice_ring_is_xdp(ring))
+ page_frag_free(tx_buf->raw_buf);
+ else
+ dev_kfree_skb_any(tx_buf->skb);
if (dma_unmap_len(tx_buf, len))
dma_unmap_single(ring->dev,
dma_unmap_addr(tx_buf, dma),
@@ -51,6 +59,11 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
{
u16 i;
+ if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
+ ice_xsk_clean_xdp_ring(tx_ring);
+ goto tx_skip_free;
+ }
+
/* ring already cleared, nothing to do */
if (!tx_ring->tx_buf)
return;
@@ -59,6 +72,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
for (i = 0; i < tx_ring->count; i++)
ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
+tx_skip_free:
memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
/* Zero out the descriptor ring */
@@ -136,8 +150,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
total_bytes += tx_buf->bytecount;
total_pkts += tx_buf->gso_segs;
- /* free the skb */
- napi_consume_skb(tx_buf->skb, napi_budget);
+ if (ice_ring_is_xdp(tx_ring))
+ page_frag_free(tx_buf->raw_buf);
+ else
+ /* free the skb */
+ napi_consume_skb(tx_buf->skb, napi_budget);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -188,12 +205,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
i += tx_ring->count;
tx_ring->next_to_clean = i;
- u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->stats.bytes += total_bytes;
- tx_ring->stats.pkts += total_pkts;
- u64_stats_update_end(&tx_ring->syncp);
- tx_ring->q_vector->tx.total_bytes += total_bytes;
- tx_ring->q_vector->tx.total_pkts += total_pkts;
+
+ ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
+
+ if (ice_ring_is_xdp(tx_ring))
+ return !!budget;
netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
total_bytes);
@@ -273,6 +289,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
if (!rx_ring->rx_buf)
return;
+ if (rx_ring->xsk_umem) {
+ ice_xsk_clean_rx_ring(rx_ring);
+ goto rx_skip_free;
+ }
+
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
@@ -289,10 +310,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
*/
dma_sync_single_range_for_cpu(dev, rx_buf->dma,
rx_buf->page_offset,
- ICE_RXBUF_2048, DMA_FROM_DEVICE);
+ rx_ring->rx_buf_len,
+ DMA_FROM_DEVICE);
/* free resources associated with mapping */
- dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE,
+ dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
@@ -300,6 +322,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
rx_buf->page_offset = 0;
}
+rx_skip_free:
memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
/* Zero out the descriptor ring */
@@ -319,6 +342,10 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
void ice_free_rx_ring(struct ice_ring *rx_ring)
{
ice_clean_rx_ring(rx_ring);
+ if (rx_ring->vsi->type == ICE_VSI_PF)
+ if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+ rx_ring->xdp_prog = NULL;
devm_kfree(rx_ring->dev, rx_ring->rx_buf);
rx_ring->rx_buf = NULL;
@@ -363,6 +390,15 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
rx_ring->next_to_use = 0;
rx_ring->next_to_clean = 0;
+
+ if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+ WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
+
+ if (rx_ring->vsi->type == ICE_VSI_PF &&
+ !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+ rx_ring->q_index))
+ goto err;
return 0;
err:
@@ -372,34 +408,110 @@ err:
}
/**
- * ice_release_rx_desc - Store the new tail and head values
- * @rx_ring: ring to bump
- * @val: new head index
+ * ice_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
*/
-static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
{
- u16 prev_ntu = rx_ring->next_to_use;
+ if (ice_ring_uses_build_skb(rx_ring))
+ return ICE_SKB_PAD;
+ else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+ return XDP_PACKET_HEADROOM;
- rx_ring->next_to_use = val;
+ return 0;
+}
- /* update next to alloc since we have filled the ring */
- rx_ring->next_to_alloc = val;
+/**
+ * ice_run_xdp - Executes an XDP program on initialized xdp_buff
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ int err, result = ICE_XDP_PASS;
+ struct ice_ring *xdp_ring;
+ u32 act;
- /* QRX_TAIL will be updated with any tail value, but hardware ignores
- * the lower 3 bits. This makes it so we only bump tail on meaningful
- * boundaries. Also, this allows us to bump tail on intervals of 8 up to
- * the budget depending on the current traffic load.
- */
- val &= ~0x7;
- if (prev_ntu != val) {
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64).
- */
- wmb();
- writel(val, rx_ring->tail);
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
+ result = ice_xmit_xdp_buff(xdp, xdp_ring);
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fallthrough -- not supported action */
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ /* fallthrough -- handle aborts by dropping frame */
+ case XDP_DROP:
+ result = ICE_XDP_CONSUMED;
+ break;
+ }
+
+ return result;
+}
+
+/**
+ * ice_xdp_xmit - submit packets to XDP ring for transmission
+ * @dev: netdev
+ * @n: number of XDP frames to be transmitted
+ * @frames: XDP frames to be transmitted
+ * @flags: transmit flags
+ *
+ * Returns number of frames successfully sent. Frames that fail are
+ * free'ed via XDP return API.
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
+ */
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ unsigned int queue_index = smp_processor_id();
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_ring *xdp_ring;
+ int drops = 0, i;
+
+ if (test_bit(__ICE_DOWN, vsi->state))
+ return -ENETDOWN;
+
+ if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
+ return -ENXIO;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ xdp_ring = vsi->xdp_rings[queue_index];
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ int err;
+
+ err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+ if (err != ICE_XDP_TX) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
}
+
+ if (unlikely(flags & XDP_XMIT_FLUSH))
+ ice_xdp_ring_update_tail(xdp_ring);
+
+ return n - drops;
}
/**
@@ -423,28 +535,28 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
}
/* alloc new page for storage */
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_page_failed++;
return false;
}
/* map page for use */
- dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
- __free_pages(page, 0);
+ __free_pages(page, ice_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_page_failed++;
return false;
}
bi->dma = dma;
bi->page = page;
- bi->page_offset = 0;
+ bi->page_offset = ice_rx_offset(rx_ring);
page_ref_add(page, USHRT_MAX - 1);
bi->pagecnt_bias = USHRT_MAX;
@@ -486,7 +598,7 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
- ICE_RXBUF_2048,
+ rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* Refresh the desc even if buffer_addrs didn't change
@@ -557,9 +669,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
*/
static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
{
-#if (PAGE_SIZE >= 8192)
- unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
-#endif
unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
struct page *page = rx_buf->page;
@@ -572,7 +681,9 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
if (unlikely((page_count(page) - pagecnt_bias) > 1))
return false;
#else
- if (rx_buf->page_offset > last_offset)
+#define ICE_LAST_OFFSET \
+ (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048)
+ if (rx_buf->page_offset > ICE_LAST_OFFSET)
return false;
#endif /* PAGE_SIZE < 8192) */
@@ -590,6 +701,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
/**
* ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
+ * @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: buffer containing page to add
* @skb: sk_buff to place the data into
* @size: packet length from rx_desc
@@ -599,13 +711,13 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
* The function will then update the page offset.
*/
static void
-ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
- unsigned int size)
+ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+ struct sk_buff *skb, unsigned int size)
{
#if (PAGE_SIZE >= 8192)
- unsigned int truesize = SKB_DATA_ALIGN(size);
+ unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring));
#else
- unsigned int truesize = ICE_RXBUF_2048;
+ unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
#endif
if (!size)
@@ -679,10 +791,64 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
}
/**
+ * ice_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function builds an skb around an existing Rx buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead.
+ */
+static struct sk_buff *
+ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+ struct xdp_buff *xdp)
+{
+ unsigned int metasize = xdp->data - xdp->data_meta;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+ SKB_DATA_ALIGN(xdp->data_end -
+ xdp->data_hard_start);
+#endif
+ struct sk_buff *skb;
+
+ /* Prefetch first cache line of first page. If xdp->data_meta
+ * is unused, this points exactly as xdp->data, otherwise we
+ * likely have a consumer accessing first few bytes of meta
+ * data, and then actual data.
+ */
+ prefetch(xdp->data_meta);
+#if L1_CACHE_BYTES < 128
+ prefetch((void *)(xdp->data + L1_CACHE_BYTES));
+#endif
+ /* build an skb around the page buffer */
+ skb = build_skb(xdp->data_hard_start, truesize);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* must to record Rx queue, otherwise OS features such as
+ * symmetric queue won't work
+ */
+ skb_record_rx_queue(skb, rx_ring->q_index);
+
+ /* update pointers within the skb to store the data */
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ __skb_put(skb, xdp->data_end - xdp->data);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
+ /* buffer is used by skb, update page_offset */
+ ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+
+ return skb;
+}
+
+/**
* ice_construct_skb - Allocate skb and populate it
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
- * @size: the length of the packet
+ * @xdp: xdp_buff pointing to the data
*
* This function allocates an skb. It then populates it with the page
* data from the current receive descriptor, taking care to set up the
@@ -690,16 +856,16 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
*/
static struct sk_buff *
ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
- unsigned int size)
+ struct xdp_buff *xdp)
{
- void *va = page_address(rx_buf->page) + rx_buf->page_offset;
+ unsigned int size = xdp->data_end - xdp->data;
unsigned int headlen;
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
+ prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
- prefetch((u8 *)va + L1_CACHE_BYTES);
+ prefetch((void *)(xdp->data + L1_CACHE_BYTES));
#endif /* L1_CACHE_BYTES */
/* allocate a skb to store the frags */
@@ -712,10 +878,11 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
/* Determine available headroom for copy */
headlen = size;
if (headlen > ICE_RX_HDR_SIZE)
- headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE);
+ headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+ memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
+ sizeof(long)));
/* if we exhaust the linear part then add what is left as a frag */
size -= headlen;
@@ -723,7 +890,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
#if (PAGE_SIZE >= 8192)
unsigned int truesize = SKB_DATA_ALIGN(size);
#else
- unsigned int truesize = ICE_RXBUF_2048;
+ unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
#endif
skb_add_rx_frag(skb, 0, rx_buf->page,
rx_buf->page_offset + headlen, size, truesize);
@@ -745,11 +912,18 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
*
- * This function will clean up the contents of the rx_buf. It will
- * either recycle the buffer or unmap it and free the associated resources.
+ * This function will update next_to_clean and then clean up the contents
+ * of the rx_buf. It will either recycle the buffer or unmap it and free
+ * the associated resources.
*/
static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
if (!rx_buf)
return;
@@ -759,8 +933,9 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
rx_ring->rx_stats.page_reuse_count++;
} else {
/* we are not reusing the buffer so unmap it */
- dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
- DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+ dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
+ ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+ ICE_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
}
@@ -791,206 +966,31 @@ static bool ice_cleanup_headers(struct sk_buff *skb)
}
/**
- * ice_test_staterr - tests bits in Rx descriptor status and error fields
- * @rx_desc: pointer to receive descriptor (in le64 format)
- * @stat_err_bits: value to mask
- *
- * This function does some fast chicanery in order to return the
- * value of the mask which is really only used for boolean tests.
- * The status_error_len doesn't need to be shifted because it begins
- * at offset zero.
- */
-static bool
-ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
-{
- return !!(rx_desc->wb.status_error0 &
- cpu_to_le16(stat_err_bits));
-}
-
-/**
* ice_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
* @rx_desc: Rx descriptor for current buffer
* @skb: Current socket buffer containing buffer in progress
*
- * This function updates next to clean. If the buffer is an EOP buffer
- * this function exits returning false, otherwise it will place the
- * sk_buff in the next buffer to be chained and return true indicating
- * that this is in fact a non-EOP buffer.
+ * If the buffer is an EOP buffer, this function exits returning false,
+ * otherwise return true indicating that this is in fact a non-EOP buffer.
*/
static bool
ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
struct sk_buff *skb)
{
- u32 ntc = rx_ring->next_to_clean + 1;
-
- /* fetch, update, and store next to clean */
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
-
- prefetch(ICE_RX_DESC(rx_ring, ntc));
-
/* if we are the last buffer then there is nothing else to do */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
return false;
/* place skb in next buffer to be received */
- rx_ring->rx_buf[ntc].skb = skb;
+ rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb;
rx_ring->rx_stats.non_eop_descs++;
return true;
}
/**
- * ice_ptype_to_htype - get a hash type
- * @ptype: the ptype value from the descriptor
- *
- * Returns a hash type to be used by skb_set_hash
- */
-static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
-{
- return PKT_HASH_TYPE_NONE;
-}
-
-/**
- * ice_rx_hash - set the hash value in the skb
- * @rx_ring: descriptor ring
- * @rx_desc: specific descriptor
- * @skb: pointer to current skb
- * @rx_ptype: the ptype value from the descriptor
- */
-static void
-ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
- struct sk_buff *skb, u8 rx_ptype)
-{
- struct ice_32b_rx_flex_desc_nic *nic_mdid;
- u32 hash;
-
- if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
- return;
-
- if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
- return;
-
- nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
- hash = le32_to_cpu(nic_mdid->rss_hash);
- skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
-}
-
-/**
- * ice_rx_csum - Indicate in skb if checksum is good
- * @ring: the ring we care about
- * @skb: skb currently being received and modified
- * @rx_desc: the receive descriptor
- * @ptype: the packet type decoded by hardware
- *
- * skb->protocol must be set before this function is called
- */
-static void
-ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
- union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
-{
- struct ice_rx_ptype_decoded decoded;
- u32 rx_error, rx_status;
- bool ipv4, ipv6;
-
- rx_status = le16_to_cpu(rx_desc->wb.status_error0);
- rx_error = rx_status;
-
- decoded = ice_decode_rx_desc_ptype(ptype);
-
- /* Start with CHECKSUM_NONE and by default csum_level = 0 */
- skb->ip_summed = CHECKSUM_NONE;
- skb_checksum_none_assert(skb);
-
- /* check if Rx checksum is enabled */
- if (!(ring->netdev->features & NETIF_F_RXCSUM))
- return;
-
- /* check if HW has decoded the packet and checksum */
- if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
- return;
-
- if (!(decoded.known && decoded.outer_ip))
- return;
-
- ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
- (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
- ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
- (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
-
- if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
- BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
- goto checksum_fail;
- else if (ipv6 && (rx_status &
- (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
- goto checksum_fail;
-
- /* check for L4 errors and handle packets that were not able to be
- * checksummed due to arrival speed
- */
- if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
- goto checksum_fail;
-
- /* Only report checksum unnecessary for TCP, UDP, or SCTP */
- switch (decoded.inner_prot) {
- case ICE_RX_PTYPE_INNER_PROT_TCP:
- case ICE_RX_PTYPE_INNER_PROT_UDP:
- case ICE_RX_PTYPE_INNER_PROT_SCTP:
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- default:
- break;
- }
- return;
-
-checksum_fail:
- ring->vsi->back->hw_csum_rx_error++;
-}
-
-/**
- * ice_process_skb_fields - Populate skb header fields from Rx descriptor
- * @rx_ring: Rx descriptor ring packet is being transacted on
- * @rx_desc: pointer to the EOP Rx descriptor
- * @skb: pointer to current skb being populated
- * @ptype: the packet type decoded by hardware
- *
- * This function checks the ring, descriptor, and packet information in
- * order to populate the hash, checksum, VLAN, protocol, and
- * other fields within the skb.
- */
-static void
-ice_process_skb_fields(struct ice_ring *rx_ring,
- union ice_32b_rx_flex_desc *rx_desc,
- struct sk_buff *skb, u8 ptype)
-{
- ice_rx_hash(rx_ring, rx_desc, skb, ptype);
-
- /* modifies the skb - consumes the enet header */
- skb->protocol = eth_type_trans(skb, rx_ring->netdev);
-
- ice_rx_csum(rx_ring, skb, rx_desc, ptype);
-}
-
-/**
- * ice_receive_skb - Send a completed packet up the stack
- * @rx_ring: Rx ring in play
- * @skb: packet to send up
- * @vlan_tag: VLAN tag for packet
- *
- * This function sends the completed packet (via. skb) up the stack using
- * gro receive functions (with/without VLAN tag)
- */
-static void
-ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
-{
- if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
- (vlan_tag & VLAN_VID_MASK))
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
- napi_gro_receive(&rx_ring->q_vector->napi, skb);
-}
-
-/**
* ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: Rx descriptor ring to transact packets on
* @budget: Total limit on number of packets to process
@@ -1006,8 +1006,13 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+ unsigned int xdp_res, xdp_xmit = 0;
+ struct bpf_prog *xdp_prog = NULL;
+ struct xdp_buff xdp;
bool failure;
+ xdp.rxq = &rx_ring->xdp_rxq;
+
/* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
@@ -1042,10 +1047,57 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
/* retrieve a buffer from the ring */
rx_buf = ice_get_rx_buf(rx_ring, &skb, size);
+ if (!size) {
+ xdp.data = NULL;
+ xdp.data_end = NULL;
+ xdp.data_hard_start = NULL;
+ xdp.data_meta = NULL;
+ goto construct_skb;
+ }
+
+ xdp.data = page_address(rx_buf->page) + rx_buf->page_offset;
+ xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
+ xdp.data_meta = xdp.data;
+ xdp.data_end = xdp.data + size;
+
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+ if (!xdp_prog) {
+ rcu_read_unlock();
+ goto construct_skb;
+ }
+
+ xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog);
+ rcu_read_unlock();
+ if (!xdp_res)
+ goto construct_skb;
+ if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+ unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+ truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+ truesize = SKB_DATA_ALIGN(ice_rx_offset(rx_ring) +
+ size);
+#endif
+ xdp_xmit |= xdp_res;
+ ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+ } else {
+ rx_buf->pagecnt_bias++;
+ }
+ total_rx_bytes += size;
+ total_rx_pkts++;
+
+ cleaned_count++;
+ ice_put_rx_buf(rx_ring, rx_buf);
+ continue;
+construct_skb:
if (skb)
- ice_add_rx_frag(rx_buf, skb, size);
+ ice_add_rx_frag(rx_ring, rx_buf, skb, size);
+ else if (ice_ring_uses_build_skb(rx_ring))
+ skb = ice_build_skb(rx_ring, rx_buf, &xdp);
else
- skb = ice_construct_skb(rx_ring, rx_buf, size);
+ skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
/* exit if we failed to retrieve a buffer */
if (!skb) {
@@ -1099,13 +1151,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
/* return up to cleaned_count buffers to hardware */
failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
- /* update queue and vector specific stats */
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->stats.pkts += total_rx_pkts;
- rx_ring->stats.bytes += total_rx_bytes;
- u64_stats_update_end(&rx_ring->syncp);
- rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
- rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+ if (xdp_prog)
+ ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+
+ ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
/* guarantee a trip back through this routine if there was a failure */
return failure ? budget : (int)total_rx_pkts;
@@ -1483,9 +1532,14 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
/* Since the actual Tx work is minimal, we can give the Tx a larger
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
- ice_for_each_ring(ring, q_vector->tx)
- if (!ice_clean_tx_irq(ring, budget))
+ ice_for_each_ring(ring, q_vector->tx) {
+ bool wd = ring->xsk_umem ?
+ ice_clean_tx_irq_zc(ring, budget) :
+ ice_clean_tx_irq(ring, budget);
+
+ if (!wd)
clean_complete = false;
+ }
/* Handle case where we are called by netpoll with a budget of 0 */
if (unlikely(budget <= 0))
@@ -1505,7 +1559,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
ice_for_each_ring(ring, q_vector->rx) {
int cleaned;
- cleaned = ice_clean_rx_irq(ring, budget_per_ring);
+ /* A dedicated path for zero-copy allows making a single
+ * comparison in the irq context instead of many inside the
+ * ice_clean_rx_irq function and makes the codebase cleaner.
+ */
+ cleaned = ring->xsk_umem ?
+ ice_clean_rx_irq_zc(ring, budget_per_ring) :
+ ice_clean_rx_irq(ring, budget_per_ring);
work_done += cleaned;
/* if we clean as many as budgeted, we must not be done */
if (cleaned >= budget_per_ring)
@@ -1527,17 +1587,6 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
return min_t(int, work_done, budget - 1);
}
-/* helper function for building cmd/type/offset */
-static __le64
-build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
-{
- return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
- (td_cmd << ICE_TXD_QW1_CMD_S) |
- (td_offset << ICE_TXD_QW1_OFFSET_S) |
- ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
- (td_tag << ICE_TXD_QW1_L2TAG1_S));
-}
-
/**
* __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
* @tx_ring: the ring to be checked
@@ -1689,9 +1738,9 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
i = 0;
/* write last descriptor with RS and EOP bits */
- td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
- tx_desc->cmd_type_offset_bsz =
- build_ctob(td_cmd, td_offset, size, td_tag);
+ td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
+ tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size,
+ td_tag);
/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch.
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 94a9280193e2..a84cc0e6dd27 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -4,8 +4,12 @@
#ifndef _ICE_TXRX_H_
#define _ICE_TXRX_H_
+#include "ice_type.h"
+
#define ICE_DFLT_IRQ_WORK 256
+#define ICE_RXBUF_3072 3072
#define ICE_RXBUF_2048 2048
+#define ICE_RXBUF_1536 1536
#define ICE_MAX_CHAINED_RX_BUFS 5
#define ICE_MAX_BUF_TXD 8
#define ICE_MIN_TX_LEN 17
@@ -22,6 +26,71 @@
#define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */
#define ICE_MAX_TXQ_PER_TXQG 128
+/* Attempt to maximize the headroom available for incoming frames. We use a 2K
+ * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame.
+ * This leaves us with 512 bytes of room. From that we need to deduct the
+ * space needed for the shared info and the padding needed to IP align the
+ * frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ * up negative. In these cases we should fall back to the legacy
+ * receive path.
+ */
+#if (PAGE_SIZE < 8192)
+#define ICE_2K_TOO_SMALL_WITH_PADDING \
+((NET_SKB_PAD + ICE_RXBUF_1536) > SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
+
+/**
+ * ice_compute_pad - compute the padding
+ * rx_buf_len: buffer length
+ *
+ * Figure out the size of half page based on given buffer length and
+ * then subtract the skb_shared_info followed by subtraction of the
+ * actual buffer length; this in turn results in the actual space that
+ * is left for padding usage
+ */
+static inline int ice_compute_pad(int rx_buf_len)
+{
+ int half_page_size;
+
+ half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+ return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len;
+}
+
+/**
+ * ice_skb_pad - determine the padding that we can supply
+ *
+ * Figure out the right Rx buffer size and based on that calculate the
+ * padding
+ */
+static inline int ice_skb_pad(void)
+{
+ int rx_buf_len;
+
+ /* If a 2K buffer cannot handle a standard Ethernet frame then
+ * optimize padding for a 3K buffer instead of a 1.5K buffer.
+ *
+ * For a 3K buffer we need to add enough padding to allow for
+ * tailroom due to NET_IP_ALIGN possibly shifting us out of
+ * cache-line alignment.
+ */
+ if (ICE_2K_TOO_SMALL_WITH_PADDING)
+ rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+ else
+ rx_buf_len = ICE_RXBUF_1536;
+
+ /* if needed make room for NET_IP_ALIGN */
+ rx_buf_len -= NET_IP_ALIGN;
+
+ return ice_compute_pad(rx_buf_len);
+}
+
+#define ICE_SKB_PAD ice_skb_pad()
+#else
+#define ICE_2K_TOO_SMALL_WITH_PADDING false
+#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
/* We are assuming that the cache line is always 64 Bytes here for ice.
* In order to make sure that is a correct assumption there is a check in probe
* to print a warning if the read from GLPCI_CNF2 tells us that the cache line
@@ -49,12 +118,24 @@
#define ICE_TX_FLAGS_VLAN_PR_S 29
#define ICE_TX_FLAGS_VLAN_S 16
+#define ICE_XDP_PASS 0
+#define ICE_XDP_CONSUMED BIT(0)
+#define ICE_XDP_TX BIT(1)
+#define ICE_XDP_REDIR BIT(2)
+
#define ICE_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+#define ICE_ETH_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
+#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
+
struct ice_tx_buf {
struct ice_tx_desc *next_to_watch;
- struct sk_buff *skb;
+ union {
+ struct sk_buff *skb;
+ void *raw_buf; /* used for XDP */
+ };
unsigned int bytecount;
unsigned short gso_segs;
u32 tx_flags;
@@ -76,9 +157,17 @@ struct ice_tx_offload_params {
struct ice_rx_buf {
struct sk_buff *skb;
dma_addr_t dma;
- struct page *page;
- unsigned int page_offset;
- u16 pagecnt_bias;
+ union {
+ struct {
+ struct page *page;
+ unsigned int page_offset;
+ u16 pagecnt_bias;
+ };
+ struct {
+ void *addr;
+ u64 handle;
+ };
+ };
};
struct ice_q_stats {
@@ -198,18 +287,44 @@ struct ice_ring {
};
struct rcu_head rcu; /* to avoid race on free */
+ struct bpf_prog *xdp_prog;
+ struct xdp_umem *xsk_umem;
+ struct zero_copy_allocator zca;
+ /* CL3 - 3rd cacheline starts here */
+ struct xdp_rxq_info xdp_rxq;
/* CLX - the below items are only accessed infrequently and should be
* in their own cache line if possible
*/
+#define ICE_TX_FLAGS_RING_XDP BIT(0)
+#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1)
+ u8 flags;
dma_addr_t dma; /* physical address of ring */
unsigned int size; /* length of descriptor ring in bytes */
u32 txq_teid; /* Added Tx queue TEID */
u16 rx_buf_len;
-#ifdef CONFIG_DCB
u8 dcb_tc; /* Traffic class of ring */
-#endif /* CONFIG_DCB */
} ____cacheline_internodealigned_in_smp;
+static inline bool ice_ring_uses_build_skb(struct ice_ring *ring)
+{
+ return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB);
+}
+
+static inline void ice_set_ring_build_skb_ena(struct ice_ring *ring)
+{
+ ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline void ice_clear_ring_build_skb_ena(struct ice_ring *ring)
+{
+ ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline bool ice_ring_is_xdp(struct ice_ring *ring)
+{
+ return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
+}
+
struct ice_ring_container {
/* head of linked-list of rings */
struct ice_ring *ring;
@@ -230,6 +345,19 @@ struct ice_ring_container {
#define ice_for_each_ring(pos, head) \
for (pos = (head).ring; pos; pos = pos->next)
+static inline unsigned int ice_rx_pg_order(struct ice_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring->rx_buf_len > (PAGE_SIZE / 2))
+ return 1;
+#endif
+ return 0;
+}
+
+#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring))
+
+union ice_32b_rx_flex_desc;
+
bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);
netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
void ice_clean_tx_ring(struct ice_ring *tx_ring);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
new file mode 100644
index 000000000000..35bbc4ff603c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_txrx_lib.h"
+
+/**
+ * ice_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ */
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+{
+ u16 prev_ntu = rx_ring->next_to_use;
+
+ rx_ring->next_to_use = val;
+
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = val;
+
+ /* QRX_TAIL will be updated with any tail value, but hardware ignores
+ * the lower 3 bits. This makes it so we only bump tail on meaningful
+ * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+ * the budget depending on the current traffic load.
+ */
+ val &= ~0x7;
+ if (prev_ntu != val) {
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(val, rx_ring->tail);
+ }
+}
+
+/**
+ * ice_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ */
+static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
+{
+ return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * ice_rx_hash - set the hash value in the skb
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: pointer to current skb
+ * @rx_ptype: the ptype value from the descriptor
+ */
+static void
+ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u8 rx_ptype)
+{
+ struct ice_32b_rx_flex_desc_nic *nic_mdid;
+ u32 hash;
+
+ if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
+ return;
+
+ if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
+ return;
+
+ nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+ hash = le32_to_cpu(nic_mdid->rss_hash);
+ skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+}
+
+/**
+ * ice_rx_csum - Indicate in skb if checksum is good
+ * @ring: the ring we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void
+ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
+ union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
+{
+ struct ice_rx_ptype_decoded decoded;
+ u32 rx_error, rx_status;
+ bool ipv4, ipv6;
+
+ rx_status = le16_to_cpu(rx_desc->wb.status_error0);
+ rx_error = rx_status;
+
+ decoded = ice_decode_rx_desc_ptype(ptype);
+
+ /* Start with CHECKSUM_NONE and by default csum_level = 0 */
+ skb->ip_summed = CHECKSUM_NONE;
+ skb_checksum_none_assert(skb);
+
+ /* check if Rx checksum is enabled */
+ if (!(ring->netdev->features & NETIF_F_RXCSUM))
+ return;
+
+ /* check if HW has decoded the packet and checksum */
+ if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+ return;
+
+ if (!(decoded.known && decoded.outer_ip))
+ return;
+
+ ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
+ ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
+
+ if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+ BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+ goto checksum_fail;
+ else if (ipv6 && (rx_status &
+ (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+ goto checksum_fail;
+
+ /* check for L4 errors and handle packets that were not able to be
+ * checksummed due to arrival speed
+ */
+ if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+ goto checksum_fail;
+
+ /* Only report checksum unnecessary for TCP, UDP, or SCTP */
+ switch (decoded.inner_prot) {
+ case ICE_RX_PTYPE_INNER_PROT_TCP:
+ case ICE_RX_PTYPE_INNER_PROT_UDP:
+ case ICE_RX_PTYPE_INNER_PROT_SCTP:
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ default:
+ break;
+ }
+ return;
+
+checksum_fail:
+ ring->vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * ice_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: Rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u8 ptype)
+{
+ ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+
+ /* modifies the skb - consumes the enet header */
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+ ice_rx_csum(rx_ring, skb, rx_desc, ptype);
+}
+
+/**
+ * ice_receive_skb - Send a completed packet up the stack
+ * @rx_ring: Rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: VLAN tag for packet
+ *
+ * This function sends the completed packet (via. skb) up the stack using
+ * gro receive functions (with/without VLAN tag)
+ */
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
+{
+ if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ (vlan_tag & VLAN_VID_MASK))
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+ napi_gro_receive(&rx_ring->q_vector->napi, skb);
+}
+
+/**
+ * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
+ * @data: packet data pointer
+ * @size: packet data size
+ * @xdp_ring: XDP ring for transmission
+ */
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
+{
+ u16 i = xdp_ring->next_to_use;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ dma_addr_t dma;
+
+ if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
+ xdp_ring->tx_stats.tx_busy++;
+ return ICE_XDP_CONSUMED;
+ }
+
+ dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(xdp_ring->dev, dma))
+ return ICE_XDP_CONSUMED;
+
+ tx_buf = &xdp_ring->tx_buf[i];
+ tx_buf->bytecount = size;
+ tx_buf->gso_segs = 1;
+ tx_buf->raw_buf = data;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buf, len, size);
+ dma_unmap_addr_set(tx_buf, dma, dma);
+
+ tx_desc = ICE_TX_DESC(xdp_ring, i);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
+ size, 0);
+
+ /* Make certain all of the status bits have been updated
+ * before next_to_watch is written.
+ */
+ smp_wmb();
+
+ i++;
+ if (i == xdp_ring->count)
+ i = 0;
+
+ tx_buf->next_to_watch = tx_desc;
+ xdp_ring->next_to_use = i;
+
+ return ICE_XDP_TX;
+}
+
+/**
+ * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
+ * @xdp: XDP buffer
+ * @xdp_ring: XDP Tx ring
+ *
+ * Returns negative on failure, 0 on success.
+ */
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring)
+{
+ struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+
+ if (unlikely(!xdpf))
+ return ICE_XDP_CONSUMED;
+
+ return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+}
+
+/**
+ * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ */
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res)
+{
+ if (xdp_res & ICE_XDP_REDIR)
+ xdp_do_flush_map();
+
+ if (xdp_res & ICE_XDP_TX) {
+ struct ice_ring *xdp_ring =
+ rx_ring->vsi->xdp_rings[rx_ring->q_index];
+
+ ice_xdp_ring_update_tail(xdp_ring);
+ }
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
new file mode 100644
index 000000000000..ba9164dad9ae
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_TXRX_LIB_H_
+#define _ICE_TXRX_LIB_H_
+#include "ice.h"
+
+/**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool
+ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
+{
+ return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits));
+}
+
+static inline __le64
+build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+ return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+ (td_cmd << ICE_TXD_QW1_CMD_S) |
+ (td_offset << ICE_TXD_QW1_OFFSET_S) |
+ ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+ (td_tag << ICE_TXD_QW1_L2TAG1_S));
+}
+
+/**
+ * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ */
+static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
+{
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch.
+ */
+ wmb();
+ writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res);
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring);
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring);
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val);
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u8 ptype);
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+#endif /* !_ICE_TXRX_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index b45797f39b2f..ad757412bb04 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2018, Intel Corporation. */
#include "ice.h"
+#include "ice_base.h"
#include "ice_lib.h"
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
new file mode 100644
index 000000000000..fcffad0069d6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -0,0 +1,1181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_type.h"
+#include "ice_xsk.h"
+#include "ice_txrx.h"
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+
+/**
+ * ice_qp_reset_stats - Resets all stats for rings of given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
+{
+ memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
+ sizeof(vsi->rx_rings[q_idx]->rx_stats));
+ memset(&vsi->tx_rings[q_idx]->stats, 0,
+ sizeof(vsi->tx_rings[q_idx]->stats));
+ if (ice_is_xdp_ena_vsi(vsi))
+ memset(&vsi->xdp_rings[q_idx]->stats, 0,
+ sizeof(vsi->xdp_rings[q_idx]->stats));
+}
+
+/**
+ * ice_qp_clean_rings - Cleans all the rings of a given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
+{
+ ice_clean_tx_ring(vsi->tx_rings[q_idx]);
+ if (ice_is_xdp_ena_vsi(vsi))
+ ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
+ ice_clean_rx_ring(vsi->rx_rings[q_idx]);
+}
+
+/**
+ * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
+ * @vsi: VSI that has netdev
+ * @q_vector: q_vector that has NAPI context
+ * @enable: true for enable, false for disable
+ */
+static void
+ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
+ bool enable)
+{
+ if (!vsi->netdev || !q_vector)
+ return;
+
+ if (enable)
+ napi_enable(&q_vector->napi);
+ else
+ napi_disable(&q_vector->napi);
+}
+
+/**
+ * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
+ * @vsi: the VSI that contains queue vector being un-configured
+ * @rx_ring: Rx ring that will have its IRQ disabled
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_ring *rx_ring,
+ struct ice_q_vector *q_vector)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int base = vsi->base_vector;
+ u16 reg;
+ u32 val;
+
+ /* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle
+ * here only QINT_RQCTL
+ */
+ reg = rx_ring->reg_idx;
+ val = rd32(hw, QINT_RQCTL(reg));
+ val &= ~QINT_RQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_RQCTL(reg), val);
+
+ if (q_vector) {
+ u16 v_idx = q_vector->v_idx;
+
+ wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
+ ice_flush(hw);
+ synchronize_irq(pf->msix_entries[v_idx + base].vector);
+ }
+}
+
+/**
+ * ice_qvec_cfg_msix - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+ u16 reg_idx = q_vector->reg_idx;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_ring *ring;
+
+ ice_cfg_itr(hw, q_vector);
+
+ wr32(hw, GLINT_RATE(reg_idx),
+ ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
+
+ ice_for_each_ring(ring, q_vector->tx)
+ ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx,
+ q_vector->tx.itr_idx);
+
+ ice_for_each_ring(ring, q_vector->rx)
+ ice_cfg_rxq_interrupt(vsi, ring->reg_idx, reg_idx,
+ q_vector->rx.itr_idx);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_qvec_ena_irq - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+
+ ice_irq_dynamic_ena(hw, vsi, q_vector);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_qp_dis - Disables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+{
+ struct ice_txq_meta txq_meta = { };
+ struct ice_ring *tx_ring, *rx_ring;
+ struct ice_q_vector *q_vector;
+ int timeout = 50;
+ int err;
+
+ if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+ return -EINVAL;
+
+ tx_ring = vsi->tx_rings[q_idx];
+ rx_ring = vsi->rx_rings[q_idx];
+ q_vector = rx_ring->q_vector;
+
+ while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) {
+ timeout--;
+ if (!timeout)
+ return -EBUSY;
+ usleep_range(1000, 2000);
+ }
+ netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+
+ ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+
+ ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
+ if (err)
+ return err;
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+ memset(&txq_meta, 0, sizeof(txq_meta));
+ ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
+ &txq_meta);
+ if (err)
+ return err;
+ }
+ err = ice_vsi_ctrl_rx_ring(vsi, false, q_idx);
+ if (err)
+ return err;
+
+ ice_qvec_toggle_napi(vsi, q_vector, false);
+ ice_qp_clean_rings(vsi, q_idx);
+ ice_qp_reset_stats(vsi, q_idx);
+
+ return 0;
+}
+
+/**
+ * ice_qp_ena - Enables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
+{
+ struct ice_aqc_add_tx_qgrp *qg_buf;
+ struct ice_ring *tx_ring, *rx_ring;
+ struct ice_q_vector *q_vector;
+ int err;
+
+ if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+ return -EINVAL;
+
+ qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL);
+ if (!qg_buf)
+ return -ENOMEM;
+
+ qg_buf->num_txqs = 1;
+
+ tx_ring = vsi->tx_rings[q_idx];
+ rx_ring = vsi->rx_rings[q_idx];
+ q_vector = rx_ring->q_vector;
+
+ err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
+ if (err)
+ goto free_buf;
+
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+ memset(qg_buf, 0, sizeof(*qg_buf));
+ qg_buf->num_txqs = 1;
+ err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
+ if (err)
+ goto free_buf;
+ ice_set_ring_xdp(xdp_ring);
+ xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+ }
+
+ err = ice_setup_rx_ctx(rx_ring);
+ if (err)
+ goto free_buf;
+
+ ice_qvec_cfg_msix(vsi, q_vector);
+
+ err = ice_vsi_ctrl_rx_ring(vsi, true, q_idx);
+ if (err)
+ goto free_buf;
+
+ clear_bit(__ICE_CFG_BUSY, vsi->state);
+ ice_qvec_toggle_napi(vsi, q_vector, true);
+ ice_qvec_ena_irq(vsi, q_vector);
+
+ netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+free_buf:
+ kfree(qg_buf);
+ return err;
+}
+
+/**
+ * ice_xsk_alloc_umems - allocate a UMEM region for an XDP socket
+ * @vsi: VSI to allocate the UMEM on
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_alloc_umems(struct ice_vsi *vsi)
+{
+ if (vsi->xsk_umems)
+ return 0;
+
+ vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems),
+ GFP_KERNEL);
+
+ if (!vsi->xsk_umems) {
+ vsi->num_xsk_umems = 0;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_xsk_add_umem - add a UMEM region for XDP sockets
+ * @vsi: VSI to which the UMEM will be added
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_add_umem(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+ int err;
+
+ err = ice_xsk_alloc_umems(vsi);
+ if (err)
+ return err;
+
+ vsi->xsk_umems[qid] = umem;
+ vsi->num_xsk_umems_used++;
+
+ return 0;
+}
+
+/**
+ * ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid
+ * @vsi: VSI from which the VSI will be removed
+ * @qid: Ring/qid associated with the UMEM
+ */
+static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid)
+{
+ vsi->xsk_umems[qid] = NULL;
+ vsi->num_xsk_umems_used--;
+
+ if (vsi->num_xsk_umems_used == 0) {
+ kfree(vsi->xsk_umems);
+ vsi->xsk_umems = NULL;
+ vsi->num_xsk_umems = 0;
+ }
+}
+
+/**
+ * ice_xsk_umem_dma_map - DMA map UMEM region for XDP sockets
+ * @vsi: VSI to map the UMEM region
+ * @umem: UMEM to map
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ unsigned int i;
+
+ dev = &pf->pdev->dev;
+ for (i = 0; i < umem->npgs; i++) {
+ dma_addr_t dma = dma_map_page_attrs(dev, umem->pgs[i], 0,
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL,
+ ICE_RX_DMA_ATTR);
+ if (dma_mapping_error(dev, dma)) {
+ dev_dbg(dev,
+ "XSK UMEM DMA mapping error on page num %d", i);
+ goto out_unmap;
+ }
+
+ umem->pages[i].dma = dma;
+ }
+
+ return 0;
+
+out_unmap:
+ for (; i > 0; i--) {
+ dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+ umem->pages[i].dma = 0;
+ }
+
+ return -EFAULT;
+}
+
+/**
+ * ice_xsk_umem_dma_unmap - DMA unmap UMEM region for XDP sockets
+ * @vsi: VSI from which the UMEM will be unmapped
+ * @umem: UMEM to unmap
+ */
+static void ice_xsk_umem_dma_unmap(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ unsigned int i;
+
+ dev = &pf->pdev->dev;
+ for (i = 0; i < umem->npgs; i++) {
+ dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+
+ umem->pages[i].dma = 0;
+ }
+}
+
+/**
+ * ice_xsk_umem_disable - disable a UMEM region
+ * @vsi: Current VSI
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid)
+{
+ if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems ||
+ !vsi->xsk_umems[qid])
+ return -EINVAL;
+
+ ice_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]);
+ ice_xsk_remove_umem(vsi, qid);
+
+ return 0;
+}
+
+/**
+ * ice_xsk_umem_enable - enable a UMEM region
+ * @vsi: Current VSI
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+ struct xdp_umem_fq_reuse *reuseq;
+ int err;
+
+ if (vsi->type != ICE_VSI_PF)
+ return -EINVAL;
+
+ vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq);
+ if (qid >= vsi->num_xsk_umems)
+ return -EINVAL;
+
+ if (vsi->xsk_umems && vsi->xsk_umems[qid])
+ return -EBUSY;
+
+ reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
+ if (!reuseq)
+ return -ENOMEM;
+
+ xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+ err = ice_xsk_umem_dma_map(vsi, umem);
+ if (err)
+ return err;
+
+ err = ice_xsk_add_umem(vsi, umem, qid);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_xsk_umem_setup - enable/disable a UMEM region depending on its state
+ * @vsi: Current VSI
+ * @umem: UMEM to enable/associate to a ring, NULL to disable
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+ bool if_running, umem_present = !!umem;
+ int ret = 0, umem_failure = 0;
+
+ if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+
+ if (if_running) {
+ ret = ice_qp_dis(vsi, qid);
+ if (ret) {
+ netdev_err(vsi->netdev, "ice_qp_dis error = %d", ret);
+ goto xsk_umem_if_up;
+ }
+ }
+
+ umem_failure = umem_present ? ice_xsk_umem_enable(vsi, umem, qid) :
+ ice_xsk_umem_disable(vsi, qid);
+
+xsk_umem_if_up:
+ if (if_running) {
+ ret = ice_qp_ena(vsi, qid);
+ if (!ret && umem_present)
+ napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
+ else if (ret)
+ netdev_err(vsi->netdev, "ice_qp_ena error = %d", ret);
+ }
+
+ if (umem_failure) {
+ netdev_err(vsi->netdev, "Could not %sable UMEM, error = %d",
+ umem_present ? "en" : "dis", umem_failure);
+ return umem_failure;
+ }
+
+ return ret;
+}
+
+/**
+ * ice_zca_free - Callback for MEM_TYPE_ZERO_COPY allocations
+ * @zca: zero-cpoy allocator
+ * @handle: Buffer handle
+ */
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+ struct ice_rx_buf *rx_buf;
+ struct ice_ring *rx_ring;
+ struct xdp_umem *umem;
+ u64 hr, mask;
+ u16 nta;
+
+ rx_ring = container_of(zca, struct ice_ring, zca);
+ umem = rx_ring->xsk_umem;
+ hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+ mask = umem->chunk_mask;
+
+ nta = rx_ring->next_to_alloc;
+ rx_buf = &rx_ring->rx_buf[nta];
+
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ handle &= mask;
+
+ rx_buf->dma = xdp_umem_get_dma(umem, handle);
+ rx_buf->dma += hr;
+
+ rx_buf->addr = xdp_umem_get_data(umem, handle);
+ rx_buf->addr += hr;
+
+ rx_buf->handle = (u64)handle + umem->headroom;
+}
+
+/**
+ * ice_alloc_buf_fast_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the hot path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+ struct xdp_umem *umem = rx_ring->xsk_umem;
+ void *addr = rx_buf->addr;
+ u64 handle, hr;
+
+ if (addr) {
+ rx_ring->rx_stats.page_reuse_count++;
+ return true;
+ }
+
+ if (!xsk_umem_peek_addr(umem, &handle)) {
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
+
+ hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+ rx_buf->dma = xdp_umem_get_dma(umem, handle);
+ rx_buf->dma += hr;
+
+ rx_buf->addr = xdp_umem_get_data(umem, handle);
+ rx_buf->addr += hr;
+
+ rx_buf->handle = handle + umem->headroom;
+
+ xsk_umem_discard_addr(umem);
+ return true;
+}
+
+/**
+ * ice_alloc_buf_slow_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the slow path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+ struct xdp_umem *umem = rx_ring->xsk_umem;
+ u64 handle, headroom;
+
+ if (!xsk_umem_peek_addr_rq(umem, &handle)) {
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
+
+ handle &= umem->chunk_mask;
+ headroom = umem->headroom + XDP_PACKET_HEADROOM;
+
+ rx_buf->dma = xdp_umem_get_dma(umem, handle);
+ rx_buf->dma += headroom;
+
+ rx_buf->addr = xdp_umem_get_data(umem, handle);
+ rx_buf->addr += headroom;
+
+ rx_buf->handle = handle + umem->headroom;
+
+ xsk_umem_discard_addr_rq(umem);
+ return true;
+}
+
+/**
+ * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ * @alloc: the function pointer to call for allocation
+ *
+ * This function allocates a number of Rx buffers from the fill ring
+ * or the internal recycle mechanism and places them on the Rx ring.
+ *
+ * Returns false if all allocations were successful, true if any fail.
+ */
+static bool
+ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
+ bool alloc(struct ice_ring *, struct ice_rx_buf *))
+{
+ union ice_32b_rx_flex_desc *rx_desc;
+ u16 ntu = rx_ring->next_to_use;
+ struct ice_rx_buf *rx_buf;
+ bool ret = false;
+
+ if (!count)
+ return false;
+
+ rx_desc = ICE_RX_DESC(rx_ring, ntu);
+ rx_buf = &rx_ring->rx_buf[ntu];
+
+ do {
+ if (!alloc(rx_ring, rx_buf)) {
+ ret = true;
+ break;
+ }
+
+ dma_sync_single_range_for_device(rx_ring->dev, rx_buf->dma, 0,
+ rx_ring->rx_buf_len,
+ DMA_BIDIRECTIONAL);
+
+ rx_desc->read.pkt_addr = cpu_to_le64(rx_buf->dma);
+ rx_desc->wb.status_error0 = 0;
+
+ rx_desc++;
+ rx_buf++;
+ ntu++;
+
+ if (unlikely(ntu == rx_ring->count)) {
+ rx_desc = ICE_RX_DESC(rx_ring, 0);
+ rx_buf = rx_ring->rx_buf;
+ ntu = 0;
+ }
+ } while (--count);
+
+ if (rx_ring->next_to_use != ntu)
+ ice_release_rx_desc(rx_ring, ntu);
+
+ return ret;
+}
+
+/**
+ * ice_alloc_rx_bufs_fast_zc - allocate zero copy bufs in the hot path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+static bool ice_alloc_rx_bufs_fast_zc(struct ice_ring *rx_ring, u16 count)
+{
+ return ice_alloc_rx_bufs_zc(rx_ring, count,
+ ice_alloc_buf_fast_zc);
+}
+
+/**
+ * ice_alloc_rx_bufs_slow_zc - allocate zero copy bufs in the slow path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count)
+{
+ return ice_alloc_rx_bufs_zc(rx_ring, count,
+ ice_alloc_buf_slow_zc);
+}
+
+/**
+ * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
+ * @rx_ring: Rx ring
+ */
+static void ice_bump_ntc(struct ice_ring *rx_ring)
+{
+ int ntc = rx_ring->next_to_clean + 1;
+
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+ prefetch(ICE_RX_DESC(rx_ring, ntc));
+}
+
+/**
+ * ice_get_rx_buf_zc - Fetch the current Rx buffer
+ * @rx_ring: Rx ring
+ * @size: size of a buffer
+ *
+ * This function returns the current, received Rx buffer and does
+ * DMA synchronization.
+ *
+ * Returns a pointer to the received Rx buffer.
+ */
+static struct ice_rx_buf *ice_get_rx_buf_zc(struct ice_ring *rx_ring, int size)
+{
+ struct ice_rx_buf *rx_buf;
+
+ rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+
+ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, 0,
+ size, DMA_BIDIRECTIONAL);
+
+ return rx_buf;
+}
+
+/**
+ * ice_reuse_rx_buf_zc - reuse an Rx buffer
+ * @rx_ring: Rx ring
+ * @old_buf: The buffer to recycle
+ *
+ * This function recycles a finished Rx buffer, and places it on the recycle
+ * queue (next_to_alloc).
+ */
+static void
+ice_reuse_rx_buf_zc(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
+{
+ unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
+ u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
+ u16 nta = rx_ring->next_to_alloc;
+ struct ice_rx_buf *new_buf;
+
+ new_buf = &rx_ring->rx_buf[nta++];
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ new_buf->dma = old_buf->dma & mask;
+ new_buf->dma += hr;
+
+ new_buf->addr = (void *)((unsigned long)old_buf->addr & mask);
+ new_buf->addr += hr;
+
+ new_buf->handle = old_buf->handle & mask;
+ new_buf->handle += rx_ring->xsk_umem->headroom;
+
+ old_buf->addr = NULL;
+}
+
+/**
+ * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
+ * @rx_ring: Rx ring
+ * @rx_buf: zero-copy Rx buffer
+ * @xdp: XDP buffer
+ *
+ * This function allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb on success, NULL on failure.
+ */
+static struct sk_buff *
+ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+ struct xdp_buff *xdp)
+{
+ unsigned int metasize = xdp->data - xdp->data_meta;
+ unsigned int datasize = xdp->data_end - xdp->data;
+ unsigned int datasize_hard = xdp->data_end -
+ xdp->data_hard_start;
+ struct sk_buff *skb;
+
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
+ ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+
+ return skb;
+}
+
+/**
+ * ice_run_xdp_zc - Executes an XDP program in zero-copy path
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
+{
+ int err, result = ICE_XDP_PASS;
+ struct bpf_prog *xdp_prog;
+ struct ice_ring *xdp_ring;
+ u32 act;
+
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+ if (!xdp_prog) {
+ rcu_read_unlock();
+ return ICE_XDP_PASS;
+ }
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ xdp->handle += xdp->data - xdp->data_hard_start;
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
+ result = ice_xmit_xdp_buff(xdp, xdp_ring);
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fallthrough -- not supported action */
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ /* fallthrough -- handle aborts by dropping frame */
+ case XDP_DROP:
+ result = ICE_XDP_CONSUMED;
+ break;
+ }
+
+ rcu_read_unlock();
+ return result;
+}
+
+/**
+ * ice_clean_rx_irq_zc - consumes packets from the hardware ring
+ * @rx_ring: AF_XDP Rx ring
+ * @budget: NAPI budget
+ *
+ * Returns number of processed packets on success, remaining budget on failure.
+ */
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
+{
+ unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+ unsigned int xdp_xmit = 0;
+ struct xdp_buff xdp;
+ bool failure = 0;
+
+ xdp.rxq = &rx_ring->xdp_rxq;
+
+ while (likely(total_rx_packets < (unsigned int)budget)) {
+ union ice_32b_rx_flex_desc *rx_desc;
+ unsigned int size, xdp_res = 0;
+ struct ice_rx_buf *rx_buf;
+ struct sk_buff *skb;
+ u16 stat_err_bits;
+ u16 vlan_tag = 0;
+ u8 rx_ptype;
+
+ if (cleaned_count >= ICE_RX_BUF_WRITE) {
+ failure |= ice_alloc_rx_bufs_fast_zc(rx_ring,
+ cleaned_count);
+ cleaned_count = 0;
+ }
+
+ rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+ if (!ice_test_staterr(rx_desc, stat_err_bits))
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we have
+ * verified the descriptor has been written back.
+ */
+ dma_rmb();
+
+ size = le16_to_cpu(rx_desc->wb.pkt_len) &
+ ICE_RX_FLX_DESC_PKT_LEN_M;
+ if (!size)
+ break;
+
+ rx_buf = ice_get_rx_buf_zc(rx_ring, size);
+ if (!rx_buf->addr)
+ break;
+
+ xdp.data = rx_buf->addr;
+ xdp.data_meta = xdp.data;
+ xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+ xdp.data_end = xdp.data + size;
+ xdp.handle = rx_buf->handle;
+
+ xdp_res = ice_run_xdp_zc(rx_ring, &xdp);
+ if (xdp_res) {
+ if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+ xdp_xmit |= xdp_res;
+ rx_buf->addr = NULL;
+ } else {
+ ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+ }
+
+ total_rx_bytes += size;
+ total_rx_packets++;
+ cleaned_count++;
+
+ ice_bump_ntc(rx_ring);
+ continue;
+ }
+
+ /* XDP_PASS path */
+ skb = ice_construct_skb_zc(rx_ring, rx_buf, &xdp);
+ if (!skb) {
+ rx_ring->rx_stats.alloc_buf_failed++;
+ break;
+ }
+
+ cleaned_count++;
+ ice_bump_ntc(rx_ring);
+
+ if (eth_skb_pad(skb)) {
+ skb = NULL;
+ continue;
+ }
+
+ total_rx_bytes += skb->len;
+ total_rx_packets++;
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+ if (ice_test_staterr(rx_desc, stat_err_bits))
+ vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+
+ rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+ ICE_RX_FLEX_DESC_PTYPE_M;
+
+ ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+ ice_receive_skb(rx_ring, skb, vlan_tag);
+ }
+
+ ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+ ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
+
+ return failure ? budget : (int)total_rx_packets;
+}
+
+/**
+ * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: max number of frames to xmit
+ *
+ * Returns true if cleanup/transmission is done.
+ */
+static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
+{
+ struct ice_tx_desc *tx_desc = NULL;
+ bool work_done = true;
+ struct xdp_desc desc;
+ dma_addr_t dma;
+
+ while (likely(budget-- > 0)) {
+ struct ice_tx_buf *tx_buf;
+
+ if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
+ xdp_ring->tx_stats.tx_busy++;
+ work_done = false;
+ break;
+ }
+
+ tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
+
+ if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
+ break;
+
+ dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+
+ dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
+ DMA_BIDIRECTIONAL);
+
+ tx_buf->bytecount = desc.len;
+
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD,
+ 0, desc.len, 0);
+
+ xdp_ring->next_to_use++;
+ if (xdp_ring->next_to_use == xdp_ring->count)
+ xdp_ring->next_to_use = 0;
+ }
+
+ if (tx_desc) {
+ ice_xdp_ring_update_tail(xdp_ring);
+ xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+ }
+
+ return budget > 0 && work_done;
+}
+
+/**
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
+ * @xdp_ring: XDP Tx ring
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+ xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+ dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: NAPI budget
+ *
+ * Returns true if cleanup/tranmission is done.
+ */
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget)
+{
+ int total_packets = 0, total_bytes = 0;
+ s16 ntc = xdp_ring->next_to_clean;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ bool xmit_done = true;
+ u32 xsk_frames = 0;
+
+ tx_desc = ICE_TX_DESC(xdp_ring, ntc);
+ tx_buf = &xdp_ring->tx_buf[ntc];
+ ntc -= xdp_ring->count;
+
+ do {
+ if (!(tx_desc->cmd_type_offset_bsz &
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+ break;
+
+ total_bytes += tx_buf->bytecount;
+ total_packets++;
+
+ if (tx_buf->raw_buf) {
+ ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+ tx_buf->raw_buf = NULL;
+ } else {
+ xsk_frames++;
+ }
+
+ tx_desc->cmd_type_offset_bsz = 0;
+ tx_buf++;
+ tx_desc++;
+ ntc++;
+
+ if (unlikely(!ntc)) {
+ ntc -= xdp_ring->count;
+ tx_buf = xdp_ring->tx_buf;
+ tx_desc = ICE_TX_DESC(xdp_ring, 0);
+ }
+
+ prefetch(tx_desc);
+
+ } while (likely(--budget));
+
+ ntc += xdp_ring->count;
+ xdp_ring->next_to_clean = ntc;
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+
+ ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
+ xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
+
+ return budget > 0 && xmit_done;
+}
+
+/**
+ * ice_xsk_wakeup - Implements ndo_xsk_wakeup
+ * @netdev: net_device
+ * @queue_id: queue to wake up
+ * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
+ *
+ * Returns negative on error, zero otherwise.
+ */
+int
+ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
+ u32 __always_unused flags)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_q_vector *q_vector;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_ring *ring;
+
+ if (test_bit(__ICE_DOWN, vsi->state))
+ return -ENETDOWN;
+
+ if (!ice_is_xdp_ena_vsi(vsi))
+ return -ENXIO;
+
+ if (queue_id >= vsi->num_txq)
+ return -ENXIO;
+
+ if (!vsi->xdp_rings[queue_id]->xsk_umem)
+ return -ENXIO;
+
+ ring = vsi->xdp_rings[queue_id];
+
+ /* The idea here is that if NAPI is running, mark a miss, so
+ * it will run again. If not, trigger an interrupt and
+ * schedule the NAPI from interrupt context. If NAPI would be
+ * scheduled here, the interrupt affinity would not be
+ * honored.
+ */
+ q_vector = ring->q_vector;
+ if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+ ice_trigger_sw_intr(&vsi->back->hw, q_vector);
+
+ return 0;
+}
+
+/**
+ * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP UMEM attached
+ * @vsi: VSI to be checked
+ *
+ * Returns true if any of the Rx rings has an AF_XDP UMEM attached
+ */
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
+{
+ int i;
+
+ if (!vsi->xsk_umems)
+ return false;
+
+ for (i = 0; i < vsi->num_xsk_umems; i++) {
+ if (vsi->xsk_umems[i])
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_xsk_clean_rx_ring - clean UMEM queues connected to a given Rx ring
+ * @rx_ring: ring to be cleaned
+ */
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
+{
+ u16 i;
+
+ for (i = 0; i < rx_ring->count; i++) {
+ struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+ if (!rx_buf->addr)
+ continue;
+
+ xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_buf->handle);
+ rx_buf->addr = NULL;
+ }
+}
+
+/**
+ * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its UMEM queues
+ * @xdp_ring: XDP_Tx ring
+ */
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring)
+{
+ u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
+ u32 xsk_frames = 0;
+
+ while (ntc != ntu) {
+ struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
+
+ if (tx_buf->raw_buf)
+ ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+ else
+ xsk_frames++;
+
+ tx_buf->raw_buf = NULL;
+
+ ntc++;
+ if (ntc >= xdp_ring->count)
+ ntc = 0;
+ }
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
new file mode 100644
index 000000000000..3479e1de98fe
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_XSK_H_
+#define _ICE_XSK_H_
+#include "ice_txrx.h"
+#include "ice.h"
+
+struct ice_vsi;
+
+#ifdef CONFIG_XDP_SOCKETS
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid);
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget);
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget);
+int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count);
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring);
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring);
+#else
+static inline int
+ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi,
+ struct xdp_umem __always_unused *umem,
+ u16 __always_unused qid)
+{
+ return -ENOTSUPP;
+}
+
+static inline void
+ice_zca_free(struct zero_copy_allocator __always_unused *zca,
+ unsigned long __always_unused handle)
+{
+}
+
+static inline int
+ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring,
+ int __always_unused budget)
+{
+ return 0;
+}
+
+static inline bool
+ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring,
+ int __always_unused budget)
+{
+ return false;
+}
+
+static inline bool
+ice_alloc_rx_bufs_slow_zc(struct ice_ring __always_unused *rx_ring,
+ u16 __always_unused count)
+{
+ return false;
+}
+
+static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi)
+{
+ return false;
+}
+
+static inline int
+ice_xsk_wakeup(struct net_device __always_unused *netdev,
+ u32 __always_unused queue_id, u32 __always_unused flags)
+{
+ return -ENOTSUPP;
+}
+
+#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0)
+#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0)
+#endif /* CONFIG_XDP_SOCKETS */
+#endif /* !_ICE_XSK_H_ */