aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/intel/ice
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/intel/ice')
-rw-r--r--drivers/net/ethernet/intel/ice/Makefile5
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h71
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h79
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c859
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.h31
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c205
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_controlq.c65
-rw-r--r--drivers/net/ethernet/intel/ice/ice_controlq.h7
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb.c6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.c313
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.h28
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_nl.c933
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_nl.h19
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c524
-rw-r--r--drivers/net/ethernet/intel/ice/ice_hw_autogen.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c1327
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h55
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c810
-rw-r--r--drivers/net/ethernet/intel/ice/ice_nvm.c51
-rw-r--r--drivers/net/ethernet/intel/ice/ice_nvm.h8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.c1269
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.h39
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c16
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.h5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c600
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h140
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.c273
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.h59
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h69
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c535
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c1181
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.h72
35 files changed, 7753 insertions, 1930 deletions
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 9edde960b4f2..7cb829132d28 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -13,9 +13,12 @@ ice-y := ice_main.o \
ice_nvm.o \
ice_switch.o \
ice_sched.o \
+ ice_base.o \
ice_lib.o \
+ ice_txrx_lib.o \
ice_txrx.o \
ice_flex_pipe.o \
ice_ethtool.o
ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
-ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o
+ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
+ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 45e100666049..f972dce8aebb 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -29,10 +29,13 @@
#include <linux/ip.h>
#include <linux/sctp.h>
#include <linux/ipv6.h>
+#include <linux/pkt_sched.h>
#include <linux/if_bridge.h>
#include <linux/ctype.h>
+#include <linux/bpf.h>
#include <linux/avf/virtchnl.h>
#include <net/ipv6.h>
+#include <net/xdp_sock.h>
#include "ice_devids.h"
#include "ice_type.h"
#include "ice_txrx.h"
@@ -42,6 +45,7 @@
#include "ice_sched.h"
#include "ice_virtchnl_pf.h"
#include "ice_sriov.h"
+#include "ice_xsk.h"
extern const char ice_drv_ver[];
#define ICE_BAR0 0
@@ -78,8 +82,7 @@ extern const char ice_drv_ver[];
#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
-#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \
- (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)))
+#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
#define ICE_UP_TABLE_TRANSLATE(val, i) \
(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
@@ -127,6 +130,16 @@ extern const char ice_drv_ver[];
ICE_PROMISC_VLAN_TX | \
ICE_PROMISC_VLAN_RX)
+#define ice_pf_to_dev(pf) (&((pf)->pdev->dev))
+
+struct ice_txq_meta {
+ u32 q_teid; /* Tx-scheduler element identifier */
+ u16 q_id; /* Entry in VSI's txq_map bitmap */
+ u16 q_handle; /* Relative index of Tx queue within TC */
+ u16 vsi_idx; /* VSI index that Tx queue belongs to */
+ u8 tc; /* TC number that Tx queue belongs to */
+};
+
struct ice_tc_info {
u16 qoffset;
u16 qcount_tx;
@@ -169,6 +182,7 @@ enum ice_state {
__ICE_NEEDS_RESTART,
__ICE_PREPARED_FOR_RESET, /* set by driver when prepared */
__ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */
+ __ICE_DCBNL_DEVRESET, /* set by dcbnl devreset */
__ICE_PFR_REQ, /* set by driver and peers */
__ICE_CORER_REQ, /* set by driver and peers */
__ICE_GLOBR_REQ, /* set by driver and peers */
@@ -271,9 +285,18 @@ struct ice_vsi {
u16 num_txq; /* Used Tx queues */
u16 alloc_rxq; /* Allocated Rx queues */
u16 num_rxq; /* Used Rx queues */
+ u16 req_txq; /* User requested Tx queues */
+ u16 req_rxq; /* User requested Rx queues */
u16 num_rx_desc;
u16 num_tx_desc;
struct ice_tc_cfg tc_cfg;
+ struct bpf_prog *xdp_prog;
+ struct ice_ring **xdp_rings; /* XDP ring array */
+ u16 num_xdp_txq; /* Used XDP queues */
+ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+ struct xdp_umem **xsk_umems;
+ u16 num_xsk_umems_used;
+ u16 num_xsk_umems;
} ____cacheline_internodealigned_in_smp;
/* struct that defines an interrupt vector */
@@ -313,6 +336,7 @@ enum ice_pf_flags {
ICE_FLAG_NO_MEDIA,
ICE_FLAG_FW_LLDP_AGENT,
ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */
+ ICE_FLAG_LEGACY_RX,
ICE_PF_FLAGS_NBITS /* must be last */
};
@@ -346,6 +370,7 @@ struct ice_pf {
struct work_struct serv_task;
struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */
struct mutex sw_mutex; /* lock for protecting VSI alloc flow */
+ struct mutex tc_mutex; /* lock to protect TC changes */
u32 msg_enable;
u32 hw_csum_rx_error;
u32 oicr_idx; /* Other interrupt cause MSIX vector index */
@@ -417,6 +442,37 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
return np->vsi->back;
}
+static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
+{
+ return !!vsi->xdp_prog;
+}
+
+static inline void ice_set_ring_xdp(struct ice_ring *ring)
+{
+ ring->flags |= ICE_TX_FLAGS_RING_XDP;
+}
+
+/**
+ * ice_xsk_umem - get XDP UMEM bound to a ring
+ * @ring - ring to use
+ *
+ * Returns a pointer to xdp_umem structure if there is an UMEM present,
+ * NULL otherwise.
+ */
+static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
+{
+ struct xdp_umem **umems = ring->vsi->xsk_umems;
+ int qid = ring->q_index;
+
+ if (ice_ring_is_xdp(ring))
+ qid -= ring->vsi->num_xdp_txq;
+
+ if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi))
+ return NULL;
+
+ return umems[qid];
+}
+
/**
* ice_get_main_vsi - Get the PF VSI
* @pf: PF instance
@@ -437,20 +493,23 @@ void ice_set_ethtool_ops(struct net_device *netdev);
void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
u16 ice_get_avail_txq_count(struct ice_pf *pf);
u16 ice_get_avail_rxq_count(struct ice_pf *pf);
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx);
void ice_update_vsi_stats(struct ice_vsi *vsi);
void ice_update_pf_stats(struct ice_pf *pf);
int ice_up(struct ice_vsi *vsi);
int ice_down(struct ice_vsi *vsi);
int ice_vsi_cfg(struct ice_vsi *vsi);
struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
+int ice_destroy_xdp_rings(struct ice_vsi *vsi);
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
+int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
-#ifdef CONFIG_DCB
-int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked);
-void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked);
-#endif /* CONFIG_DCB */
int ice_open(struct net_device *netdev);
int ice_stop(struct net_device *netdev);
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 023e3d2fee5f..5421fc413f94 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -742,6 +742,10 @@ struct ice_aqc_add_elem {
struct ice_aqc_txsched_elem_data generic[1];
};
+struct ice_aqc_conf_elem {
+ struct ice_aqc_txsched_elem_data generic[1];
+};
+
struct ice_aqc_get_elem {
struct ice_aqc_txsched_elem_data generic[1];
};
@@ -783,6 +787,44 @@ struct ice_aqc_port_ets_elem {
__le32 tc_node_teid[8]; /* Used for response, reserved in command */
};
+/* Rate limiting profile for
+ * Add RL profile (indirect 0x0410)
+ * Query RL profile (indirect 0x0411)
+ * Remove RL profile (indirect 0x0415)
+ * These indirect commands acts on single or multiple
+ * RL profiles with specified data.
+ */
+struct ice_aqc_rl_profile {
+ __le16 num_profiles;
+ __le16 num_processed; /* Only for response. Reserved in Command. */
+ u8 reserved[4];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_rl_profile_elem {
+ u8 level;
+ u8 flags;
+#define ICE_AQC_RL_PROFILE_TYPE_S 0x0
+#define ICE_AQC_RL_PROFILE_TYPE_M (0x3 << ICE_AQC_RL_PROFILE_TYPE_S)
+#define ICE_AQC_RL_PROFILE_TYPE_CIR 0
+#define ICE_AQC_RL_PROFILE_TYPE_EIR 1
+#define ICE_AQC_RL_PROFILE_TYPE_SRL 2
+/* The following flag is used for Query RL Profile Data */
+#define ICE_AQC_RL_PROFILE_INVAL_S 0x7
+#define ICE_AQC_RL_PROFILE_INVAL_M (0x1 << ICE_AQC_RL_PROFILE_INVAL_S)
+
+ __le16 profile_id;
+ __le16 max_burst_size;
+ __le16 rl_multiply;
+ __le16 wake_up_calc;
+ __le16 rl_encode;
+};
+
+struct ice_aqc_rl_profile_generic_elem {
+ struct ice_aqc_rl_profile_elem generic[1];
+};
+
/* Query Scheduler Resource Allocation (indirect 0x0412)
* This indirect command retrieves the scheduler resources allocated by
* EMP Firmware to the given PF.
@@ -1044,6 +1086,10 @@ struct ice_aqc_get_link_status_data {
#define ICE_AQ_LINK_TOPO_CONFLICT BIT(0)
#define ICE_AQ_LINK_MEDIA_CONFLICT BIT(1)
#define ICE_AQ_LINK_TOPO_CORRUPT BIT(2)
+#define ICE_AQ_LINK_TOPO_UNREACH_PRT BIT(4)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_PRT BIT(5)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA BIT(6)
+#define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA BIT(7)
u8 reserved1;
u8 link_info;
#define ICE_AQ_LINK_UP BIT(0) /* Link Status */
@@ -1147,6 +1193,33 @@ struct ice_aqc_set_port_id_led {
u8 rsvd[13];
};
+/* Read/Write SFF EEPROM command (indirect 0x06EE) */
+struct ice_aqc_sff_eeprom {
+ u8 lport_num;
+ u8 lport_num_valid;
+#define ICE_AQC_SFF_PORT_NUM_VALID BIT(0)
+ __le16 i2c_bus_addr;
+#define ICE_AQC_SFF_I2CBUS_7BIT_M 0x7F
+#define ICE_AQC_SFF_I2CBUS_10BIT_M 0x3FF
+#define ICE_AQC_SFF_I2CBUS_TYPE_M BIT(10)
+#define ICE_AQC_SFF_I2CBUS_TYPE_7BIT 0
+#define ICE_AQC_SFF_I2CBUS_TYPE_10BIT ICE_AQC_SFF_I2CBUS_TYPE_M
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_S 11
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_M (0x3 << ICE_AQC_SFF_SET_EEPROM_PAGE_S)
+#define ICE_AQC_SFF_NO_PAGE_CHANGE 0
+#define ICE_AQC_SFF_SET_23_ON_MISMATCH 1
+#define ICE_AQC_SFF_SET_22_ON_MISMATCH 2
+#define ICE_AQC_SFF_IS_WRITE BIT(15)
+ __le16 i2c_mem_addr;
+ __le16 eeprom_page;
+#define ICE_AQC_SFF_EEPROM_BANK_S 0
+#define ICE_AQC_SFF_EEPROM_BANK_M (0xFF << ICE_AQC_SFF_EEPROM_BANK_S)
+#define ICE_AQC_SFF_EEPROM_PAGE_S 8
+#define ICE_AQC_SFF_EEPROM_PAGE_M (0xFF << ICE_AQC_SFF_EEPROM_PAGE_S)
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
/* NVM Read command (indirect 0x0701)
* NVM Erase commands (direct 0x0702)
* NVM Update commands (indirect 0x0703)
@@ -1618,6 +1691,7 @@ struct ice_aq_desc {
struct ice_aqc_get_phy_caps get_phy;
struct ice_aqc_set_phy_cfg set_phy;
struct ice_aqc_restart_an restart_an;
+ struct ice_aqc_sff_eeprom read_write_sff_param;
struct ice_aqc_set_port_id_led set_port_id_led;
struct ice_aqc_get_sw_cfg get_sw_conf;
struct ice_aqc_sw_rules sw_rules;
@@ -1625,6 +1699,7 @@ struct ice_aq_desc {
struct ice_aqc_sched_elem_cmd sched_elem_cmd;
struct ice_aqc_query_txsched_res query_sched_res;
struct ice_aqc_query_port_ets port_ets;
+ struct ice_aqc_rl_profile rl_profile;
struct ice_aqc_nvm nvm;
struct ice_aqc_nvm_checksum nvm_checksum;
struct ice_aqc_pf_vf_msg virt;
@@ -1726,12 +1801,15 @@ enum ice_adminq_opc {
/* transmit scheduler commands */
ice_aqc_opc_get_dflt_topo = 0x0400,
ice_aqc_opc_add_sched_elems = 0x0401,
+ ice_aqc_opc_cfg_sched_elems = 0x0403,
ice_aqc_opc_get_sched_elems = 0x0404,
ice_aqc_opc_suspend_sched_elems = 0x0409,
ice_aqc_opc_resume_sched_elems = 0x040A,
ice_aqc_opc_query_port_ets = 0x040E,
ice_aqc_opc_delete_sched_elems = 0x040F,
+ ice_aqc_opc_add_rl_profiles = 0x0410,
ice_aqc_opc_query_sched_res = 0x0412,
+ ice_aqc_opc_remove_rl_profiles = 0x0415,
/* PHY commands */
ice_aqc_opc_get_phy_caps = 0x0600,
@@ -1741,6 +1819,7 @@ enum ice_adminq_opc {
ice_aqc_opc_set_event_mask = 0x0613,
ice_aqc_opc_set_mac_lb = 0x0620,
ice_aqc_opc_set_port_id_led = 0x06E9,
+ ice_aqc_opc_sff_eeprom = 0x06EE,
/* NVM commands */
ice_aqc_opc_nvm_read = 0x0701,
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
new file mode 100644
index 000000000000..77d6a0291e97
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -0,0 +1,859 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_base.h"
+#include "ice_dcb_lib.h"
+
+/**
+ * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
+ * @qs_cfg: gathered variables needed for PF->VSI queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
+{
+ int offset, i;
+
+ mutex_lock(qs_cfg->qs_mutex);
+ offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
+ 0, qs_cfg->q_count, 0);
+ if (offset >= qs_cfg->pf_map_size) {
+ mutex_unlock(qs_cfg->qs_mutex);
+ return -ENOMEM;
+ }
+
+ bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
+ for (i = 0; i < qs_cfg->q_count; i++)
+ qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
+ mutex_unlock(qs_cfg->qs_mutex);
+
+ return 0;
+}
+
+/**
+ * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
+{
+ int i, index = 0;
+
+ mutex_lock(qs_cfg->qs_mutex);
+ for (i = 0; i < qs_cfg->q_count; i++) {
+ index = find_next_zero_bit(qs_cfg->pf_map,
+ qs_cfg->pf_map_size, index);
+ if (index >= qs_cfg->pf_map_size)
+ goto err_scatter;
+ set_bit(index, qs_cfg->pf_map);
+ qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
+ }
+ mutex_unlock(qs_cfg->qs_mutex);
+
+ return 0;
+err_scatter:
+ for (index = 0; index < i; index++) {
+ clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
+ qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
+ }
+ mutex_unlock(qs_cfg->qs_mutex);
+
+ return -ENOMEM;
+}
+
+/**
+ * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @ena: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT in case of failing to reach the requested state after
+ * multiple retries; else will return 0 in case of success.
+ */
+static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
+{
+ int i;
+
+ for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
+ if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
+ QRX_CTRL_QENA_STAT_M))
+ return 0;
+
+ usleep_range(20, 40);
+ }
+
+ return -ETIMEDOUT;
+}
+
+/**
+ * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the VSI struct
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_q_vector *q_vector;
+
+ /* allocate q_vector */
+ q_vector = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*q_vector),
+ GFP_KERNEL);
+ if (!q_vector)
+ return -ENOMEM;
+
+ q_vector->vsi = vsi;
+ q_vector->v_idx = v_idx;
+ if (vsi->type == ICE_VSI_VF)
+ goto out;
+ /* only set affinity_mask if the CPU is online */
+ if (cpu_online(v_idx))
+ cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+
+ /* This will not be called in the driver load path because the netdev
+ * will not be created yet. All other cases with register the NAPI
+ * handler here (i.e. resume, reset/rebuild, etc.)
+ */
+ if (vsi->netdev)
+ netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
+ NAPI_POLL_WEIGHT);
+
+out:
+ /* tie q_vector and VSI together */
+ vsi->q_vectors[v_idx] = q_vector;
+
+ return 0;
+}
+
+/**
+ * ice_free_q_vector - Free memory allocated for a specific interrupt vector
+ * @vsi: VSI having the memory freed
+ * @v_idx: index of the vector to be freed
+ */
+static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+ struct ice_q_vector *q_vector;
+ struct ice_pf *pf = vsi->back;
+ struct ice_ring *ring;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
+ if (!vsi->q_vectors[v_idx]) {
+ dev_dbg(dev, "Queue vector at index %d not found\n", v_idx);
+ return;
+ }
+ q_vector = vsi->q_vectors[v_idx];
+
+ ice_for_each_ring(ring, q_vector->tx)
+ ring->q_vector = NULL;
+ ice_for_each_ring(ring, q_vector->rx)
+ ring->q_vector = NULL;
+
+ /* only VSI with an associated netdev is set up with NAPI */
+ if (vsi->netdev)
+ netif_napi_del(&q_vector->napi);
+
+ devm_kfree(dev, q_vector);
+ vsi->q_vectors[v_idx] = NULL;
+}
+
+/**
+ * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
+ * @hw: board specific structure
+ */
+static void ice_cfg_itr_gran(struct ice_hw *hw)
+{
+ u32 regval = rd32(hw, GLINT_CTL);
+
+ /* no need to update global register if ITR gran is already set */
+ if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
+ (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
+ GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
+ (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
+ GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
+ (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
+ GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
+ (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
+ GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
+ return;
+
+ regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
+ GLINT_CTL_ITR_GRAN_200_M) |
+ ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
+ GLINT_CTL_ITR_GRAN_100_M) |
+ ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
+ GLINT_CTL_ITR_GRAN_50_M) |
+ ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
+ GLINT_CTL_ITR_GRAN_25_M);
+ wr32(hw, GLINT_CTL, regval);
+}
+
+/**
+ * ice_calc_q_handle - calculate the queue handle
+ * @vsi: VSI that ring belongs to
+ * @ring: ring to get the absolute queue index
+ * @tc: traffic class number
+ */
+static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc)
+{
+ WARN_ONCE(ice_ring_is_xdp(ring) && tc,
+ "XDP ring can't belong to TC other than 0");
+
+ /* Idea here for calculation is that we subtract the number of queue
+ * count from TC that ring belongs to from it's absolute queue index
+ * and as a result we get the queue's index within TC.
+ */
+ return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset;
+}
+
+/**
+ * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
+ * @ring: The Tx ring to configure
+ * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
+ * @pf_q: queue index in the PF space
+ *
+ * Configure the Tx descriptor ring in TLAN context.
+ */
+static void
+ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+{
+ struct ice_vsi *vsi = ring->vsi;
+ struct ice_hw *hw = &vsi->back->hw;
+
+ tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
+
+ tlan_ctx->port_num = vsi->port_info->lport;
+
+ /* Transmit Queue Length */
+ tlan_ctx->qlen = ring->count;
+
+ ice_set_cgd_num(tlan_ctx, ring);
+
+ /* PF number */
+ tlan_ctx->pf_num = hw->pf_id;
+
+ /* queue belongs to a specific VSI type
+ * VF / VM index should be programmed per vmvf_type setting:
+ * for vmvf_type = VF, it is VF number between 0-256
+ * for vmvf_type = VM, it is VM number between 0-767
+ * for PF or EMP this field should be set to zero
+ */
+ switch (vsi->type) {
+ case ICE_VSI_LB:
+ /* fall through */
+ case ICE_VSI_PF:
+ tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+ break;
+ case ICE_VSI_VF:
+ /* Firmware expects vmvf_num to be absolute VF ID */
+ tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
+ tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
+ break;
+ default:
+ return;
+ }
+
+ /* make sure the context is associated with the right VSI */
+ tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
+
+ tlan_ctx->tso_ena = ICE_TX_LEGACY;
+ tlan_ctx->tso_qnum = pf_q;
+
+ /* Legacy or Advanced Host Interface:
+ * 0: Advanced Host Interface
+ * 1: Legacy Host Interface
+ */
+ tlan_ctx->legacy_int = ICE_TX_LEGACY;
+}
+
+/**
+ * ice_setup_rx_ctx - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in RLAN context.
+ */
+int ice_setup_rx_ctx(struct ice_ring *ring)
+{
+ int chain_len = ICE_MAX_CHAINED_RX_BUFS;
+ struct ice_vsi *vsi = ring->vsi;
+ u32 rxdid = ICE_RXDID_FLEX_NIC;
+ struct ice_rlan_ctx rlan_ctx;
+ struct ice_hw *hw;
+ u32 regval;
+ u16 pf_q;
+ int err;
+
+ hw = &vsi->back->hw;
+
+ /* what is Rx queue number in global space of 2K Rx queues */
+ pf_q = vsi->rxq_map[ring->q_index];
+
+ /* clear the context structure first */
+ memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+
+ ring->rx_buf_len = vsi->rx_buf_len;
+
+ if (ring->vsi->type == ICE_VSI_PF) {
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+ xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->q_index);
+
+ ring->xsk_umem = ice_xsk_umem(ring);
+ if (ring->xsk_umem) {
+ xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+ ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
+ XDP_PACKET_HEADROOM;
+ /* For AF_XDP ZC, we disallow packets to span on
+ * multiple buffers, thus letting us skip that
+ * handling in the fast-path.
+ */
+ chain_len = 1;
+ ring->zca.free = ice_zca_free;
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_ZERO_COPY,
+ &ring->zca);
+ if (err)
+ return err;
+
+ dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+ ring->q_index);
+ } else {
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+ xdp_rxq_info_reg(&ring->xdp_rxq,
+ ring->netdev,
+ ring->q_index);
+
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (err)
+ return err;
+ }
+ }
+ /* Receive Queue Base Address.
+ * Indicates the starting address of the descriptor queue defined in
+ * 128 Byte units.
+ */
+ rlan_ctx.base = ring->dma >> 7;
+
+ rlan_ctx.qlen = ring->count;
+
+ /* Receive Packet Data Buffer Size.
+ * The Packet Data Buffer Size is defined in 128 byte units.
+ */
+ rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
+
+ /* use 32 byte descriptors */
+ rlan_ctx.dsize = 1;
+
+ /* Strip the Ethernet CRC bytes before the packet is posted to host
+ * memory.
+ */
+ rlan_ctx.crcstrip = 1;
+
+ /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
+ rlan_ctx.l2tsel = 1;
+
+ rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+ rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+ rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
+
+ /* This controls whether VLAN is stripped from inner headers
+ * The VLAN in the inner L2 header is stripped to the receive
+ * descriptor if enabled by this flag.
+ */
+ rlan_ctx.showiv = 0;
+
+ /* Max packet size for this queue - must not be set to a larger value
+ * than 5 x DBUF
+ */
+ rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
+ chain_len * ring->rx_buf_len);
+
+ /* Rx queue threshold in units of 64 */
+ rlan_ctx.lrxqthresh = 1;
+
+ /* Enable Flexible Descriptors in the queue context which
+ * allows this driver to select a specific receive descriptor format
+ */
+ if (vsi->type != ICE_VSI_VF) {
+ regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+ regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+ QRXFLXP_CNTXT_RXDID_IDX_M;
+
+ /* increasing context priority to pick up profile ID;
+ * default is 0x01; setting to 0x03 to ensure profile
+ * is programming if prev context is of same priority
+ */
+ regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+ QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+ wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+ }
+
+ /* Absolute queue number out of 2K needs to be passed */
+ err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
+ if (err) {
+ dev_err(&vsi->back->pdev->dev,
+ "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+ pf_q, err);
+ return -EIO;
+ }
+
+ if (vsi->type == ICE_VSI_VF)
+ return 0;
+
+ /* configure Rx buffer alignment */
+ if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+ ice_clear_ring_build_skb_ena(ring);
+ else
+ ice_set_ring_build_skb_ena(ring);
+
+ /* init queue specific tail register */
+ ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
+ writel(0, ring->tail);
+
+ err = ring->xsk_umem ?
+ ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
+ ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
+ if (err)
+ dev_info(&vsi->back->pdev->dev,
+ "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
+ ring->xsk_umem ? "UMEM enabled " : "",
+ ring->q_index, pf_q);
+
+ return 0;
+}
+
+/**
+ * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * This function first tries to find contiguous space. If it is not successful,
+ * it tries with the scatter approach.
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
+{
+ int ret = 0;
+
+ ret = __ice_vsi_get_qs_contig(qs_cfg);
+ if (ret) {
+ /* contig failed, so try with scatter approach */
+ qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
+ qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
+ qs_cfg->scatter_count);
+ ret = __ice_vsi_get_qs_sc(qs_cfg);
+ }
+ return ret;
+}
+
+/**
+ * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx rings
+ * @rxq_idx: Rx queue index
+ */
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
+{
+ int pf_q = vsi->rxq_map[rxq_idx];
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int ret = 0;
+ u32 rx_reg;
+
+ rx_reg = rd32(hw, QRX_CTRL(pf_q));
+
+ /* Skip if the queue is already in the requested state */
+ if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+ return 0;
+
+ /* turn on/off the queue */
+ if (ena)
+ rx_reg |= QRX_CTRL_QENA_REQ_M;
+ else
+ rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+ wr32(hw, QRX_CTRL(pf_q), rx_reg);
+
+ /* wait for the change to finish */
+ ret = ice_pf_rxq_wait(pf, pf_q, ena);
+ if (ret)
+ dev_err(ice_pf_to_dev(pf),
+ "VSI idx %d Rx ring %d %sable timeout\n",
+ vsi->idx, pf_q, (ena ? "en" : "dis"));
+
+ return ret;
+}
+
+/**
+ * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ int v_idx = 0, num_q_vectors;
+ struct device *dev;
+ int err;
+
+ dev = ice_pf_to_dev(pf);
+ if (vsi->q_vectors[0]) {
+ dev_dbg(dev, "VSI %d has existing q_vectors\n", vsi->vsi_num);
+ return -EEXIST;
+ }
+
+ num_q_vectors = vsi->num_q_vectors;
+
+ for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
+ err = ice_vsi_alloc_q_vector(vsi, v_idx);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ while (v_idx--)
+ ice_free_q_vector(vsi, v_idx);
+
+ dev_err(dev, "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
+ vsi->num_q_vectors, vsi->vsi_num, err);
+ vsi->num_q_vectors = 0;
+ return err;
+}
+
+/**
+ * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors allotted
+ * through the MSI-X enabling code. On a constrained vector budget, we map Tx
+ * and Rx rings to the vector as "efficiently" as possible.
+ */
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+{
+ int q_vectors = vsi->num_q_vectors;
+ int tx_rings_rem, rx_rings_rem;
+ int v_id;
+
+ /* initially assigning remaining rings count to VSIs num queue value */
+ tx_rings_rem = vsi->num_txq;
+ rx_rings_rem = vsi->num_rxq;
+
+ for (v_id = 0; v_id < q_vectors; v_id++) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
+ int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
+
+ /* Tx rings mapping to vector */
+ tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
+ q_vector->num_ring_tx = tx_rings_per_v;
+ q_vector->tx.ring = NULL;
+ q_vector->tx.itr_idx = ICE_TX_ITR;
+ q_base = vsi->num_txq - tx_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
+ struct ice_ring *tx_ring = vsi->tx_rings[q_id];
+
+ tx_ring->q_vector = q_vector;
+ tx_ring->next = q_vector->tx.ring;
+ q_vector->tx.ring = tx_ring;
+ }
+ tx_rings_rem -= tx_rings_per_v;
+
+ /* Rx rings mapping to vector */
+ rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
+ q_vector->num_ring_rx = rx_rings_per_v;
+ q_vector->rx.ring = NULL;
+ q_vector->rx.itr_idx = ICE_RX_ITR;
+ q_base = vsi->num_rxq - rx_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
+ struct ice_ring *rx_ring = vsi->rx_rings[q_id];
+
+ rx_ring->q_vector = q_vector;
+ rx_ring->next = q_vector->rx.ring;
+ q_vector->rx.ring = rx_ring;
+ }
+ rx_rings_rem -= rx_rings_per_v;
+ }
+}
+
+/**
+ * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI having memory freed
+ */
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+{
+ int v_idx;
+
+ ice_for_each_q_vector(vsi, v_idx)
+ ice_free_q_vector(vsi, v_idx);
+}
+
+/**
+ * ice_vsi_cfg_txq - Configure single Tx queue
+ * @vsi: the VSI that queue belongs to
+ * @ring: Tx ring to be configured
+ * @qg_buf: queue group buffer
+ */
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+ struct ice_aqc_add_tx_qgrp *qg_buf)
+{
+ struct ice_tlan_ctx tlan_ctx = { 0 };
+ struct ice_aqc_add_txqs_perq *txq;
+ struct ice_pf *pf = vsi->back;
+ u8 buf_len = sizeof(*qg_buf);
+ enum ice_status status;
+ u16 pf_q;
+ u8 tc;
+
+ pf_q = ring->reg_idx;
+ ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
+ /* copy context contents into the qg_buf */
+ qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+ ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+ ice_tlan_ctx_info);
+
+ /* init queue specific tail reg. It is referred as
+ * transmit comm scheduler queue doorbell.
+ */
+ ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
+
+ if (IS_ENABLED(CONFIG_DCB))
+ tc = ring->dcb_tc;
+ else
+ tc = 0;
+
+ /* Add unique software queue handle of the Tx queue per
+ * TC into the VSI Tx ring
+ */
+ ring->q_handle = ice_calc_q_handle(vsi, ring, tc);
+
+ status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
+ 1, qg_buf, buf_len, NULL);
+ if (status) {
+ dev_err(ice_pf_to_dev(pf),
+ "Failed to set LAN Tx queue context, error: %d\n",
+ status);
+ return -ENODEV;
+ }
+
+ /* Add Tx Queue TEID into the VSI Tx ring from the
+ * response. This will complete configuring and
+ * enabling the queue.
+ */
+ txq = &qg_buf->txqs[0];
+ if (pf_q == le16_to_cpu(txq->txq_id))
+ ring->txq_teid = le32_to_cpu(txq->q_teid);
+
+ return 0;
+}
+
+/**
+ * ice_cfg_itr - configure the initial interrupt throttle values
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector that's being configured
+ *
+ * Configure interrupt throttling values for the ring containers that are
+ * associated with the interrupt vector passed in.
+ */
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+ ice_cfg_itr_gran(hw);
+
+ if (q_vector->num_ring_rx) {
+ struct ice_ring_container *rc = &q_vector->rx;
+
+ /* if this value is set then don't overwrite with default */
+ if (!rc->itr_setting)
+ rc->itr_setting = ICE_DFLT_RX_ITR;
+
+ rc->target_itr = ITR_TO_REG(rc->itr_setting);
+ rc->next_update = jiffies + 1;
+ rc->current_itr = rc->target_itr;
+ wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+ ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+ }
+
+ if (q_vector->num_ring_tx) {
+ struct ice_ring_container *rc = &q_vector->tx;
+
+ /* if this value is set then don't overwrite with default */
+ if (!rc->itr_setting)
+ rc->itr_setting = ICE_DFLT_TX_ITR;
+
+ rc->target_itr = ITR_TO_REG(rc->itr_setting);
+ rc->next_update = jiffies + 1;
+ rc->current_itr = rc->target_itr;
+ wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+ ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+ }
+}
+
+/**
+ * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * @vsi: the VSI being configured
+ * @txq: Tx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+
+ itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+
+ val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
+ ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]),
+ val);
+ }
+ ice_flush(hw);
+}
+
+/**
+ * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
+ * @vsi: the VSI being configured
+ * @rxq: Rx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+
+ itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
+
+ val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
+ ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
+
+ wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+ wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
+ (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
+ GLINT_DYN_CTL_SWINT_TRIG_M |
+ GLINT_DYN_CTL_INTENA_M);
+}
+
+/**
+ * ice_vsi_stop_tx_ring - Disable single Tx ring
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @ring: Tx ring to be stopped
+ * @txq_meta: Meta data of Tx ring to be stopped
+ */
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ u16 rel_vmvf_num, struct ice_ring *ring,
+ struct ice_txq_meta *txq_meta)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_q_vector *q_vector;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status status;
+ u32 val;
+
+ /* clear cause_ena bit for disabled queues */
+ val = rd32(hw, QINT_TQCTL(ring->reg_idx));
+ val &= ~QINT_TQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_TQCTL(ring->reg_idx), val);
+
+ /* software is expected to wait for 100 ns */
+ ndelay(100);
+
+ /* trigger a software interrupt for the vector
+ * associated to the queue to schedule NAPI handler
+ */
+ q_vector = ring->q_vector;
+ if (q_vector)
+ ice_trigger_sw_intr(hw, q_vector);
+
+ status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
+ txq_meta->tc, 1, &txq_meta->q_handle,
+ &txq_meta->q_id, &txq_meta->q_teid, rst_src,
+ rel_vmvf_num, NULL);
+
+ /* if the disable queue command was exercised during an
+ * active reset flow, ICE_ERR_RESET_ONGOING is returned.
+ * This is not an error as the reset operation disables
+ * queues at the hardware level anyway.
+ */
+ if (status == ICE_ERR_RESET_ONGOING) {
+ dev_dbg(&vsi->back->pdev->dev,
+ "Reset in progress. LAN Tx queues already disabled\n");
+ } else if (status == ICE_ERR_DOES_NOT_EXIST) {
+ dev_dbg(&vsi->back->pdev->dev,
+ "LAN Tx queues do not exist, nothing to disable\n");
+ } else if (status) {
+ dev_err(&vsi->back->pdev->dev,
+ "Failed to disable LAN Tx queues, error: %d\n", status);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_fill_txq_meta - Prepare the Tx queue's meta data
+ * @vsi: VSI that ring belongs to
+ * @ring: ring that txq_meta will be based on
+ * @txq_meta: a helper struct that wraps Tx queue's information
+ *
+ * Set up a helper struct that will contain all the necessary fields that
+ * are needed for stopping Tx queue
+ */
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+ struct ice_txq_meta *txq_meta)
+{
+ u8 tc;
+
+ if (IS_ENABLED(CONFIG_DCB))
+ tc = ring->dcb_tc;
+ else
+ tc = 0;
+
+ txq_meta->q_id = ring->reg_idx;
+ txq_meta->q_teid = ring->txq_teid;
+ txq_meta->q_handle = ring->q_handle;
+ txq_meta->vsi_idx = vsi->idx;
+ txq_meta->tc = tc;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h
new file mode 100644
index 000000000000..407995e8e944
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_BASE_H_
+#define _ICE_BASE_H_
+
+#include "ice.h"
+
+int ice_setup_rx_ctx(struct ice_ring *ring);
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg);
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi);
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+ struct ice_aqc_add_tx_qgrp *qg_buf);
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+ u16 rel_vmvf_num, struct ice_ring *ring,
+ struct ice_txq_meta *txq_meta);
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+ struct ice_txq_meta *txq_meta);
+#endif /* _ICE_BASE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 3a6b3950eb0e..fb1d930470c7 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -855,6 +855,9 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
goto err_unroll_sched;
}
INIT_LIST_HEAD(&hw->agg_list);
+ /* Initialize max burst size */
+ if (!hw->max_burst_size)
+ ice_cfg_rl_burst_size(hw, ICE_SCHED_DFLT_BURST_SIZE);
status = ice_init_fltr_mgmt_struct(hw);
if (status)
@@ -1067,6 +1070,72 @@ enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req)
}
/**
+ * ice_get_pfa_module_tlv - Reads sub module TLV from NVM PFA
+ * @hw: pointer to hardware structure
+ * @module_tlv: pointer to module TLV to return
+ * @module_tlv_len: pointer to module TLV length to return
+ * @module_type: module type requested
+ *
+ * Finds the requested sub module TLV type from the Preserved Field
+ * Area (PFA) and returns the TLV pointer and length. The caller can
+ * use these to read the variable length TLV value.
+ */
+enum ice_status
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+ u16 module_type)
+{
+ enum ice_status status;
+ u16 pfa_len, pfa_ptr;
+ u16 next_tlv;
+
+ status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Preserved Field Array pointer.\n");
+ return status;
+ }
+ status = ice_read_sr_word(hw, pfa_ptr, &pfa_len);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n");
+ return status;
+ }
+ /* Starting with first TLV after PFA length, iterate through the list
+ * of TLVs to find the requested one.
+ */
+ next_tlv = pfa_ptr + 1;
+ while (next_tlv < pfa_ptr + pfa_len) {
+ u16 tlv_sub_module_type;
+ u16 tlv_len;
+
+ /* Read TLV type */
+ status = ice_read_sr_word(hw, next_tlv, &tlv_sub_module_type);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV type.\n");
+ break;
+ }
+ /* Read TLV length */
+ status = ice_read_sr_word(hw, next_tlv + 1, &tlv_len);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV length.\n");
+ break;
+ }
+ if (tlv_sub_module_type == module_type) {
+ if (tlv_len) {
+ *module_tlv = next_tlv;
+ *module_tlv_len = tlv_len;
+ return 0;
+ }
+ return ICE_ERR_INVAL_SIZE;
+ }
+ /* Check next TLV, i.e. current TLV pointer + length + 2 words
+ * (for current TLV's type and length)
+ */
+ next_tlv = next_tlv + tlv_len + 2;
+ }
+ /* Module does not exist */
+ return ICE_ERR_DOES_NOT_EXIST;
+}
+
+/**
* ice_copy_rxq_ctx_to_hw
* @hw: pointer to the hardware structure
* @ice_rxq_ctx: pointer to the rxq context
@@ -1182,56 +1251,6 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = {
{ 0 }
};
-/**
- * ice_debug_cq
- * @hw: pointer to the hardware structure
- * @mask: debug mask
- * @desc: pointer to control queue descriptor
- * @buf: pointer to command buffer
- * @buf_len: max length of buf
- *
- * Dumps debug log about control command with descriptor contents.
- */
-void
-ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc, void *buf,
- u16 buf_len)
-{
- struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
- u16 len;
-
-#ifndef CONFIG_DYNAMIC_DEBUG
- if (!(mask & hw->debug_mask))
- return;
-#endif
-
- if (!desc)
- return;
-
- len = le16_to_cpu(cq_desc->datalen);
-
- ice_debug(hw, mask,
- "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
- le16_to_cpu(cq_desc->opcode),
- le16_to_cpu(cq_desc->flags),
- le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
- ice_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
- le32_to_cpu(cq_desc->cookie_high),
- le32_to_cpu(cq_desc->cookie_low));
- ice_debug(hw, mask, "\tparam (0,1) 0x%08X 0x%08X\n",
- le32_to_cpu(cq_desc->params.generic.param0),
- le32_to_cpu(cq_desc->params.generic.param1));
- ice_debug(hw, mask, "\taddr (h,l) 0x%08X 0x%08X\n",
- le32_to_cpu(cq_desc->params.generic.addr_high),
- le32_to_cpu(cq_desc->params.generic.addr_low));
- if (buf && cq_desc->datalen != 0) {
- ice_debug(hw, mask, "Buffer:\n");
- if (buf_len < len)
- len = buf_len;
-
- ice_debug_array(hw, mask, 16, 1, (u8 *)buf, len);
- }
-}
-
/* FW Admin Queue command wrappers */
/* Software lock/mutex that is meant to be held while the Global Config Lock
@@ -1654,6 +1673,10 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
ice_debug(hw, ICE_DBG_INIT,
"%s: valid_functions (bitmap) = %d\n", prefix,
caps->valid_functions);
+
+ /* store func count for resource management purposes */
+ if (dev_p)
+ dev_p->num_funcs = hweight32(number);
break;
case ICE_AQC_CAPS_SRIOV:
caps->sr_iov_1_1 = (number == 1);
@@ -1760,6 +1783,18 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
break;
}
}
+
+ /* Re-calculate capabilities that are dependent on the number of
+ * physical ports; i.e. some features are not supported or function
+ * differently on devices with more than 4 ports.
+ */
+ if (hw->dev_caps.num_funcs > 4) {
+ /* Max 4 TCs per port */
+ caps->maxtc = 4;
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: maxtc = %d (based on #ports)\n", prefix,
+ caps->maxtc);
+ }
}
/**
@@ -1856,8 +1891,7 @@ void ice_set_safe_mode_caps(struct ice_hw *hw)
struct ice_hw_dev_caps *dev_caps = &hw->dev_caps;
u32 valid_func, rxq_first_id, txq_first_id;
u32 msix_vector_first_id, max_mtu;
- u32 num_func = 0;
- u8 i;
+ u32 num_funcs;
/* cache some func_caps values that should be restored after memset */
valid_func = func_caps->common_cap.valid_functions;
@@ -1890,6 +1924,7 @@ void ice_set_safe_mode_caps(struct ice_hw *hw)
rxq_first_id = dev_caps->common_cap.rxq_first_id;
msix_vector_first_id = dev_caps->common_cap.msix_vector_first_id;
max_mtu = dev_caps->common_cap.max_mtu;
+ num_funcs = dev_caps->num_funcs;
/* unset dev capabilities */
memset(dev_caps, 0, sizeof(*dev_caps));
@@ -1900,19 +1935,14 @@ void ice_set_safe_mode_caps(struct ice_hw *hw)
dev_caps->common_cap.rxq_first_id = rxq_first_id;
dev_caps->common_cap.msix_vector_first_id = msix_vector_first_id;
dev_caps->common_cap.max_mtu = max_mtu;
-
- /* valid_func is a bitmap. get number of functions */
-#define ICE_MAX_FUNCS 8
- for (i = 0; i < ICE_MAX_FUNCS; i++)
- if (valid_func & BIT(i))
- num_func++;
+ dev_caps->num_funcs = num_funcs;
/* one Tx and one Rx queue per function in safe mode */
- dev_caps->common_cap.num_rxq = num_func;
- dev_caps->common_cap.num_txq = num_func;
+ dev_caps->common_cap.num_rxq = num_funcs;
+ dev_caps->common_cap.num_txq = num_funcs;
/* two MSIX vectors per function */
- dev_caps->common_cap.num_msix_vectors = 2 * num_func;
+ dev_caps->common_cap.num_msix_vectors = 2 * num_funcs;
}
/**
@@ -2556,6 +2586,52 @@ ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
}
/**
+ * ice_aq_sff_eeprom
+ * @hw: pointer to the HW struct
+ * @lport: bits [7:0] = logical port, bit [8] = logical port valid
+ * @bus_addr: I2C bus address of the eeprom (typically 0xA0, 0=topo default)
+ * @mem_addr: I2C offset. lower 8 bits for address, 8 upper bits zero padding.
+ * @page: QSFP page
+ * @set_page: set or ignore the page
+ * @data: pointer to data buffer to be read/written to the I2C device.
+ * @length: 1-16 for read, 1 for write.
+ * @write: 0 read, 1 for write.
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read/Write SFF EEPROM (0x06EE)
+ */
+enum ice_status
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+ u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+ bool write, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_sff_eeprom *cmd;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ if (!data || (mem_addr & 0xff00))
+ return ICE_ERR_PARAM;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom);
+ cmd = &desc.params.read_write_sff_param;
+ desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF);
+ cmd->lport_num = (u8)(lport & 0xff);
+ cmd->lport_num_valid = (u8)((lport >> 8) & 0x01);
+ cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) &
+ ICE_AQC_SFF_I2CBUS_7BIT_M) |
+ ((set_page <<
+ ICE_AQC_SFF_SET_EEPROM_PAGE_S) &
+ ICE_AQC_SFF_SET_EEPROM_PAGE_M));
+ cmd->i2c_mem_addr = cpu_to_le16(mem_addr & 0xff);
+ cmd->eeprom_page = cpu_to_le16((u16)page << ICE_AQC_SFF_EEPROM_PAGE_S);
+ if (write)
+ cmd->i2c_bus_addr |= cpu_to_le16(ICE_AQC_SFF_IS_WRITE);
+
+ status = ice_aq_send_cmd(hw, &desc, data, length, cd);
+ return status;
+}
+
+/**
* __ice_aq_get_set_rss_lut
* @hw: pointer to the hardware structure
* @vsi_id: VSI FW index
@@ -3148,7 +3224,7 @@ ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
* @tc: TC number
* @q_handle: software queue handle
*/
-static struct ice_q_ctx *
+struct ice_q_ctx *
ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle)
{
struct ice_vsi_ctx *vsi;
@@ -3245,9 +3321,12 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
node.node_teid = buf->txqs[0].q_teid;
node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
q_ctx->q_handle = q_handle;
+ q_ctx->q_teid = le32_to_cpu(node.node_teid);
- /* add a leaf node into schduler tree queue layer */
+ /* add a leaf node into scheduler tree queue layer */
status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node);
+ if (!status)
+ status = ice_sched_replay_q_bw(pi, q_ctx);
ena_txq_exit:
mutex_unlock(&pi->sched_lock);
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index c3df92f57777..b22aa561e253 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -6,16 +6,18 @@
#include "ice.h"
#include "ice_type.h"
+#include "ice_nvm.h"
#include "ice_flex_pipe.h"
#include "ice_switch.h"
#include <linux/avf/virtchnl.h>
enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw);
-void
-ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len);
enum ice_status ice_init_hw(struct ice_hw *hw);
void ice_deinit_hw(struct ice_hw *hw);
+enum ice_status
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+ u16 module_type);
enum ice_status ice_check_reset(struct ice_hw *hw);
enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req);
enum ice_status ice_create_all_ctrlq(struct ice_hw *hw);
@@ -117,6 +119,10 @@ ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd);
enum ice_status
ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+ u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+ bool write, struct ice_sq_cd *cd);
enum ice_status
ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
@@ -133,6 +139,8 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle);
void ice_replay_post(struct ice_hw *hw);
void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf);
+struct ice_q_ctx *
+ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle);
void
ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
u64 *prev_stat, u64 *cur_stat);
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 2353166c654e..dd946866d7b8 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -810,6 +810,52 @@ static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
}
/**
+ * ice_debug_cq
+ * @hw: pointer to the hardware structure
+ * @desc: pointer to control queue descriptor
+ * @buf: pointer to command buffer
+ * @buf_len: max length of buf
+ *
+ * Dumps debug log about control command with descriptor contents.
+ */
+static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len)
+{
+ struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
+ u16 len;
+
+ if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) &&
+ !((ICE_DBG_AQ_DESC | ICE_DBG_AQ_DESC_BUF) & hw->debug_mask))
+ return;
+
+ if (!desc)
+ return;
+
+ len = le16_to_cpu(cq_desc->datalen);
+
+ ice_debug(hw, ICE_DBG_AQ_DESC,
+ "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+ le16_to_cpu(cq_desc->opcode),
+ le16_to_cpu(cq_desc->flags),
+ le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
+ ice_debug(hw, ICE_DBG_AQ_DESC, "\tcookie (h,l) 0x%08X 0x%08X\n",
+ le32_to_cpu(cq_desc->cookie_high),
+ le32_to_cpu(cq_desc->cookie_low));
+ ice_debug(hw, ICE_DBG_AQ_DESC, "\tparam (0,1) 0x%08X 0x%08X\n",
+ le32_to_cpu(cq_desc->params.generic.param0),
+ le32_to_cpu(cq_desc->params.generic.param1));
+ ice_debug(hw, ICE_DBG_AQ_DESC, "\taddr (h,l) 0x%08X 0x%08X\n",
+ le32_to_cpu(cq_desc->params.generic.addr_high),
+ le32_to_cpu(cq_desc->params.generic.addr_low));
+ if (buf && cq_desc->datalen != 0) {
+ ice_debug(hw, ICE_DBG_AQ_DESC_BUF, "Buffer:\n");
+ if (buf_len < len)
+ len = buf_len;
+
+ ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, (u8 *)buf, len);
+ }
+}
+
+/**
* ice_sq_done - check if FW has processed the Admin Send Queue (ATQ)
* @hw: pointer to the HW struct
* @cq: pointer to the specific Control queue
@@ -934,10 +980,10 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
}
/* Debug desc and buffer */
- ice_debug(hw, ICE_DBG_AQ_MSG,
+ ice_debug(hw, ICE_DBG_AQ_DESC,
"ATQ: Control Send queue desc and buffer:\n");
- ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc_on_ring, buf, buf_size);
+ ice_debug_cq(hw, (void *)desc_on_ring, buf, buf_size);
(cq->sq.next_to_use)++;
if (cq->sq.next_to_use == cq->sq.count)
@@ -948,7 +994,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
if (ice_sq_done(hw, cq))
break;
- mdelay(1);
+ udelay(ICE_CTL_Q_SQ_CMD_USEC);
total_delay++;
} while (total_delay < cq->sq_cmd_timeout);
@@ -971,7 +1017,8 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
retval = le16_to_cpu(desc->retval);
if (retval) {
ice_debug(hw, ICE_DBG_AQ_MSG,
- "Control Send Queue command completed with error 0x%x\n",
+ "Control Send Queue command 0x%04X completed with error 0x%X\n",
+ le16_to_cpu(desc->opcode),
retval);
/* strip off FW internal code */
@@ -986,7 +1033,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
ice_debug(hw, ICE_DBG_AQ_MSG,
"ATQ: desc and buffer writeback:\n");
- ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, buf, buf_size);
+ ice_debug_cq(hw, (void *)desc, buf, buf_size);
/* save writeback AQ if requested */
if (details->wb_desc)
@@ -1075,7 +1122,8 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
if (flags & ICE_AQ_FLAG_ERR) {
ret_code = ICE_ERR_AQ_ERROR;
ice_debug(hw, ICE_DBG_AQ_MSG,
- "Control Receive Queue Event received with error 0x%x\n",
+ "Control Receive Queue Event 0x%04X received with error 0x%X\n",
+ le16_to_cpu(desc->opcode),
cq->rq_last_status);
}
memcpy(&e->desc, desc, sizeof(e->desc));
@@ -1084,10 +1132,9 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
if (e->msg_buf && e->msg_len)
memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len);
- ice_debug(hw, ICE_DBG_AQ_MSG, "ARQ: desc and buffer:\n");
+ ice_debug(hw, ICE_DBG_AQ_DESC, "ARQ: desc and buffer:\n");
- ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, e->msg_buf,
- cq->rq_buf_size);
+ ice_debug_cq(hw, (void *)desc, e->msg_buf, cq->rq_buf_size);
/* Restore the original datalen and buffer address in the desc,
* FW updates datalen to indicate the event message size
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index 44945c2165d8..bf0ebe6149e8 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -22,7 +22,7 @@
*/
#define EXP_FW_API_VER_BRANCH 0x00
#define EXP_FW_API_VER_MAJOR 0x01
-#define EXP_FW_API_VER_MINOR 0x03
+#define EXP_FW_API_VER_MINOR 0x05
/* Different control queue types: These are mainly for SW consumption. */
enum ice_ctl_q {
@@ -31,8 +31,9 @@ enum ice_ctl_q {
ICE_CTL_Q_MAILBOX,
};
-/* Control Queue default settings */
-#define ICE_CTL_Q_SQ_CMD_TIMEOUT 250 /* msecs */
+/* Control Queue timeout settings - max delay 250ms */
+#define ICE_CTL_Q_SQ_CMD_TIMEOUT 2500 /* Count 2500 times */
+#define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */
struct ice_ctl_q_ring {
void *dma_head; /* Virtual address to DMA head */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index dd7efff121bd..713e8a892e14 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -965,9 +965,9 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
pi->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
/* Get current DCBX configuration */
ret = ice_get_dcb_cfg(pi);
- pi->is_sw_lldp = (hw->adminq.sq_last_status == ICE_AQ_RC_EPERM);
if (ret)
return ret;
+ pi->is_sw_lldp = false;
} else if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) {
return ICE_ERR_NOT_READY;
}
@@ -975,8 +975,8 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
/* Configure the LLDP MIB change event */
if (enable_mib_change) {
ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
- if (!ret)
- pi->is_sw_lldp = false;
+ if (ret)
+ pi->is_sw_lldp = true;
}
return ret;
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index dd47869c4ad4..d3d3ec29def9 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019, Intel Corporation. */
#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
/**
* ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration
@@ -100,6 +101,16 @@ u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
}
/**
+ * ice_dcb_get_tc - Get the TC associated with the queue
+ * @vsi: ptr to the VSI
+ * @queue_index: queue number associated with VSI
+ */
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index)
+{
+ return vsi->tx_rings[queue_index]->dcb_tc;
+}
+
+/**
* ice_vsi_cfg_dcb_rings - Update rings to reflect DCB TC
* @vsi: VSI owner of rings being updated
*/
@@ -138,83 +149,62 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi)
}
/**
- * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
- * @pf: pointer to the PF struct
- *
- * Assumed caller has already disabled all VSIs before
- * calling this function. Reconfiguring DCB based on
- * local_dcbx_cfg.
- */
-static void ice_pf_dcb_recfg(struct ice_pf *pf)
-{
- struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
- u8 tc_map = 0;
- int v, ret;
-
- /* Update each VSI */
- ice_for_each_vsi(pf, v) {
- if (!pf->vsi[v])
- continue;
-
- if (pf->vsi[v]->type == ICE_VSI_PF)
- tc_map = ice_dcb_get_ena_tc(dcbcfg);
- else
- tc_map = ICE_DFLT_TRAFFIC_CLASS;
-
- ret = ice_vsi_cfg_tc(pf->vsi[v], tc_map);
- if (ret) {
- dev_err(&pf->pdev->dev,
- "Failed to config TC for VSI index: %d\n",
- pf->vsi[v]->idx);
- continue;
- }
-
- ice_vsi_map_rings_to_vectors(pf->vsi[v]);
- }
-}
-
-/**
* ice_pf_dcb_cfg - Apply new DCB configuration
* @pf: pointer to the PF struct
* @new_cfg: DCBX config to apply
* @locked: is the RTNL held
*/
-static
int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
{
- struct ice_dcbx_cfg *old_cfg, *curr_cfg;
struct ice_aqc_port_ets_elem buf = { 0 };
- int ret = 0;
+ struct ice_dcbx_cfg *old_cfg, *curr_cfg;
+ struct device *dev = ice_pf_to_dev(pf);
+ int ret = ICE_DCB_NO_HW_CHG;
+ struct ice_vsi *pf_vsi;
curr_cfg = &pf->hw.port_info->local_dcbx_cfg;
+ /* FW does not care if change happened */
+ if (!pf->hw.port_info->is_sw_lldp)
+ ret = ICE_DCB_HW_CHG_RST;
+
/* Enable DCB tagging only when more than one TC */
if (ice_dcb_get_num_tc(new_cfg) > 1) {
- dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n");
+ dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
set_bit(ICE_FLAG_DCB_ENA, pf->flags);
} else {
- dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n");
+ dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
}
if (!memcmp(new_cfg, curr_cfg, sizeof(*new_cfg))) {
- dev_dbg(&pf->pdev->dev, "No change in DCB config required\n");
+ dev_dbg(dev, "No change in DCB config required\n");
return ret;
}
/* Store old config in case FW config fails */
- old_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*old_cfg), GFP_KERNEL);
- memcpy(old_cfg, curr_cfg, sizeof(*old_cfg));
+ old_cfg = kmemdup(curr_cfg, sizeof(*old_cfg), GFP_KERNEL);
+ if (!old_cfg)
+ return -ENOMEM;
+
+ dev_info(dev, "Commit DCB Configuration to the hardware\n");
+ pf_vsi = ice_get_main_vsi(pf);
+ if (!pf_vsi) {
+ dev_dbg(dev, "PF VSI doesn't exist\n");
+ ret = -EINVAL;
+ goto free_cfg;
+ }
/* avoid race conditions by holding the lock while disabling and
* re-enabling the VSI
*/
if (!locked)
rtnl_lock();
- ice_pf_dis_all_vsi(pf, true);
+ ice_dis_vsi(pf_vsi, true);
memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
+ memcpy(&new_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
/* Only send new config to HW if we are in SW LLDP mode. Otherwise,
* the new config came from the HW in the first place.
@@ -222,7 +212,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
if (pf->hw.port_info->is_sw_lldp) {
ret = ice_set_dcb_cfg(pf->hw.port_info);
if (ret) {
- dev_err(&pf->pdev->dev, "Set DCB Config failed\n");
+ dev_err(dev, "Set DCB Config failed\n");
/* Restore previous settings to local config */
memcpy(curr_cfg, old_cfg, sizeof(*curr_cfg));
goto out;
@@ -231,17 +221,18 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
if (ret) {
- dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+ dev_err(dev, "Query Port ETS failed\n");
goto out;
}
ice_pf_dcb_recfg(pf);
out:
- ice_pf_ena_all_vsi(pf, true);
+ ice_ena_vsi(pf_vsi, true);
if (!locked)
rtnl_unlock();
- devm_kfree(&pf->pdev->dev, old_cfg);
+free_cfg:
+ kfree(old_cfg);
return ret;
}
@@ -277,6 +268,7 @@ static bool
ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
struct ice_dcbx_cfg *new_cfg)
{
+ struct device *dev = ice_pf_to_dev(pf);
bool need_reconfig = false;
/* Check if ETS configuration has changed */
@@ -287,33 +279,33 @@ ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
&old_cfg->etscfg.prio_table,
sizeof(new_cfg->etscfg.prio_table))) {
need_reconfig = true;
- dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n");
+ dev_dbg(dev, "ETS UP2TC changed.\n");
}
if (memcmp(&new_cfg->etscfg.tcbwtable,
&old_cfg->etscfg.tcbwtable,
sizeof(new_cfg->etscfg.tcbwtable)))
- dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n");
+ dev_dbg(dev, "ETS TC BW Table changed.\n");
if (memcmp(&new_cfg->etscfg.tsatable,
&old_cfg->etscfg.tsatable,
sizeof(new_cfg->etscfg.tsatable)))
- dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n");
+ dev_dbg(dev, "ETS TSA Table changed.\n");
}
/* Check if PFC configuration has changed */
if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
need_reconfig = true;
- dev_dbg(&pf->pdev->dev, "PFC config change detected.\n");
+ dev_dbg(dev, "PFC config change detected.\n");
}
/* Check if APP Table has changed */
if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) {
need_reconfig = true;
- dev_dbg(&pf->pdev->dev, "APP Table change detected.\n");
+ dev_dbg(dev, "APP Table change detected.\n");
}
- dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig);
+ dev_dbg(dev, "dcb need_reconfig=%d\n", need_reconfig);
return need_reconfig;
}
@@ -325,11 +317,12 @@ void ice_dcb_rebuild(struct ice_pf *pf)
{
struct ice_dcbx_cfg *local_dcbx_cfg, *desired_dcbx_cfg, *prev_cfg;
struct ice_aqc_port_ets_elem buf = { 0 };
+ struct device *dev = ice_pf_to_dev(pf);
enum ice_status ret;
ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
if (ret) {
- dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+ dev_err(dev, "Query Port ETS failed\n");
goto dcb_error;
}
@@ -348,17 +341,14 @@ void ice_dcb_rebuild(struct ice_pf *pf)
ice_cfg_etsrec_defaults(pf->hw.port_info);
ret = ice_set_dcb_cfg(pf->hw.port_info);
if (ret) {
- dev_err(&pf->pdev->dev, "Failed to set DCB to unwilling\n");
+ dev_err(dev, "Failed to set DCB to unwilling\n");
goto dcb_error;
}
/* Retrieve DCB config and ensure same as current in SW */
- prev_cfg = devm_kmemdup(&pf->pdev->dev, local_dcbx_cfg,
- sizeof(*prev_cfg), GFP_KERNEL);
- if (!prev_cfg) {
- dev_err(&pf->pdev->dev, "Failed to alloc space for DCB cfg\n");
+ prev_cfg = kmemdup(local_dcbx_cfg, sizeof(*prev_cfg), GFP_KERNEL);
+ if (!prev_cfg)
goto dcb_error;
- }
ice_init_dcb(&pf->hw, true);
if (pf->hw.port_info->dcbx_status == ICE_DCBX_STATUS_DIS)
@@ -368,12 +358,13 @@ void ice_dcb_rebuild(struct ice_pf *pf)
if (ice_dcb_need_recfg(pf, prev_cfg, local_dcbx_cfg)) {
/* difference in cfg detected - disable DCB till next MIB */
- dev_err(&pf->pdev->dev, "Set local MIB not accurate\n");
+ dev_err(dev, "Set local MIB not accurate\n");
+ kfree(prev_cfg);
goto dcb_error;
}
/* fetched config congruent to previous configuration */
- devm_kfree(&pf->pdev->dev, prev_cfg);
+ kfree(prev_cfg);
/* Set the local desired config */
if (local_dcbx_cfg->dcbx_mode == ICE_DCBX_MODE_CEE)
@@ -383,27 +374,30 @@ void ice_dcb_rebuild(struct ice_pf *pf)
ice_cfg_etsrec_defaults(pf->hw.port_info);
ret = ice_set_dcb_cfg(pf->hw.port_info);
if (ret) {
- dev_err(&pf->pdev->dev, "Failed to set desired config\n");
+ dev_err(dev, "Failed to set desired config\n");
goto dcb_error;
}
- dev_info(&pf->pdev->dev, "DCB restored after reset\n");
+ dev_info(dev, "DCB restored after reset\n");
ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
if (ret) {
- dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+ dev_err(dev, "Query Port ETS failed\n");
goto dcb_error;
}
return;
dcb_error:
- dev_err(&pf->pdev->dev, "Disabling DCB until new settings occur\n");
- prev_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*prev_cfg), GFP_KERNEL);
+ dev_err(dev, "Disabling DCB until new settings occur\n");
+ prev_cfg = kzalloc(sizeof(*prev_cfg), GFP_KERNEL);
+ if (!prev_cfg)
+ return;
+
prev_cfg->etscfg.willing = true;
prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec));
ice_pf_dcb_cfg(pf, prev_cfg, false);
- devm_kfree(&pf->pdev->dev, prev_cfg);
+ kfree(prev_cfg);
}
/**
@@ -418,18 +412,17 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
int ret = 0;
pi = pf->hw.port_info;
- newcfg = devm_kzalloc(&pf->pdev->dev, sizeof(*newcfg), GFP_KERNEL);
+ newcfg = kmemdup(&pi->local_dcbx_cfg, sizeof(*newcfg), GFP_KERNEL);
if (!newcfg)
return -ENOMEM;
- memcpy(newcfg, &pi->local_dcbx_cfg, sizeof(*newcfg));
memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg));
- dev_info(&pf->pdev->dev, "Configuring initial DCB values\n");
+ dev_info(ice_pf_to_dev(pf), "Configuring initial DCB values\n");
if (ice_pf_dcb_cfg(pf, newcfg, locked))
ret = -EINVAL;
- devm_kfree(&pf->pdev->dev, newcfg);
+ kfree(newcfg);
return ret;
}
@@ -437,9 +430,10 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
/**
* ice_dcb_sw_default_config - Apply a default DCB config
* @pf: PF to apply config to
+ * @ets_willing: configure ets willing
* @locked: was this function called with RTNL held
*/
-static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
+static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
{
struct ice_aqc_port_ets_elem buf = { 0 };
struct ice_dcbx_cfg *dcbcfg;
@@ -449,12 +443,13 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
hw = &pf->hw;
pi = hw->port_info;
- dcbcfg = devm_kzalloc(&pf->pdev->dev, sizeof(*dcbcfg), GFP_KERNEL);
+ dcbcfg = kzalloc(sizeof(*dcbcfg), GFP_KERNEL);
+ if (!dcbcfg)
+ return -ENOMEM;
- memset(dcbcfg, 0, sizeof(*dcbcfg));
memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg));
- dcbcfg->etscfg.willing = 1;
+ dcbcfg->etscfg.willing = ets_willing ? 1 : 0;
dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc;
dcbcfg->etscfg.tcbwtable[0] = 100;
dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
@@ -472,7 +467,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE;
ret = ice_pf_dcb_cfg(pf, dcbcfg, locked);
- devm_kfree(&pf->pdev->dev, dcbcfg);
+ kfree(dcbcfg);
if (ret)
return ret;
@@ -480,13 +475,112 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
}
/**
+ * ice_dcb_tc_contig - Check that TCs are contiguous
+ * @prio_table: pointer to priority table
+ *
+ * Check if TCs begin with TC0 and are contiguous
+ */
+static bool ice_dcb_tc_contig(u8 *prio_table)
+{
+ u8 max_tc = 0;
+ int i;
+
+ for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) {
+ u8 cur_tc = prio_table[i];
+
+ if (cur_tc > max_tc)
+ return false;
+ else if (cur_tc == max_tc)
+ max_tc++;
+ }
+
+ return true;
+}
+
+/**
+ * ice_dcb_noncontig_cfg - Configure DCB for non-contiguous TCs
+ * @pf: pointer to the PF struct
+ *
+ * If non-contiguous TCs, then configure SW DCB with TC0 and ETS non-willing
+ */
+static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
+{
+ struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+ struct device *dev = ice_pf_to_dev(pf);
+ int ret;
+
+ /* Configure SW DCB default with ETS non-willing */
+ ret = ice_dcb_sw_dflt_cfg(pf, false, true);
+ if (ret) {
+ dev_err(dev, "Failed to set local DCB config %d\n", ret);
+ return ret;
+ }
+
+ /* Reconfigure with ETS willing so that FW will send LLDP MIB event */
+ dcbcfg->etscfg.willing = 1;
+ ret = ice_set_dcb_cfg(pf->hw.port_info);
+ if (ret)
+ dev_err(dev, "Failed to set DCB to unwilling\n");
+
+ return ret;
+}
+
+/**
+ * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
+ * @pf: pointer to the PF struct
+ *
+ * Assumed caller has already disabled all VSIs before
+ * calling this function. Reconfiguring DCB based on
+ * local_dcbx_cfg.
+ */
+void ice_pf_dcb_recfg(struct ice_pf *pf)
+{
+ struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+ u8 tc_map = 0;
+ int v, ret;
+
+ /* Update each VSI */
+ ice_for_each_vsi(pf, v) {
+ struct ice_vsi *vsi = pf->vsi[v];
+
+ if (!vsi)
+ continue;
+
+ if (vsi->type == ICE_VSI_PF) {
+ tc_map = ice_dcb_get_ena_tc(dcbcfg);
+
+ /* If DCBX request non-contiguous TC, then configure
+ * default TC
+ */
+ if (!ice_dcb_tc_contig(dcbcfg->etscfg.prio_table)) {
+ tc_map = ICE_DFLT_TRAFFIC_CLASS;
+ ice_dcb_noncontig_cfg(pf);
+ }
+ } else {
+ tc_map = ICE_DFLT_TRAFFIC_CLASS;
+ }
+
+ ret = ice_vsi_cfg_tc(vsi, tc_map);
+ if (ret) {
+ dev_err(ice_pf_to_dev(pf), "Failed to config TC for VSI index: %d\n",
+ vsi->idx);
+ continue;
+ }
+
+ ice_vsi_map_rings_to_vectors(vsi);
+ if (vsi->type == ICE_VSI_PF)
+ ice_dcbnl_set_all(vsi);
+ }
+}
+
+/**
* ice_init_pf_dcb - initialize DCB for a PF
* @pf: PF to initialize DCB for
* @locked: Was function called with RTNL held
*/
int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
{
- struct device *dev = &pf->pdev->dev;
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_port_info *port_info;
struct ice_hw *hw = &pf->hw;
int err;
@@ -495,26 +589,39 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
err = ice_init_dcb(hw, false);
if (err && !port_info->is_sw_lldp) {
- dev_err(&pf->pdev->dev, "Error initializing DCB %d\n", err);
+ dev_err(dev, "Error initializing DCB %d\n", err);
goto dcb_init_err;
}
- dev_info(&pf->pdev->dev,
+ dev_info(dev,
"DCB is enabled in the hardware, max number of TCs supported on this port are %d\n",
pf->hw.func_caps.common_cap.maxtc);
if (err) {
+ struct ice_vsi *pf_vsi;
+
/* FW LLDP is disabled, activate SW DCBX/LLDP mode */
- dev_info(&pf->pdev->dev,
- "FW LLDP is disabled, DCBx/LLDP in SW mode.\n");
+ dev_info(dev, "FW LLDP is disabled, DCBx/LLDP in SW mode.\n");
clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
- err = ice_dcb_sw_dflt_cfg(pf, locked);
+ err = ice_dcb_sw_dflt_cfg(pf, true, locked);
if (err) {
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"Failed to set local DCB config %d\n", err);
err = -EIO;
goto dcb_init_err;
}
+ /* If the FW DCBX engine is not running then Rx LLDP packets
+ * need to be redirected up the stack.
+ */
+ pf_vsi = ice_get_main_vsi(pf);
+ if (!pf_vsi) {
+ dev_err(dev, "Failed to set local DCB config\n");
+ err = -EIO;
+ goto dcb_init_err;
+ }
+
+ ice_cfg_sw_lldp(pf_vsi, false, true);
+
pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
return 0;
}
@@ -623,10 +730,12 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
struct ice_rq_event_info *event)
{
struct ice_aqc_port_ets_elem buf = { 0 };
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_aqc_lldp_get_mib *mib;
struct ice_dcbx_cfg tmp_dcbx_cfg;
bool need_reconfig = false;
struct ice_port_info *pi;
+ struct ice_vsi *pf_vsi;
u8 type;
int ret;
@@ -635,8 +744,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
return;
if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) {
- dev_dbg(&pf->pdev->dev,
- "MIB Change Event in HOST mode\n");
+ dev_dbg(dev, "MIB Change Event in HOST mode\n");
return;
}
@@ -645,21 +753,20 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
/* Ignore if event is not for Nearest Bridge */
type = ((mib->type >> ICE_AQ_LLDP_BRID_TYPE_S) &
ICE_AQ_LLDP_BRID_TYPE_M);
- dev_dbg(&pf->pdev->dev, "LLDP event MIB bridge type 0x%x\n", type);
+ dev_dbg(dev, "LLDP event MIB bridge type 0x%x\n", type);
if (type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
return;
/* Check MIB Type and return if event for Remote MIB update */
type = mib->type & ICE_AQ_LLDP_MIB_TYPE_M;
- dev_dbg(&pf->pdev->dev,
- "LLDP event mib type %s\n", type ? "remote" : "local");
+ dev_dbg(dev, "LLDP event mib type %s\n", type ? "remote" : "local");
if (type == ICE_AQ_LLDP_MIB_REMOTE) {
/* Update the remote cached instance and return */
ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
&pi->remote_dcbx_cfg);
if (ret) {
- dev_err(&pf->pdev->dev, "Failed to get remote DCB config\n");
+ dev_err(dev, "Failed to get remote DCB config\n");
return;
}
}
@@ -673,37 +780,43 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
/* Get updated DCBX data from firmware */
ret = ice_get_dcb_cfg(pf->hw.port_info);
if (ret) {
- dev_err(&pf->pdev->dev, "Failed to get DCB config\n");
+ dev_err(dev, "Failed to get DCB config\n");
return;
}
/* No change detected in DCBX configs */
if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) {
- dev_dbg(&pf->pdev->dev,
- "No change detected in DCBX configuration.\n");
+ dev_dbg(dev, "No change detected in DCBX configuration.\n");
return;
}
need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
&pi->local_dcbx_cfg);
+ ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg);
if (!need_reconfig)
return;
/* Enable DCB tagging only when more than one TC */
if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) {
- dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n");
+ dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
set_bit(ICE_FLAG_DCB_ENA, pf->flags);
} else {
- dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n");
+ dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
}
+ pf_vsi = ice_get_main_vsi(pf);
+ if (!pf_vsi) {
+ dev_dbg(dev, "PF VSI doesn't exist\n");
+ return;
+ }
+
rtnl_lock();
- ice_pf_dis_all_vsi(pf, true);
+ ice_dis_vsi(pf_vsi, true);
ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
if (ret) {
- dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+ dev_err(dev, "Query Port ETS failed\n");
rtnl_unlock();
return;
}
@@ -711,6 +824,6 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
/* changes in configuration update VSI */
ice_pf_dcb_recfg(pf);
- ice_pf_ena_all_vsi(pf, true);
+ ice_ena_vsi(pf_vsi, true);
rtnl_unlock();
}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
index 661a6f7bca64..f15e5776f287 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -5,14 +5,22 @@
#define _ICE_DCB_LIB_H_
#include "ice.h"
+#include "ice_base.h"
#include "ice_lib.h"
#ifdef CONFIG_DCB
-#define ICE_TC_MAX_BW 100 /* Default Max BW percentage */
+#define ICE_TC_MAX_BW 100 /* Default Max BW percentage */
+#define ICE_DCB_HW_CHG_RST 0 /* DCB configuration changed with reset */
+#define ICE_DCB_NO_HW_CHG 1 /* DCB configuration did not change */
+#define ICE_DCB_HW_CHG 2 /* DCB configuration changed, no reset */
void ice_dcb_rebuild(struct ice_pf *pf);
u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg);
u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index);
+int
+ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked);
+void ice_pf_dcb_recfg(struct ice_pf *pf);
void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
void ice_update_dcb_stats(struct ice_pf *pf);
@@ -41,10 +49,25 @@ static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
return 1;
}
+static inline u8
+ice_dcb_get_tc(struct ice_vsi __always_unused *vsi,
+ int __always_unused queue_index)
+{
+ return 0;
+}
+
static inline int
ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked)
{
- dev_dbg(&pf->pdev->dev, "DCB not supported\n");
+ dev_dbg(ice_pf_to_dev(pf), "DCB not supported\n");
+ return -EOPNOTSUPP;
+}
+
+static inline int
+ice_pf_dcb_cfg(struct ice_pf __always_unused *pf,
+ struct ice_dcbx_cfg __always_unused *new_cfg,
+ bool __always_unused locked)
+{
return -EOPNOTSUPP;
}
@@ -56,6 +79,7 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring,
}
#define ice_update_dcb_stats(pf) do {} while (0)
+#define ice_pf_dcb_recfg(pf) do {} while (0)
#define ice_vsi_cfg_dcb_rings(vsi) do {} while (0)
#define ice_dcb_process_lldp_set_mib_change(pf, event) do {} while (0)
#define ice_set_cgd_num(tlan_ctx, ring) do {} while (0)
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
new file mode 100644
index 000000000000..d870c1aedc17
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
@@ -0,0 +1,933 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_dcb.h"
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+#include <net/dcbnl.h>
+
+#define ICE_APP_PROT_ID_ROCE 0x8915
+
+/**
+ * ice_dcbnl_devreset - perform enough of a ifdown/ifup to sync DCBNL info
+ * @netdev: device associated with interface that needs reset
+ */
+static void ice_dcbnl_devreset(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ while (ice_is_reset_in_progress(pf->state))
+ usleep_range(1000, 2000);
+
+ set_bit(__ICE_DCBNL_DEVRESET, pf->state);
+ dev_close(netdev);
+ netdev_state_change(netdev);
+ dev_open(netdev, NULL);
+ netdev_state_change(netdev);
+ clear_bit(__ICE_DCBNL_DEVRESET, pf->state);
+}
+
+/**
+ * ice_dcbnl_getets - retrieve local ETS configuration
+ * @netdev: the relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_getets(struct net_device *netdev, struct ieee_ets *ets)
+{
+ struct ice_dcbx_cfg *dcbxcfg;
+ struct ice_port_info *pi;
+ struct ice_pf *pf;
+
+ pf = ice_netdev_to_pf(netdev);
+ pi = pf->hw.port_info;
+ dcbxcfg = &pi->local_dcbx_cfg;
+
+ ets->willing = dcbxcfg->etscfg.willing;
+ ets->ets_cap = dcbxcfg->etscfg.maxtcs;
+ ets->cbs = dcbxcfg->etscfg.cbs;
+ memcpy(ets->tc_tx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_tx_bw));
+ memcpy(ets->tc_rx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_rx_bw));
+ memcpy(ets->tc_tsa, dcbxcfg->etscfg.tsatable, sizeof(ets->tc_tsa));
+ memcpy(ets->prio_tc, dcbxcfg->etscfg.prio_table, sizeof(ets->prio_tc));
+ memcpy(ets->tc_reco_bw, dcbxcfg->etsrec.tcbwtable,
+ sizeof(ets->tc_reco_bw));
+ memcpy(ets->tc_reco_tsa, dcbxcfg->etsrec.tsatable,
+ sizeof(ets->tc_reco_tsa));
+ memcpy(ets->reco_prio_tc, dcbxcfg->etscfg.prio_table,
+ sizeof(ets->reco_prio_tc));
+
+ return 0;
+}
+
+/**
+ * ice_dcbnl_setets - set IEEE ETS configuration
+ * @netdev: pointer to relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+ int bwcfg = 0, bwrec = 0;
+ int err, i, max_tc = 0;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+ return -EINVAL;
+
+ new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+ mutex_lock(&pf->tc_mutex);
+
+ new_cfg->etscfg.willing = ets->willing;
+ new_cfg->etscfg.cbs = ets->cbs;
+ ice_for_each_traffic_class(i) {
+ new_cfg->etscfg.tcbwtable[i] = ets->tc_tx_bw[i];
+ bwcfg += ets->tc_tx_bw[i];
+ new_cfg->etscfg.tsatable[i] = ets->tc_tsa[i];
+ new_cfg->etscfg.prio_table[i] = ets->prio_tc[i];
+ if (ets->prio_tc[i] > max_tc)
+ max_tc = ets->prio_tc[i];
+ new_cfg->etsrec.tcbwtable[i] = ets->tc_reco_bw[i];
+ bwrec += ets->tc_reco_bw[i];
+ new_cfg->etsrec.tsatable[i] = ets->tc_reco_tsa[i];
+ new_cfg->etsrec.prio_table[i] = ets->reco_prio_tc[i];
+ }
+
+ /* max_tc is a 1-8 value count of number of TC's, not a 0-7 value
+ * for the TC's index number. Add one to value if not zero, and
+ * for zero set it to the FW's default value
+ */
+ if (max_tc)
+ max_tc++;
+ else
+ max_tc = IEEE_8021QAZ_MAX_TCS;
+
+ new_cfg->etscfg.maxtcs = max_tc;
+
+ if (!bwcfg)
+ new_cfg->etscfg.tcbwtable[0] = 100;
+
+ if (!bwrec)
+ new_cfg->etsrec.tcbwtable[0] = 100;
+
+ err = ice_pf_dcb_cfg(pf, new_cfg, true);
+ /* return of zero indicates new cfg applied */
+ if (err == ICE_DCB_HW_CHG_RST)
+ ice_dcbnl_devreset(netdev);
+ if (err == ICE_DCB_NO_HW_CHG)
+ err = ICE_DCB_HW_CHG_RST;
+
+ mutex_unlock(&pf->tc_mutex);
+ return err;
+}
+
+/**
+ * ice_dcbnl_getnumtcs - Get max number of traffic classes supported
+ * @dev: pointer to netdev struct
+ * @tcid: TC ID
+ * @num: total number of TCs supported by the adapter
+ *
+ * Return the total number of TCs supported
+ */
+static int
+ice_dcbnl_getnumtcs(struct net_device *dev, int __always_unused tcid, u8 *num)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(dev);
+
+ if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+ return -EINVAL;
+
+ *num = IEEE_8021QAZ_MAX_TCS;
+ return 0;
+}
+
+/**
+ * ice_dcbnl_getdcbx - retrieve current DCBX capability
+ * @netdev: pointer to the netdev struct
+ */
+static u8 ice_dcbnl_getdcbx(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ return pf->dcbx_cap;
+}
+
+/**
+ * ice_dcbnl_setdcbx - set required DCBX capability
+ * @netdev: the corresponding netdev
+ * @mode: required mode
+ */
+static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ /* No support for LLD_MANAGED modes or CEE+IEEE */
+ if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+ ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) ||
+ !(mode & DCB_CAP_DCBX_HOST))
+ return ICE_DCB_NO_HW_CHG;
+
+ /* Already set to the given mode no change */
+ if (mode == pf->dcbx_cap)
+ return ICE_DCB_NO_HW_CHG;
+
+ pf->dcbx_cap = mode;
+ if (mode & DCB_CAP_DCBX_VER_CEE)
+ pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE;
+ else
+ pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE;
+
+ dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode);
+ return ICE_DCB_HW_CHG_RST;
+}
+
+/**
+ * ice_dcbnl_get_perm_hw_addr - MAC address used by DCBX
+ * @netdev: pointer to netdev struct
+ * @perm_addr: buffer to return permanent MAC address
+ */
+static void ice_dcbnl_get_perm_hw_addr(struct net_device *netdev, u8 *perm_addr)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+ int i, j;
+
+ memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
+ for (i = 0; i < netdev->addr_len; i++)
+ perm_addr[i] = pi->mac.perm_addr[i];
+
+ for (j = 0; j < netdev->addr_len; j++, i++)
+ perm_addr[i] = pi->mac.perm_addr[j];
+}
+
+/**
+ * ice_get_pfc_delay - Retrieve PFC Link Delay
+ * @hw: pointer to HW struct
+ * @delay: holds the PFC Link Delay value
+ */
+static void ice_get_pfc_delay(struct ice_hw *hw, u16 *delay)
+{
+ u32 val;
+
+ val = rd32(hw, PRTDCB_GENC);
+ *delay = (u16)((val & PRTDCB_GENC_PFCLDA_M) >> PRTDCB_GENC_PFCLDA_S);
+}
+
+/**
+ * ice_dcbnl_getpfc - retrieve local IEEE PFC config
+ * @netdev: pointer to netdev struct
+ * @pfc: struct to hold PFC info
+ */
+static int ice_dcbnl_getpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+ struct ice_dcbx_cfg *dcbxcfg;
+ int i;
+
+ dcbxcfg = &pi->local_dcbx_cfg;
+ pfc->pfc_cap = dcbxcfg->pfc.pfccap;
+ pfc->pfc_en = dcbxcfg->pfc.pfcena;
+ pfc->mbc = dcbxcfg->pfc.mbc;
+ ice_get_pfc_delay(&pf->hw, &pfc->delay);
+
+ ice_for_each_traffic_class(i) {
+ pfc->requests[i] = pf->stats.priority_xoff_tx[i];
+ pfc->indications[i] = pf->stats.priority_xoff_rx[i];
+ }
+
+ return 0;
+}
+
+/**
+ * ice_dcbnl_setpfc - set local IEEE PFC config
+ * @netdev: pointer to relevant netdev
+ * @pfc: pointer to struct holding PFC config
+ */
+static int ice_dcbnl_setpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+ int err;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+ return -EINVAL;
+
+ mutex_lock(&pf->tc_mutex);
+
+ new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+ if (pfc->pfc_cap)
+ new_cfg->pfc.pfccap = pfc->pfc_cap;
+ else
+ new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+
+ new_cfg->pfc.pfcena = pfc->pfc_en;
+
+ err = ice_pf_dcb_cfg(pf, new_cfg, true);
+ if (err == ICE_DCB_HW_CHG_RST)
+ ice_dcbnl_devreset(netdev);
+ if (err == ICE_DCB_NO_HW_CHG)
+ err = ICE_DCB_HW_CHG_RST;
+ mutex_unlock(&pf->tc_mutex);
+ return err;
+}
+
+/**
+ * ice_dcbnl_get_pfc_cfg - Get CEE PFC config
+ * @netdev: pointer to netdev struct
+ * @prio: corresponding user priority
+ * @setting: the PFC setting for given priority
+ */
+static void
+ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (prio >= ICE_MAX_USER_PRIORITY)
+ return;
+
+ *setting = (pi->local_dcbx_cfg.pfc.pfcena >> prio) & 0x1;
+ dev_dbg(ice_pf_to_dev(pf),
+ "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n",
+ prio, *setting, pi->local_dcbx_cfg.pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_set_pfc_cfg - Set CEE PFC config
+ * @netdev: the corresponding netdev
+ * @prio: User Priority
+ * @set: PFC setting to apply
+ */
+static void ice_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, u8 set)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (prio >= ICE_MAX_USER_PRIORITY)
+ return;
+
+ new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+ new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+ if (set)
+ new_cfg->pfc.pfcena |= BIT(prio);
+ else
+ new_cfg->pfc.pfcena &= ~BIT(prio);
+
+ dev_dbg(ice_pf_to_dev(pf), "Set PFC config UP:%d set:%d pfcena:0x%x\n",
+ prio, set, new_cfg->pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_getpfcstate - get CEE PFC mode
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getpfcstate(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+
+ /* Return enabled if any UP enabled for PFC */
+ if (pi->local_dcbx_cfg.pfc.pfcena)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * ice_dcbnl_getstate - get DCB enabled state
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getstate(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ u8 state = 0;
+
+ state = test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+
+ dev_dbg(ice_pf_to_dev(pf), "DCB enabled state = %d\n", state);
+ return state;
+}
+
+/**
+ * ice_dcbnl_setstate - Set CEE DCB state
+ * @netdev: pointer to relevant netdev
+ * @state: state value to set
+ */
+static u8 ice_dcbnl_setstate(struct net_device *netdev, u8 state)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return ICE_DCB_NO_HW_CHG;
+
+ /* Nothing to do */
+ if (!!state == test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+ return ICE_DCB_NO_HW_CHG;
+
+ if (state) {
+ set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ memcpy(&pf->hw.port_info->desired_dcbx_cfg,
+ &pf->hw.port_info->local_dcbx_cfg,
+ sizeof(struct ice_dcbx_cfg));
+ } else {
+ clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ }
+
+ return ICE_DCB_HW_CHG;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_tx - get CEE PG Tx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: traffic priority type
+ * @pgid: the BW group ID the traffic class belongs to
+ * @bw_pct: BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio,
+ u8 __always_unused *prio_type, u8 *pgid,
+ u8 __always_unused *bw_pct,
+ u8 __always_unused *up_map)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (prio >= ICE_MAX_USER_PRIORITY)
+ return;
+
+ *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio];
+ dev_dbg(ice_pf_to_dev(pf),
+ "Get PG config prio=%d tc=%d\n", prio, *pgid);
+}
+
+/**
+ * ice_dcbnl_set_pg_tc_cfg_tx - set CEE PG Tx config
+ * @netdev: pointer to relevant netdev
+ * @tc: the corresponding traffic class
+ * @prio_type: the traffic priority type
+ * @bwg_id: the BW group ID the TC belongs to
+ * @bw_pct: the BW perventage for the BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+ u8 __always_unused prio_type,
+ u8 __always_unused bwg_id,
+ u8 __always_unused bw_pct, u8 up_map)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+ int i;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (tc >= ICE_MAX_TRAFFIC_CLASS)
+ return;
+
+ new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+ /* prio_type, bwg_id and bw_pct per UP are not supported */
+
+ ice_for_each_traffic_class(i) {
+ if (up_map & BIT(i))
+ new_cfg->etscfg.prio_table[i] = tc;
+ }
+ new_cfg->etscfg.tsatable[tc] = ICE_IEEE_TSA_ETS;
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_tx - Get CEE PGBW config
+ * @netdev: pointer to the netdev struct
+ * @pgid: corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 *bw_pct)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+ return;
+
+ *bw_pct = pi->local_dcbx_cfg.etscfg.tcbwtable[pgid];
+ dev_dbg(ice_pf_to_dev(pf), "Get PG BW config tc=%d bw_pct=%d\n",
+ pgid, *bw_pct);
+}
+
+/**
+ * ice_dcbnl_set_pg_bwg_cfg_tx - set CEE PG Tx BW config
+ * @netdev: the corresponding netdev
+ * @pgid: Correspongind traffic class
+ * @bw_pct: the BW percentage for the specified TC
+ */
+static void
+ice_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 bw_pct)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+ return;
+
+ new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+ new_cfg->etscfg.tcbwtable[pgid] = bw_pct;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_rx - Get CEE PG Rx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: the traffic priority type
+ * @pgid: the PG ID
+ * @bw_pct: the BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio,
+ u8 __always_unused *prio_type, u8 *pgid,
+ u8 __always_unused *bw_pct,
+ u8 __always_unused *up_map)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_port_info *pi = pf->hw.port_info;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ if (prio >= ICE_MAX_USER_PRIORITY)
+ return;
+
+ *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio];
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_rx - Get CEE PG BW Rx config
+ * @netdev: pointer to netdev struct
+ * @pgid: the corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int __always_unused pgid,
+ u8 *bw_pct)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return;
+
+ *bw_pct = 0;
+}
+
+/**
+ * ice_dcbnl_get_cap - Get DCBX capabilities of adapter
+ * @netdev: pointer to netdev struct
+ * @capid: the capability type
+ * @cap: the capability value
+ */
+static u8 ice_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+ if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
+ return ICE_DCB_NO_HW_CHG;
+
+ switch (capid) {
+ case DCB_CAP_ATTR_PG:
+ *cap = true;
+ break;
+ case DCB_CAP_ATTR_PFC:
+ *cap = true;
+ break;
+ case DCB_CAP_ATTR_UP2TC:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_PG_TCS:
+ *cap = 0x80;
+ break;
+ case DCB_CAP_ATTR_PFC_TCS:
+ *cap = 0x80;
+ break;
+ case DCB_CAP_ATTR_GSP:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_BCN:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_DCBX:
+ *cap = pf->dcbx_cap;
+ break;
+ default:
+ *cap = false;
+ break;
+ }
+
+ dev_dbg(ice_pf_to_dev(pf), "DCBX Get Capability cap=%d capval=0x%x\n",
+ capid, *cap);
+ return 0;
+}
+
+/**
+ * ice_dcbnl_getapp - get CEE APP
+ * @netdev: pointer to netdev struct
+ * @idtype: the App selector
+ * @id: the App ethtype or port number
+ */
+static int ice_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct dcb_app app = {
+ .selector = idtype,
+ .protocol = id,
+ };
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return -EINVAL;
+
+ return dcb_getapp(netdev, &app);
+}
+
+/**
+ * ice_dcbnl_find_app - Search for APP in given DCB config
+ * @cfg: struct to hold DCBX config
+ * @app: struct to hold app data to look for
+ */
+static bool
+ice_dcbnl_find_app(struct ice_dcbx_cfg *cfg,
+ struct ice_dcb_app_priority_table *app)
+{
+ int i;
+
+ for (i = 0; i < cfg->numapps; i++) {
+ if (app->selector == cfg->app[i].selector &&
+ app->prot_id == cfg->app[i].prot_id &&
+ app->priority == cfg->app[i].priority)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_dcbnl_setapp - set local IEEE App config
+ * @netdev: relevant netdev struct
+ * @app: struct to hold app config info
+ */
+static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcb_app_priority_table new_app;
+ struct ice_dcbx_cfg *old_cfg, *new_cfg;
+ int ret;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+ return -EINVAL;
+
+ mutex_lock(&pf->tc_mutex);
+
+ new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+ old_cfg = &pf->hw.port_info->local_dcbx_cfg;
+
+ if (old_cfg->numapps == ICE_DCBX_MAX_APPS) {
+ ret = -EINVAL;
+ goto setapp_out;
+ }
+
+ ret = dcb_ieee_setapp(netdev, app);
+ if (ret)
+ goto setapp_out;
+
+ new_app.selector = app->selector;
+ new_app.prot_id = app->protocol;
+ new_app.priority = app->priority;
+ if (ice_dcbnl_find_app(old_cfg, &new_app)) {
+ ret = 0;
+ goto setapp_out;
+ }
+
+ new_cfg->app[new_cfg->numapps++] = new_app;
+ ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+ /* return of zero indicates new cfg applied */
+ if (ret == ICE_DCB_HW_CHG_RST)
+ ice_dcbnl_devreset(netdev);
+ if (ret == ICE_DCB_NO_HW_CHG)
+ ret = ICE_DCB_HW_CHG_RST;
+
+setapp_out:
+ mutex_unlock(&pf->tc_mutex);
+ return ret;
+}
+
+/**
+ * ice_dcbnl_delapp - Delete local IEEE App config
+ * @netdev: relevant netdev
+ * @app: struct to hold app too delete
+ *
+ * Will not delete first application required by the FW
+ */
+static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *old_cfg, *new_cfg;
+ int i, j, ret = 0;
+
+ if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)
+ return -EINVAL;
+
+ mutex_lock(&pf->tc_mutex);
+ ret = dcb_ieee_delapp(netdev, app);
+ if (ret)
+ goto delapp_out;
+
+ old_cfg = &pf->hw.port_info->local_dcbx_cfg;
+
+ if (old_cfg->numapps == 1)
+ goto delapp_out;
+
+ new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+ for (i = 1; i < new_cfg->numapps; i++) {
+ if (app->selector == new_cfg->app[i].selector &&
+ app->protocol == new_cfg->app[i].prot_id &&
+ app->priority == new_cfg->app[i].priority) {
+ new_cfg->app[i].selector = 0;
+ new_cfg->app[i].prot_id = 0;
+ new_cfg->app[i].priority = 0;
+ break;
+ }
+ }
+
+ /* Did not find DCB App */
+ if (i == new_cfg->numapps) {
+ ret = -EINVAL;
+ goto delapp_out;
+ }
+
+ new_cfg->numapps--;
+
+ for (j = i; j < new_cfg->numapps; j++) {
+ new_cfg->app[i].selector = old_cfg->app[j + 1].selector;
+ new_cfg->app[i].prot_id = old_cfg->app[j + 1].prot_id;
+ new_cfg->app[i].priority = old_cfg->app[j + 1].priority;
+ }
+
+ ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+ /* return of zero indicates new cfg applied */
+ if (ret == ICE_DCB_HW_CHG_RST)
+ ice_dcbnl_devreset(netdev);
+ if (ret == ICE_DCB_NO_HW_CHG)
+ ret = ICE_DCB_HW_CHG_RST;
+
+delapp_out:
+ mutex_unlock(&pf->tc_mutex);
+ return ret;
+}
+
+/**
+ * ice_dcbnl_cee_set_all - Commit CEE DCB settings to HW
+ * @netdev: the corresponding netdev
+ */
+static u8 ice_dcbnl_cee_set_all(struct net_device *netdev)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_dcbx_cfg *new_cfg;
+ int err;
+
+ if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+ !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return ICE_DCB_NO_HW_CHG;
+
+ new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+ mutex_lock(&pf->tc_mutex);
+
+ err = ice_pf_dcb_cfg(pf, new_cfg, true);
+
+ mutex_unlock(&pf->tc_mutex);
+ return (err != ICE_DCB_HW_CHG_RST) ? ICE_DCB_NO_HW_CHG : err;
+}
+
+static const struct dcbnl_rtnl_ops dcbnl_ops = {
+ /* IEEE 802.1Qaz std */
+ .ieee_getets = ice_dcbnl_getets,
+ .ieee_setets = ice_dcbnl_setets,
+ .ieee_getpfc = ice_dcbnl_getpfc,
+ .ieee_setpfc = ice_dcbnl_setpfc,
+ .ieee_setapp = ice_dcbnl_setapp,
+ .ieee_delapp = ice_dcbnl_delapp,
+
+ /* CEE std */
+ .getstate = ice_dcbnl_getstate,
+ .setstate = ice_dcbnl_setstate,
+ .getpermhwaddr = ice_dcbnl_get_perm_hw_addr,
+ .setpgtccfgtx = ice_dcbnl_set_pg_tc_cfg_tx,
+ .setpgbwgcfgtx = ice_dcbnl_set_pg_bwg_cfg_tx,
+ .getpgtccfgtx = ice_dcbnl_get_pg_tc_cfg_tx,
+ .getpgbwgcfgtx = ice_dcbnl_get_pg_bwg_cfg_tx,
+ .getpgtccfgrx = ice_dcbnl_get_pg_tc_cfg_rx,
+ .getpgbwgcfgrx = ice_dcbnl_get_pg_bwg_cfg_rx,
+ .setpfccfg = ice_dcbnl_set_pfc_cfg,
+ .getpfccfg = ice_dcbnl_get_pfc_cfg,
+ .setall = ice_dcbnl_cee_set_all,
+ .getcap = ice_dcbnl_get_cap,
+ .getnumtcs = ice_dcbnl_getnumtcs,
+ .getpfcstate = ice_dcbnl_getpfcstate,
+ .getapp = ice_dcbnl_getapp,
+
+ /* DCBX configuration */
+ .getdcbx = ice_dcbnl_getdcbx,
+ .setdcbx = ice_dcbnl_setdcbx,
+};
+
+/**
+ * ice_dcbnl_set_all - set all the apps and ieee data from DCBX config
+ * @vsi: pointer to VSI struct
+ */
+void ice_dcbnl_set_all(struct ice_vsi *vsi)
+{
+ struct net_device *netdev = vsi->netdev;
+ struct ice_dcbx_cfg *dcbxcfg;
+ struct ice_port_info *pi;
+ struct dcb_app sapp;
+ struct ice_pf *pf;
+ int i;
+
+ if (!netdev)
+ return;
+
+ pf = ice_netdev_to_pf(netdev);
+ pi = pf->hw.port_info;
+
+ /* SW DCB taken care of by SW Default Config */
+ if (pf->dcbx_cap & DCB_CAP_DCBX_HOST)
+ return;
+
+ /* DCB not enabled */
+ if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+ return;
+
+ dcbxcfg = &pi->local_dcbx_cfg;
+
+ for (i = 0; i < dcbxcfg->numapps; i++) {
+ u8 prio, tc_map;
+
+ prio = dcbxcfg->app[i].priority;
+ tc_map = BIT(dcbxcfg->etscfg.prio_table[prio]);
+
+ /* Add APP only if the TC is enabled for this VSI */
+ if (tc_map & vsi->tc_cfg.ena_tc) {
+ sapp.selector = dcbxcfg->app[i].selector;
+ sapp.protocol = dcbxcfg->app[i].prot_id;
+ sapp.priority = prio;
+ dcb_ieee_setapp(netdev, &sapp);
+ }
+ }
+ /* Notify user-space of the changes */
+ dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, 0, 0);
+}
+
+/**
+ * ice_dcbnl_vsi_del_app - Delete APP on all VSIs
+ * @vsi: pointer to the main VSI
+ * @app: APP to delete
+ *
+ * Delete given APP from all the VSIs for given PF
+ */
+static void
+ice_dcbnl_vsi_del_app(struct ice_vsi *vsi,
+ struct ice_dcb_app_priority_table *app)
+{
+ struct dcb_app sapp;
+ int err;
+
+ sapp.selector = app->selector;
+ sapp.protocol = app->prot_id;
+ sapp.priority = app->priority;
+ err = ice_dcbnl_delapp(vsi->netdev, &sapp);
+ dev_dbg(&vsi->back->pdev->dev,
+ "Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n",
+ vsi->idx, err, app->selector, app->prot_id, app->priority);
+}
+
+/**
+ * ice_dcbnl_flush_apps - Delete all removed APPs
+ * @pf: the corresponding PF
+ * @old_cfg: old DCBX configuration data
+ * @new_cfg: new DCBX configuration data
+ *
+ * Find and delete all APPS that are not present in the passed
+ * DCB configuration
+ */
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+ struct ice_dcbx_cfg *new_cfg)
+{
+ struct ice_vsi *main_vsi = ice_get_main_vsi(pf);
+ int i;
+
+ if (!main_vsi)
+ return;
+
+ for (i = 0; i < old_cfg->numapps; i++) {
+ struct ice_dcb_app_priority_table app = old_cfg->app[i];
+
+ /* The APP is not available anymore delete it */
+ if (!ice_dcbnl_find_app(new_cfg, &app))
+ ice_dcbnl_vsi_del_app(main_vsi, &app);
+ }
+}
+
+/**
+ * ice_dcbnl_setup - setup DCBNL
+ * @vsi: VSI to get associated netdev from
+ */
+void ice_dcbnl_setup(struct ice_vsi *vsi)
+{
+ struct net_device *netdev = vsi->netdev;
+ struct ice_pf *pf;
+
+ pf = ice_netdev_to_pf(netdev);
+ if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+ return;
+
+ netdev->dcbnl_ops = &dcbnl_ops;
+ ice_dcbnl_set_all(vsi);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
new file mode 100644
index 000000000000..6c630a362293
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_NL_H_
+#define _ICE_DCB_NL_H_
+
+#ifdef CONFIG_DCB
+void ice_dcbnl_setup(struct ice_vsi *vsi);
+void ice_dcbnl_set_all(struct ice_vsi *vsi);
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+ struct ice_dcbx_cfg *new_cfg);
+#else
+#define ice_dcbnl_setup(vsi) do {} while (0)
+#define ice_dcbnl_set_all(vsi) do {} while (0)
+#define ice_dcbnl_flush_apps(pf, old_cfg, new_cfg) do {} while (0)
+#endif /* CONFIG_DCB */
+
+#endif /* _ICE_DCB_NL_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 7e23034df955..aec3c6c379df 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -156,6 +156,7 @@ struct ice_priv_flag {
static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
+ ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
};
#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags)
@@ -247,7 +248,7 @@ ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
int ret = 0;
u16 *buf;
- dev = &pf->pdev->dev;
+ dev = ice_pf_to_dev(pf);
eeprom->magic = hw->vendor_id | (hw->device_id << 16);
@@ -342,6 +343,7 @@ static u64 ice_eeprom_test(struct net_device *netdev)
static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
{
struct ice_pf *pf = (struct ice_pf *)hw->back;
+ struct device *dev = ice_pf_to_dev(pf);
static const u32 patterns[] = {
0x5A5A5A5A, 0xA5A5A5A5,
0x00000000, 0xFFFFFFFF
@@ -357,7 +359,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
val = rd32(hw, reg);
if (val == pattern)
continue;
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
, __func__, reg, pattern, val);
return 1;
@@ -366,7 +368,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
wr32(hw, reg, orig_val);
val = rd32(hw, reg);
if (val != orig_val) {
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
, __func__, reg, orig_val, val);
return 1;
@@ -506,7 +508,7 @@ static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size)
if (!pf)
return -EINVAL;
- data = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL);
+ data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -623,7 +625,7 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
continue;
rx_buf = &rx_ring->rx_buf[i];
- received_buf = page_address(rx_buf->page);
+ received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
if (ice_lbtest_check_frame(received_buf))
valid_frames++;
@@ -648,9 +650,11 @@ static u64 ice_loopback_test(struct net_device *netdev)
u8 broadcast[ETH_ALEN], ret = 0;
int num_frames, valid_frames;
LIST_HEAD(tmp_list);
+ struct device *dev;
u8 *tx_frame;
int i;
+ dev = ice_pf_to_dev(pf);
netdev_info(netdev, "loopback test\n");
test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
@@ -711,12 +715,12 @@ static u64 ice_loopback_test(struct net_device *netdev)
ret = 10;
lbtest_free_frame:
- devm_kfree(&pf->pdev->dev, tx_frame);
+ devm_kfree(dev, tx_frame);
remove_mac_filters:
if (ice_remove_mac(&pf->hw, &tmp_list))
netdev_err(netdev, "Could not remove MAC filter for the test VSI");
free_mac_list:
- ice_free_fltr_list(&pf->pdev->dev, &tmp_list);
+ ice_free_fltr_list(dev, &tmp_list);
lbtest_mac_dis:
/* Disable MAC loopback after the test is completed. */
if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
@@ -773,6 +777,9 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
struct ice_netdev_priv *np = netdev_priv(netdev);
bool if_running = netif_running(netdev);
struct ice_pf *pf = np->vsi->back;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
netdev_info(netdev, "offline testing starting\n");
@@ -780,7 +787,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
set_bit(__ICE_TESTING, pf->state);
if (ice_active_vfs(pf)) {
- dev_warn(&pf->pdev->dev,
+ dev_warn(dev,
"Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
data[ICE_ETH_TEST_REG] = 1;
data[ICE_ETH_TEST_EEPROM] = 1;
@@ -815,8 +822,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
int status = ice_open(netdev);
if (status) {
- dev_err(&pf->pdev->dev,
- "Could not open device %s, err %d",
+ dev_err(dev, "Could not open device %s, err %d",
pf->int_name, status);
}
}
@@ -961,7 +967,7 @@ static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
}
/* Get last SW configuration */
- caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
if (!caps)
return -ENOMEM;
@@ -1006,7 +1012,7 @@ static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
}
done:
- devm_kfree(&vsi->back->pdev->dev, caps);
+ kfree(caps);
return err;
}
@@ -1082,7 +1088,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
break;
}
- caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
if (!caps)
return -ENOMEM;
@@ -1109,7 +1115,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
fecparam->fec |= ETHTOOL_FEC_OFF;
done:
- devm_kfree(&vsi->back->pdev->dev, caps);
+ kfree(caps);
return err;
}
@@ -1154,12 +1160,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
+ struct device *dev;
int ret = 0;
u32 i;
if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE))
return -EINVAL;
+ dev = ice_pf_to_dev(pf);
set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS);
@@ -1188,7 +1196,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
* events to respond to.
*/
if (status)
- dev_info(&pf->pdev->dev,
+ dev_info(dev,
"Failed to unreg for LLDP events\n");
/* The AQ call to stop the FW LLDP agent will generate
@@ -1196,20 +1204,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
*/
status = ice_aq_stop_lldp(&pf->hw, true, true, NULL);
if (status)
- dev_warn(&pf->pdev->dev,
- "Fail to stop LLDP agent\n");
+ dev_warn(dev, "Fail to stop LLDP agent\n");
/* Use case for having the FW LLDP agent stopped
* will likely not need DCB, so failure to init is
* not a concern of ethtool
*/
status = ice_init_pf_dcb(pf, true);
if (status)
- dev_warn(&pf->pdev->dev, "Fail to init DCB\n");
-
- /* Forward LLDP packets to default VSI so that they
- * are passed up the stack
- */
- ice_cfg_sw_lldp(vsi, false, true);
+ dev_warn(dev, "Fail to init DCB\n");
} else {
enum ice_status status;
bool dcbx_agent_status;
@@ -1219,8 +1221,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
*/
status = ice_aq_start_lldp(&pf->hw, true, NULL);
if (status)
- dev_warn(&pf->pdev->dev,
- "Fail to start LLDP Agent\n");
+ dev_warn(dev, "Fail to start LLDP Agent\n");
/* AQ command to start FW DCBX agent will fail if
* the agent is already started
@@ -1229,10 +1230,9 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
&dcbx_agent_status,
NULL);
if (status)
- dev_dbg(&pf->pdev->dev,
- "Failed to start FW DCBX\n");
+ dev_dbg(dev, "Failed to start FW DCBX\n");
- dev_info(&pf->pdev->dev, "FW DCBX agent is %s\n",
+ dev_info(dev, "FW DCBX agent is %s\n",
dcbx_agent_status ? "ACTIVE" : "DISABLED");
/* Failure to configure MIB change or init DCB is not
@@ -1242,7 +1242,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
*/
status = ice_init_pf_dcb(pf, true);
if (status)
- dev_dbg(&pf->pdev->dev, "Fail to init DCB\n");
+ dev_dbg(dev, "Fail to init DCB\n");
/* Remove rule to direct LLDP packets to default VSI.
* The FW LLDP engine will now be consuming them.
@@ -1252,10 +1252,15 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
/* Register for MIB change events */
status = ice_cfg_lldp_mib_change(&pf->hw, true);
if (status)
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(dev,
"Fail to enable MIB change events\n");
}
}
+ if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
+ /* down and up VSI so that changes of Rx cfg are reflected. */
+ ice_down(vsi);
+ ice_up(vsi);
+ }
clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
return ret;
}
@@ -2140,7 +2145,7 @@ ice_get_link_ksettings(struct net_device *netdev,
/* flow control is symmetric and always supported */
ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
- caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
if (!caps)
return -ENOMEM;
@@ -2198,7 +2203,7 @@ ice_get_link_ksettings(struct net_device *netdev,
ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
done:
- devm_kfree(&vsi->back->pdev->dev, caps);
+ kfree(caps);
return err;
}
@@ -2427,8 +2432,7 @@ ice_set_link_ksettings(struct net_device *netdev,
usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX);
}
- abilities = devm_kzalloc(&pf->pdev->dev, sizeof(*abilities),
- GFP_KERNEL);
+ abilities = kzalloc(sizeof(*abilities), GFP_KERNEL);
if (!abilities)
return -ENOMEM;
@@ -2520,7 +2524,7 @@ ice_set_link_ksettings(struct net_device *netdev,
}
done:
- devm_kfree(&pf->pdev->dev, abilities);
+ kfree(abilities);
clear_bit(__ICE_CFG_BUSY, pf->state);
return err;
@@ -2577,6 +2581,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
{
struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_ring *xdp_rings = NULL;
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
int i, timeout = 50, err = 0;
@@ -2611,6 +2616,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
return 0;
}
+ /* If there is a AF_XDP UMEM attached to any of Rx rings,
+ * disallow changing the number of descriptors -- regardless
+ * if the netdev is running or not.
+ */
+ if (ice_xsk_any_rx_ring_ena(vsi))
+ return -EBUSY;
+
while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
timeout--;
if (!timeout)
@@ -2624,6 +2636,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
vsi->tx_rings[i]->count = new_tx_cnt;
for (i = 0; i < vsi->alloc_rxq; i++)
vsi->rx_rings[i]->count = new_rx_cnt;
+ if (ice_is_xdp_ena_vsi(vsi))
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ vsi->xdp_rings[i]->count = new_tx_cnt;
+ vsi->num_tx_desc = new_tx_cnt;
+ vsi->num_rx_desc = new_rx_cnt;
netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
goto done;
}
@@ -2635,14 +2652,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n",
vsi->tx_rings[0]->count, new_tx_cnt);
- tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
- sizeof(*tx_rings), GFP_KERNEL);
+ tx_rings = kcalloc(vsi->num_txq, sizeof(*tx_rings), GFP_KERNEL);
if (!tx_rings) {
err = -ENOMEM;
goto done;
}
- for (i = 0; i < vsi->alloc_txq; i++) {
+ ice_for_each_txq(vsi, i) {
/* clone ring and setup updated count */
tx_rings[i] = *vsi->tx_rings[i];
tx_rings[i].count = new_tx_cnt;
@@ -2650,15 +2666,42 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
tx_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&tx_rings[i]);
if (err) {
- while (i) {
- i--;
+ while (i--)
ice_clean_tx_ring(&tx_rings[i]);
- }
- devm_kfree(&pf->pdev->dev, tx_rings);
+ kfree(tx_rings);
goto done;
}
}
+ if (!ice_is_xdp_ena_vsi(vsi))
+ goto process_rx;
+
+ /* alloc updated XDP resources */
+ netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
+ vsi->xdp_rings[0]->count, new_tx_cnt);
+
+ xdp_rings = kcalloc(vsi->num_xdp_txq, sizeof(*xdp_rings), GFP_KERNEL);
+ if (!xdp_rings) {
+ err = -ENOMEM;
+ goto free_tx;
+ }
+
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ /* clone ring and setup updated count */
+ xdp_rings[i] = *vsi->xdp_rings[i];
+ xdp_rings[i].count = new_tx_cnt;
+ xdp_rings[i].desc = NULL;
+ xdp_rings[i].tx_buf = NULL;
+ err = ice_setup_tx_ring(&xdp_rings[i]);
+ if (err) {
+ while (i--)
+ ice_clean_tx_ring(&xdp_rings[i]);
+ kfree(xdp_rings);
+ goto free_tx;
+ }
+ ice_set_ring_xdp(&xdp_rings[i]);
+ }
+
process_rx:
if (new_rx_cnt == vsi->rx_rings[0]->count)
goto process_link;
@@ -2667,14 +2710,13 @@ process_rx:
netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n",
vsi->rx_rings[0]->count, new_rx_cnt);
- rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
- sizeof(*rx_rings), GFP_KERNEL);
+ rx_rings = kcalloc(vsi->num_rxq, sizeof(*rx_rings), GFP_KERNEL);
if (!rx_rings) {
err = -ENOMEM;
goto done;
}
- for (i = 0; i < vsi->alloc_rxq; i++) {
+ ice_for_each_rxq(vsi, i) {
/* clone ring and setup updated count */
rx_rings[i] = *vsi->rx_rings[i];
rx_rings[i].count = new_rx_cnt;
@@ -2698,7 +2740,7 @@ rx_unwind:
i--;
ice_free_rx_ring(&rx_rings[i]);
}
- devm_kfree(&pf->pdev->dev, rx_rings);
+ kfree(rx_rings);
err = -ENOMEM;
goto free_tx;
}
@@ -2712,15 +2754,15 @@ process_link:
ice_down(vsi);
if (tx_rings) {
- for (i = 0; i < vsi->alloc_txq; i++) {
+ ice_for_each_txq(vsi, i) {
ice_free_tx_ring(vsi->tx_rings[i]);
*vsi->tx_rings[i] = tx_rings[i];
}
- devm_kfree(&pf->pdev->dev, tx_rings);
+ kfree(tx_rings);
}
if (rx_rings) {
- for (i = 0; i < vsi->alloc_rxq; i++) {
+ ice_for_each_rxq(vsi, i) {
ice_free_rx_ring(vsi->rx_rings[i]);
/* copy the real tail offset */
rx_rings[i].tail = vsi->rx_rings[i]->tail;
@@ -2734,9 +2776,19 @@ process_link:
rx_rings[i].next_to_alloc = 0;
*vsi->rx_rings[i] = rx_rings[i];
}
- devm_kfree(&pf->pdev->dev, rx_rings);
+ kfree(rx_rings);
+ }
+
+ if (xdp_rings) {
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ *vsi->xdp_rings[i] = xdp_rings[i];
+ }
+ kfree(xdp_rings);
}
+ vsi->num_tx_desc = new_tx_cnt;
+ vsi->num_rx_desc = new_rx_cnt;
ice_up(vsi);
}
goto done;
@@ -2744,9 +2796,9 @@ process_link:
free_tx:
/* error cleanup if the Rx allocations failed after getting Tx */
if (tx_rings) {
- for (i = 0; i < vsi->alloc_txq; i++)
+ ice_for_each_txq(vsi, i)
ice_free_tx_ring(&tx_rings[i]);
- devm_kfree(&pf->pdev->dev, tx_rings);
+ kfree(tx_rings);
}
done:
@@ -2794,7 +2846,6 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_port_info *pi = np->vsi->port_info;
struct ice_aqc_get_phy_caps_data *pcaps;
- struct ice_vsi *vsi = np->vsi;
struct ice_dcbx_cfg *dcbx_cfg;
enum ice_status status;
@@ -2804,8 +2855,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
dcbx_cfg = &pi->local_dcbx_cfg;
- pcaps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*pcaps),
- GFP_KERNEL);
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
return;
@@ -2828,7 +2878,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
pause->rx_pause = 1;
out:
- devm_kfree(&vsi->back->pdev->dev, pcaps);
+ kfree(pcaps);
}
/**
@@ -3009,7 +3059,7 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
return -EIO;
}
- lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL);
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
if (!lut)
return -ENOMEM;
@@ -3022,7 +3072,7 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
indir[i] = (u32)(lut[i]);
out:
- devm_kfree(&pf->pdev->dev, lut);
+ kfree(lut);
return ret;
}
@@ -3043,8 +3093,10 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
+ struct device *dev;
u8 *seed = NULL;
+ dev = ice_pf_to_dev(pf);
if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
@@ -3057,8 +3109,7 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
if (key) {
if (!vsi->rss_hkey_user) {
vsi->rss_hkey_user =
- devm_kzalloc(&pf->pdev->dev,
- ICE_VSIQF_HKEY_ARRAY_SIZE,
+ devm_kzalloc(dev, ICE_VSIQF_HKEY_ARRAY_SIZE,
GFP_KERNEL);
if (!vsi->rss_hkey_user)
return -ENOMEM;
@@ -3068,8 +3119,7 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
}
if (!vsi->rss_lut_user) {
- vsi->rss_lut_user = devm_kzalloc(&pf->pdev->dev,
- vsi->rss_table_size,
+ vsi->rss_lut_user = devm_kzalloc(dev, vsi->rss_table_size,
GFP_KERNEL);
if (!vsi->rss_lut_user)
return -ENOMEM;
@@ -3092,6 +3142,188 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
return 0;
}
+/**
+ * ice_get_max_txq - return the maximum number of Tx queues for in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_txq(struct ice_pf *pf)
+{
+ return min_t(int, num_online_cpus(),
+ pf->hw.func_caps.common_cap.num_txq);
+}
+
+/**
+ * ice_get_max_rxq - return the maximum number of Rx queues for in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_rxq(struct ice_pf *pf)
+{
+ return min_t(int, num_online_cpus(),
+ pf->hw.func_caps.common_cap.num_rxq);
+}
+
+/**
+ * ice_get_combined_cnt - return the current number of combined channels
+ * @vsi: PF VSI pointer
+ *
+ * Go through all queue vectors and count ones that have both Rx and Tx ring
+ * attached
+ */
+static u32 ice_get_combined_cnt(struct ice_vsi *vsi)
+{
+ u32 combined = 0;
+ int q_idx;
+
+ ice_for_each_q_vector(vsi, q_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ if (q_vector->rx.ring && q_vector->tx.ring)
+ combined++;
+ }
+
+ return combined;
+}
+
+/**
+ * ice_get_channels - get the current and max supported channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static void
+ice_get_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+
+ /* check to see if VSI is active */
+ if (test_bit(__ICE_DOWN, vsi->state))
+ return;
+
+ /* report maximum channels */
+ ch->max_rx = ice_get_max_rxq(pf);
+ ch->max_tx = ice_get_max_txq(pf);
+ ch->max_combined = min_t(int, ch->max_rx, ch->max_tx);
+
+ /* report current channels */
+ ch->combined_count = ice_get_combined_cnt(vsi);
+ ch->rx_count = vsi->num_rxq - ch->combined_count;
+ ch->tx_count = vsi->num_txq - ch->combined_count;
+}
+
+/**
+ * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size
+ * @vsi: VSI to reconfigure RSS LUT on
+ * @req_rss_size: requested range of queue numbers for hashing
+ *
+ * Set the VSI's RSS parameters, configure the RSS LUT based on these.
+ */
+static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
+{
+ struct ice_pf *pf = vsi->back;
+ enum ice_status status;
+ struct device *dev;
+ struct ice_hw *hw;
+ int err = 0;
+ u8 *lut;
+
+ dev = ice_pf_to_dev(pf);
+ hw = &pf->hw;
+
+ if (!req_rss_size)
+ return -EINVAL;
+
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ /* set RSS LUT parameters */
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ vsi->rss_size = 1;
+ } else {
+ struct ice_hw_common_caps *caps = &hw->func_caps.common_cap;
+
+ vsi->rss_size = min_t(int, req_rss_size,
+ BIT(caps->rss_table_entry_width));
+ }
+
+ /* create/set RSS LUT */
+ ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+ status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut,
+ vsi->rss_table_size);
+ if (status) {
+ dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n",
+ status, hw->adminq.rq_last_status);
+ err = -EIO;
+ }
+
+ kfree(lut);
+ return err;
+}
+
+/**
+ * ice_set_channels - set the number channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ int new_rx = 0, new_tx = 0;
+ u32 curr_combined;
+
+ /* do not support changing channels in Safe Mode */
+ if (ice_is_safe_mode(pf)) {
+ netdev_err(dev, "Changing channel in Safe Mode is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ /* do not support changing other_count */
+ if (ch->other_count)
+ return -EINVAL;
+
+ curr_combined = ice_get_combined_cnt(vsi);
+
+ /* these checks are for cases where user didn't specify a particular
+ * value on cmd line but we get non-zero value anyway via
+ * get_channels(); look at ethtool.c in ethtool repository (the user
+ * space part), particularly, do_schannels() routine
+ */
+ if (ch->rx_count == vsi->num_rxq - curr_combined)
+ ch->rx_count = 0;
+ if (ch->tx_count == vsi->num_txq - curr_combined)
+ ch->tx_count = 0;
+ if (ch->combined_count == curr_combined)
+ ch->combined_count = 0;
+
+ if (!(ch->combined_count || (ch->rx_count && ch->tx_count))) {
+ netdev_err(dev, "Please specify at least 1 Rx and 1 Tx channel\n");
+ return -EINVAL;
+ }
+
+ new_rx = ch->combined_count + ch->rx_count;
+ new_tx = ch->combined_count + ch->tx_count;
+
+ if (new_rx > ice_get_max_rxq(pf)) {
+ netdev_err(dev, "Maximum allowed Rx channels is %d\n",
+ ice_get_max_rxq(pf));
+ return -EINVAL;
+ }
+ if (new_tx > ice_get_max_txq(pf)) {
+ netdev_err(dev, "Maximum allowed Tx channels is %d\n",
+ ice_get_max_txq(pf));
+ return -EINVAL;
+ }
+
+ ice_vsi_recfg_qs(vsi, new_rx, new_tx);
+
+ if (new_rx && !netif_is_rxfh_configured(dev))
+ return ice_vsi_set_dflt_rss_lut(vsi, new_rx);
+
+ return 0;
+}
+
enum ice_container_type {
ICE_RX_CONTAINER,
ICE_TX_CONTAINER,
@@ -3131,7 +3363,7 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type,
ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
break;
default:
- dev_dbg(&pf->pdev->dev, "Invalid c_type %d\n", c_type);
+ dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type);
return -EINVAL;
}
@@ -3271,7 +3503,8 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
break;
default:
- dev_dbg(&pf->pdev->dev, "Invalid container type %d\n", c_type);
+ dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n",
+ c_type);
return -EINVAL;
}
@@ -3368,10 +3601,17 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
struct ice_vsi *vsi = np->vsi;
if (q_num < 0) {
- int i;
+ int v_idx;
+
+ ice_for_each_q_vector(vsi, v_idx) {
+ /* In some cases if DCB is configured the num_[rx|tx]q
+ * can be less than vsi->num_q_vectors. This check
+ * accounts for that so we don't report a false failure
+ */
+ if (v_idx >= vsi->num_rxq && v_idx >= vsi->num_txq)
+ goto set_complete;
- ice_for_each_q_vector(vsi, i) {
- if (ice_set_q_coalesce(vsi, ec, i))
+ if (ice_set_q_coalesce(vsi, ec, v_idx))
return -EINVAL;
}
goto set_complete;
@@ -3398,6 +3638,151 @@ ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
return __ice_set_coalesce(netdev, ec, q_num);
}
+#define ICE_I2C_EEPROM_DEV_ADDR 0xA0
+#define ICE_I2C_EEPROM_DEV_ADDR2 0xA2
+#define ICE_MODULE_TYPE_SFP 0x03
+#define ICE_MODULE_TYPE_QSFP_PLUS 0x0D
+#define ICE_MODULE_TYPE_QSFP28 0x11
+#define ICE_MODULE_SFF_ADDR_MODE 0x04
+#define ICE_MODULE_SFF_DIAG_CAPAB 0x40
+#define ICE_MODULE_REVISION_ADDR 0x01
+#define ICE_MODULE_SFF_8472_COMP 0x5E
+#define ICE_MODULE_SFF_8472_SWAP 0x5C
+#define ICE_MODULE_QSFP_MAX_LEN 640
+
+/**
+ * ice_get_module_info - get SFF module type and revision information
+ * @netdev: network interface device structure
+ * @modinfo: module EEPROM size and layout information structure
+ */
+static int
+ice_get_module_info(struct net_device *netdev,
+ struct ethtool_modinfo *modinfo)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status status;
+ u8 sff8472_comp = 0;
+ u8 sff8472_swap = 0;
+ u8 sff8636_rev = 0;
+ u8 value = 0;
+
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00,
+ 0, &value, 1, 0, NULL);
+ if (status)
+ return -EIO;
+
+ switch (value) {
+ case ICE_MODULE_TYPE_SFP:
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+ ICE_MODULE_SFF_8472_COMP, 0x00, 0,
+ &sff8472_comp, 1, 0, NULL);
+ if (status)
+ return -EIO;
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+ ICE_MODULE_SFF_8472_SWAP, 0x00, 0,
+ &sff8472_swap, 1, 0, NULL);
+ if (status)
+ return -EIO;
+
+ if (sff8472_swap & ICE_MODULE_SFF_ADDR_MODE) {
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ } else if (sff8472_comp &&
+ (sff8472_swap & ICE_MODULE_SFF_DIAG_CAPAB)) {
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ }
+ break;
+ case ICE_MODULE_TYPE_QSFP_PLUS:
+ case ICE_MODULE_TYPE_QSFP28:
+ status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+ ICE_MODULE_REVISION_ADDR, 0x00, 0,
+ &sff8636_rev, 1, 0, NULL);
+ if (status)
+ return -EIO;
+ /* Check revision compliance */
+ if (sff8636_rev > 0x02) {
+ /* Module is SFF-8636 compliant */
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+ }
+ break;
+ default:
+ netdev_warn(netdev,
+ "SFF Module Type not recognized.\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * ice_get_module_eeprom - fill buffer with SFF EEPROM contents
+ * @netdev: network interface device structure
+ * @ee: EEPROM dump request structure
+ * @data: buffer to be filled with EEPROM contents
+ */
+static int
+ice_get_module_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *ee, u8 *data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ u8 addr = ICE_I2C_EEPROM_DEV_ADDR;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status status;
+ bool is_sfp = false;
+ u16 offset = 0;
+ u8 value = 0;
+ u8 page = 0;
+ int i;
+
+ status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0,
+ &value, 1, 0, NULL);
+ if (status)
+ return -EIO;
+
+ if (!ee || !ee->len || !data)
+ return -EINVAL;
+
+ if (value == ICE_MODULE_TYPE_SFP)
+ is_sfp = true;
+
+ for (i = 0; i < ee->len; i++) {
+ offset = i + ee->offset;
+
+ /* Check if we need to access the other memory page */
+ if (is_sfp) {
+ if (offset >= ETH_MODULE_SFF_8079_LEN) {
+ offset -= ETH_MODULE_SFF_8079_LEN;
+ addr = ICE_I2C_EEPROM_DEV_ADDR2;
+ }
+ } else {
+ while (offset >= ETH_MODULE_SFF_8436_LEN) {
+ /* Compute memory page number and offset. */
+ offset -= ETH_MODULE_SFF_8436_LEN / 2;
+ page++;
+ }
+ }
+
+ status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp,
+ &value, 1, 0, NULL);
+ if (status)
+ value = 0;
+ data[i] = value;
+ }
+ return 0;
+}
+
static const struct ethtool_ops ice_ethtool_ops = {
.get_link_ksettings = ice_get_link_ksettings,
.set_link_ksettings = ice_set_link_ksettings,
@@ -3428,11 +3813,15 @@ static const struct ethtool_ops ice_ethtool_ops = {
.get_rxfh_indir_size = ice_get_rxfh_indir_size,
.get_rxfh = ice_get_rxfh,
.set_rxfh = ice_set_rxfh,
+ .get_channels = ice_get_channels,
+ .set_channels = ice_set_channels,
.get_ts_info = ethtool_op_get_ts_info,
.get_per_queue_coalesce = ice_get_per_q_coalesce,
.set_per_queue_coalesce = ice_set_per_q_coalesce,
.get_fecparam = ice_get_fecparam,
.set_fecparam = ice_set_fecparam,
+ .get_module_info = ice_get_module_info,
+ .get_module_eeprom = ice_get_module_eeprom,
};
static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
@@ -3451,6 +3840,7 @@ static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
.get_ringparam = ice_get_ringparam,
.set_ringparam = ice_set_ringparam,
.nway_reset = ice_nway_reset,
+ .get_channels = ice_get_channels,
};
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 152fbd556e9b..e8f32350fed2 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -52,6 +52,9 @@
#define PF_MBX_ATQLEN_ATQLEN_M ICE_M(0x3FF, 0)
#define PF_MBX_ATQLEN_ATQENABLE_M BIT(31)
#define PF_MBX_ATQT 0x0022E300
+#define PRTDCB_GENC 0x00083000
+#define PRTDCB_GENC_PFCLDA_S 16
+#define PRTDCB_GENC_PFCLDA_M ICE_M(0xFFFF, 16)
#define PRTDCB_GENS 0x00083020
#define PRTDCB_GENS_DCBX_STATUS_S 0
#define PRTDCB_GENS_DCBX_STATUS_M ICE_M(0x7, 0)
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 2aac8f13daeb..ad34f22d44ef 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -211,7 +211,7 @@ enum ice_flex_rx_mdid {
/* Rx/Tx Flag64 packet flag bits */
enum ice_flg64_bits {
ICE_FLG_PKT_DSI = 0,
- ICE_FLG_EVLAN_x8100 = 15,
+ ICE_FLG_EVLAN_x8100 = 14,
ICE_FLG_EVLAN_x9100,
ICE_FLG_VLAN_x8100,
ICE_FLG_TNL_MAC = 22,
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index cc755382df25..e7449248fab4 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2,232 +2,26 @@
/* Copyright (c) 2018, Intel Corporation. */
#include "ice.h"
+#include "ice_base.h"
#include "ice_lib.h"
#include "ice_dcb_lib.h"
/**
- * ice_setup_rx_ctx - Configure a receive ring context
- * @ring: The Rx ring to configure
- *
- * Configure the Rx descriptor ring in RLAN context.
+ * ice_vsi_type_str - maps VSI type enum to string equivalents
+ * @type: VSI type enum
*/
-static int ice_setup_rx_ctx(struct ice_ring *ring)
+const char *ice_vsi_type_str(enum ice_vsi_type type)
{
- struct ice_vsi *vsi = ring->vsi;
- struct ice_hw *hw = &vsi->back->hw;
- u32 rxdid = ICE_RXDID_FLEX_NIC;
- struct ice_rlan_ctx rlan_ctx;
- u32 regval;
- u16 pf_q;
- int err;
-
- /* what is Rx queue number in global space of 2K Rx queues */
- pf_q = vsi->rxq_map[ring->q_index];
-
- /* clear the context structure first */
- memset(&rlan_ctx, 0, sizeof(rlan_ctx));
-
- rlan_ctx.base = ring->dma >> 7;
-
- rlan_ctx.qlen = ring->count;
-
- /* Receive Packet Data Buffer Size.
- * The Packet Data Buffer Size is defined in 128 byte units.
- */
- rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
-
- /* use 32 byte descriptors */
- rlan_ctx.dsize = 1;
-
- /* Strip the Ethernet CRC bytes before the packet is posted to host
- * memory.
- */
- rlan_ctx.crcstrip = 1;
-
- /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
- rlan_ctx.l2tsel = 1;
-
- rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
- rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
- rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
-
- /* This controls whether VLAN is stripped from inner headers
- * The VLAN in the inner L2 header is stripped to the receive
- * descriptor if enabled by this flag.
- */
- rlan_ctx.showiv = 0;
-
- /* Max packet size for this queue - must not be set to a larger value
- * than 5 x DBUF
- */
- rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
- ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
-
- /* Rx queue threshold in units of 64 */
- rlan_ctx.lrxqthresh = 1;
-
- /* Enable Flexible Descriptors in the queue context which
- * allows this driver to select a specific receive descriptor format
- */
- if (vsi->type != ICE_VSI_VF) {
- regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
- regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
- QRXFLXP_CNTXT_RXDID_IDX_M;
-
- /* increasing context priority to pick up profile ID;
- * default is 0x01; setting to 0x03 to ensure profile
- * is programming if prev context is of same priority
- */
- regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
- QRXFLXP_CNTXT_RXDID_PRIO_M;
-
- wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
- }
-
- /* Absolute queue number out of 2K needs to be passed */
- err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
- if (err) {
- dev_err(&vsi->back->pdev->dev,
- "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
- pf_q, err);
- return -EIO;
- }
-
- if (vsi->type == ICE_VSI_VF)
- return 0;
-
- /* init queue specific tail register */
- ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
- writel(0, ring->tail);
- ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
-
- return 0;
-}
-
-/**
- * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
- * @ring: The Tx ring to configure
- * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
- * @pf_q: queue index in the PF space
- *
- * Configure the Tx descriptor ring in TLAN context.
- */
-static void
-ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
-{
- struct ice_vsi *vsi = ring->vsi;
- struct ice_hw *hw = &vsi->back->hw;
-
- tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
-
- tlan_ctx->port_num = vsi->port_info->lport;
-
- /* Transmit Queue Length */
- tlan_ctx->qlen = ring->count;
-
- ice_set_cgd_num(tlan_ctx, ring);
-
- /* PF number */
- tlan_ctx->pf_num = hw->pf_id;
-
- /* queue belongs to a specific VSI type
- * VF / VM index should be programmed per vmvf_type setting:
- * for vmvf_type = VF, it is VF number between 0-256
- * for vmvf_type = VM, it is VM number between 0-767
- * for PF or EMP this field should be set to zero
- */
- switch (vsi->type) {
- case ICE_VSI_LB:
- /* fall through */
+ switch (type) {
case ICE_VSI_PF:
- tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
- break;
+ return "ICE_VSI_PF";
case ICE_VSI_VF:
- /* Firmware expects vmvf_num to be absolute VF ID */
- tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
- tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
- break;
+ return "ICE_VSI_VF";
+ case ICE_VSI_LB:
+ return "ICE_VSI_LB";
default:
- return;
- }
-
- /* make sure the context is associated with the right VSI */
- tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
-
- tlan_ctx->tso_ena = ICE_TX_LEGACY;
- tlan_ctx->tso_qnum = pf_q;
-
- /* Legacy or Advanced Host Interface:
- * 0: Advanced Host Interface
- * 1: Legacy Host Interface
- */
- tlan_ctx->legacy_int = ICE_TX_LEGACY;
-}
-
-/**
- * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
- * @pf: the PF being configured
- * @pf_q: the PF queue
- * @ena: enable or disable state of the queue
- *
- * This routine will wait for the given Rx queue of the PF to reach the
- * enabled or disabled state.
- * Returns -ETIMEDOUT in case of failing to reach the requested state after
- * multiple retries; else will return 0 in case of success.
- */
-static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
-{
- int i;
-
- for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
- if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
- QRX_CTRL_QENA_STAT_M))
- return 0;
-
- usleep_range(20, 40);
+ return "unknown";
}
-
- return -ETIMEDOUT;
-}
-
-/**
- * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
- * @vsi: the VSI being configured
- * @ena: start or stop the Rx rings
- * @rxq_idx: Rx queue index
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
-{
- int pf_q = vsi->rxq_map[rxq_idx];
- struct ice_pf *pf = vsi->back;
- struct ice_hw *hw = &pf->hw;
- int ret = 0;
- u32 rx_reg;
-
- rx_reg = rd32(hw, QRX_CTRL(pf_q));
-
- /* Skip if the queue is already in the requested state */
- if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
- return 0;
-
- /* turn on/off the queue */
- if (ena)
- rx_reg |= QRX_CTRL_QENA_REQ_M;
- else
- rx_reg &= ~QRX_CTRL_QENA_REQ_M;
- wr32(hw, QRX_CTRL(pf_q), rx_reg);
-
- /* wait for the change to finish */
- ret = ice_pf_rxq_wait(pf, pf_q, ena);
- if (ret)
- dev_err(&pf->pdev->dev,
- "VSI idx %d Rx ring %d %sable timeout\n",
- vsi->idx, pf_q, (ena ? "en" : "dis"));
-
- return ret;
}
/**
@@ -258,36 +52,39 @@ static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena)
static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
{
struct ice_pf *pf = vsi->back;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
/* allocate memory for both Tx and Rx ring pointers */
- vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+ vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq,
sizeof(*vsi->tx_rings), GFP_KERNEL);
if (!vsi->tx_rings)
return -ENOMEM;
- vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+ vsi->rx_rings = devm_kcalloc(dev, vsi->alloc_rxq,
sizeof(*vsi->rx_rings), GFP_KERNEL);
if (!vsi->rx_rings)
goto err_rings;
- vsi->txq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+ /* XDP will have vsi->alloc_txq Tx queues as well, so double the size */
+ vsi->txq_map = devm_kcalloc(dev, (2 * vsi->alloc_txq),
sizeof(*vsi->txq_map), GFP_KERNEL);
if (!vsi->txq_map)
goto err_txq_map;
- vsi->rxq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+ vsi->rxq_map = devm_kcalloc(dev, vsi->alloc_rxq,
sizeof(*vsi->rxq_map), GFP_KERNEL);
if (!vsi->rxq_map)
goto err_rxq_map;
-
/* There is no need to allocate q_vectors for a loopback VSI. */
if (vsi->type == ICE_VSI_LB)
return 0;
/* allocate memory for q_vector pointers */
- vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, vsi->num_q_vectors,
+ vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors,
sizeof(*vsi->q_vectors), GFP_KERNEL);
if (!vsi->q_vectors)
goto err_vectors;
@@ -295,13 +92,13 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
return 0;
err_vectors:
- devm_kfree(&pf->pdev->dev, vsi->rxq_map);
+ devm_kfree(dev, vsi->rxq_map);
err_rxq_map:
- devm_kfree(&pf->pdev->dev, vsi->txq_map);
+ devm_kfree(dev, vsi->txq_map);
err_txq_map:
- devm_kfree(&pf->pdev->dev, vsi->rx_rings);
+ devm_kfree(dev, vsi->rx_rings);
err_rings:
- devm_kfree(&pf->pdev->dev, vsi->tx_rings);
+ devm_kfree(dev, vsi->tx_rings);
return -ENOMEM;
}
@@ -345,15 +142,24 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
case ICE_VSI_PF:
vsi->alloc_txq = min_t(int, ice_get_avail_txq_count(pf),
num_online_cpus());
+ if (vsi->req_txq) {
+ vsi->alloc_txq = vsi->req_txq;
+ vsi->num_txq = vsi->req_txq;
+ }
pf->num_lan_tx = vsi->alloc_txq;
/* only 1 Rx queue unless RSS is enabled */
- if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
vsi->alloc_rxq = 1;
- else
+ } else {
vsi->alloc_rxq = min_t(int, ice_get_avail_rxq_count(pf),
num_online_cpus());
+ if (vsi->req_rxq) {
+ vsi->alloc_rxq = vsi->req_rxq;
+ vsi->num_rxq = vsi->req_rxq;
+ }
+ }
pf->num_lan_rx = vsi->alloc_rxq;
@@ -375,7 +181,7 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
vsi->alloc_rxq = 1;
break;
default:
- dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+ dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi->type);
break;
}
@@ -421,7 +227,7 @@ void ice_vsi_delete(struct ice_vsi *vsi)
struct ice_vsi_ctx *ctxt;
enum ice_status status;
- ctxt = devm_kzalloc(&pf->pdev->dev, sizeof(*ctxt), GFP_KERNEL);
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
if (!ctxt)
return;
@@ -433,10 +239,10 @@ void ice_vsi_delete(struct ice_vsi *vsi)
status = ice_free_vsi(&pf->hw, vsi->idx, ctxt, false, NULL);
if (status)
- dev_err(&pf->pdev->dev, "Failed to delete VSI %i in FW\n",
- vsi->vsi_num);
+ dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %d\n",
+ vsi->vsi_num, status);
- devm_kfree(&pf->pdev->dev, ctxt);
+ kfree(ctxt);
}
/**
@@ -446,26 +252,29 @@ void ice_vsi_delete(struct ice_vsi *vsi)
static void ice_vsi_free_arrays(struct ice_vsi *vsi)
{
struct ice_pf *pf = vsi->back;
+ struct device *dev;
+
+ dev = ice_pf_to_dev(pf);
/* free the ring and vector containers */
if (vsi->q_vectors) {
- devm_kfree(&pf->pdev->dev, vsi->q_vectors);
+ devm_kfree(dev, vsi->q_vectors);
vsi->q_vectors = NULL;
}
if (vsi->tx_rings) {
- devm_kfree(&pf->pdev->dev, vsi->tx_rings);
+ devm_kfree(dev, vsi->tx_rings);
vsi->tx_rings = NULL;
}
if (vsi->rx_rings) {
- devm_kfree(&pf->pdev->dev, vsi->rx_rings);
+ devm_kfree(dev, vsi->rx_rings);
vsi->rx_rings = NULL;
}
if (vsi->txq_map) {
- devm_kfree(&pf->pdev->dev, vsi->txq_map);
+ devm_kfree(dev, vsi->txq_map);
vsi->txq_map = NULL;
}
if (vsi->rxq_map) {
- devm_kfree(&pf->pdev->dev, vsi->rxq_map);
+ devm_kfree(dev, vsi->rxq_map);
vsi->rxq_map = NULL;
}
}
@@ -482,6 +291,7 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
int ice_vsi_clear(struct ice_vsi *vsi)
{
struct ice_pf *pf = NULL;
+ struct device *dev;
if (!vsi)
return 0;
@@ -490,10 +300,10 @@ int ice_vsi_clear(struct ice_vsi *vsi)
return -EINVAL;
pf = vsi->back;
+ dev = ice_pf_to_dev(pf);
if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) {
- dev_dbg(&pf->pdev->dev, "vsi does not exist at pf->vsi[%d]\n",
- vsi->idx);
+ dev_dbg(dev, "vsi does not exist at pf->vsi[%d]\n", vsi->idx);
return -EINVAL;
}
@@ -506,7 +316,7 @@ int ice_vsi_clear(struct ice_vsi *vsi)
ice_vsi_free_arrays(vsi);
mutex_unlock(&pf->sw_mutex);
- devm_kfree(&pf->pdev->dev, vsi);
+ devm_kfree(dev, vsi);
return 0;
}
@@ -539,6 +349,7 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
static struct ice_vsi *
ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
{
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_vsi *vsi = NULL;
/* Need to protect the allocation of the VSIs at the PF level */
@@ -549,11 +360,11 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
* is available to be populated
*/
if (pf->next_vsi == ICE_NO_VSI) {
- dev_dbg(&pf->pdev->dev, "out of VSI slots!\n");
+ dev_dbg(dev, "out of VSI slots!\n");
goto unlock_pf;
}
- vsi = devm_kzalloc(&pf->pdev->dev, sizeof(*vsi), GFP_KERNEL);
+ vsi = devm_kzalloc(dev, sizeof(*vsi), GFP_KERNEL);
if (!vsi)
goto unlock_pf;
@@ -585,7 +396,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
goto err_rings;
break;
default:
- dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+ dev_warn(dev, "Unknown VSI type %d\n", vsi->type);
goto unlock_pf;
}
@@ -598,7 +409,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
goto unlock_pf;
err_rings:
- devm_kfree(&pf->pdev->dev, vsi);
+ devm_kfree(dev, vsi);
vsi = NULL;
unlock_pf:
mutex_unlock(&pf->sw_mutex);
@@ -606,88 +417,6 @@ unlock_pf:
}
/**
- * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
- * @qs_cfg: gathered variables needed for PF->VSI queues assignment
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
-{
- int offset, i;
-
- mutex_lock(qs_cfg->qs_mutex);
- offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
- 0, qs_cfg->q_count, 0);
- if (offset >= qs_cfg->pf_map_size) {
- mutex_unlock(qs_cfg->qs_mutex);
- return -ENOMEM;
- }
-
- bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
- for (i = 0; i < qs_cfg->q_count; i++)
- qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
- mutex_unlock(qs_cfg->qs_mutex);
-
- return 0;
-}
-
-/**
- * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
- * @qs_cfg: gathered variables needed for pf->vsi queues assignment
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
-{
- int i, index = 0;
-
- mutex_lock(qs_cfg->qs_mutex);
- for (i = 0; i < qs_cfg->q_count; i++) {
- index = find_next_zero_bit(qs_cfg->pf_map,
- qs_cfg->pf_map_size, index);
- if (index >= qs_cfg->pf_map_size)
- goto err_scatter;
- set_bit(index, qs_cfg->pf_map);
- qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
- }
- mutex_unlock(qs_cfg->qs_mutex);
-
- return 0;
-err_scatter:
- for (index = 0; index < i; index++) {
- clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
- qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
- }
- mutex_unlock(qs_cfg->qs_mutex);
-
- return -ENOMEM;
-}
-
-/**
- * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
- * @qs_cfg: gathered variables needed for pf->vsi queues assignment
- *
- * This function first tries to find contiguous space. If it is not successful,
- * it tries with the scatter approach.
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
-{
- int ret = 0;
-
- ret = __ice_vsi_get_qs_contig(qs_cfg);
- if (ret) {
- /* contig failed, so try with scatter approach */
- qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
- qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
- qs_cfg->scatter_count);
- ret = __ice_vsi_get_qs_sc(qs_cfg);
- }
- return ret;
-}
-
-/**
* ice_vsi_get_qs - Assign queues from PF to VSI
* @vsi: the VSI to assign queues to
*
@@ -769,14 +498,15 @@ bool ice_is_safe_mode(struct ice_pf *pf)
*/
static void ice_rss_clean(struct ice_vsi *vsi)
{
- struct ice_pf *pf;
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
- pf = vsi->back;
+ dev = ice_pf_to_dev(pf);
if (vsi->rss_hkey_user)
- devm_kfree(&pf->pdev->dev, vsi->rss_hkey_user);
+ devm_kfree(dev, vsi->rss_hkey_user);
if (vsi->rss_lut_user)
- devm_kfree(&pf->pdev->dev, vsi->rss_lut_user);
+ devm_kfree(dev, vsi->rss_lut_user);
}
/**
@@ -814,7 +544,7 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
case ICE_VSI_LB:
break;
default:
- dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n",
+ dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n",
vsi->type);
break;
}
@@ -918,7 +648,9 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
else
max_rss = ICE_MAX_SMALL_RSS_QS;
qcount_rx = min_t(int, rx_numq_tc, max_rss);
- qcount_rx = min_t(int, qcount_rx, vsi->rss_size);
+ if (!vsi->req_rxq)
+ qcount_rx = min_t(int, qcount_rx,
+ vsi->rss_size);
}
}
@@ -990,9 +722,11 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
{
u8 lut_type, hash_type;
+ struct device *dev;
struct ice_pf *pf;
pf = vsi->back;
+ dev = ice_pf_to_dev(pf);
switch (vsi->type) {
case ICE_VSI_PF:
@@ -1006,10 +740,11 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
break;
case ICE_VSI_LB:
- dev_dbg(&pf->pdev->dev, "Unsupported VSI type %d\n", vsi->type);
+ dev_dbg(dev, "Unsupported VSI type %s\n",
+ ice_vsi_type_str(vsi->type));
return;
default:
- dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+ dev_warn(dev, "Unknown VSI type %d\n", vsi->type);
return;
}
@@ -1022,18 +757,21 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
/**
* ice_vsi_init - Create and initialize a VSI
* @vsi: the VSI being configured
+ * @init_vsi: is this call creating a VSI
*
* This initializes a VSI context depending on the VSI type to be added and
* passes it down to the add_vsi aq command to create a new VSI.
*/
-static int ice_vsi_init(struct ice_vsi *vsi)
+static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
{
struct ice_pf *pf = vsi->back;
struct ice_hw *hw = &pf->hw;
struct ice_vsi_ctx *ctxt;
+ struct device *dev;
int ret = 0;
- ctxt = devm_kzalloc(&pf->pdev->dev, sizeof(*ctxt), GFP_KERNEL);
+ dev = ice_pf_to_dev(pf);
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
if (!ctxt)
return -ENOMEM;
@@ -1050,7 +788,8 @@ static int ice_vsi_init(struct ice_vsi *vsi)
ctxt->vf_num = vsi->vf_id + hw->func_caps.vf_base_id;
break;
default:
- return -ENODEV;
+ ret = -ENODEV;
+ goto out;
}
ice_set_dflt_vsi_ctx(ctxt);
@@ -1059,11 +798,24 @@ static int ice_vsi_init(struct ice_vsi *vsi)
ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
/* Set LUT type and HASH type if RSS is enabled */
- if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
ice_set_rss_vsi_ctx(ctxt, vsi);
+ /* if updating VSI context, make sure to set valid_section:
+ * to indicate which section of VSI context being updated
+ */
+ if (!init_vsi)
+ ctxt->info.valid_sections |=
+ cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+ }
ctxt->info.sw_id = vsi->port_info->sw_id;
ice_vsi_setup_q_map(vsi, ctxt);
+ if (!init_vsi) /* means VSI being updated */
+ /* must to indicate which section of VSI context are
+ * being modified
+ */
+ ctxt->info.valid_sections |=
+ cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
/* Enable MAC Antispoof with new VSI being initialized or updated */
if (vsi->type == ICE_VSI_VF && pf->vf[vsi->vf_id].spoofchk) {
@@ -1080,11 +832,20 @@ static int ice_vsi_init(struct ice_vsi *vsi)
cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
}
- ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL);
- if (ret) {
- dev_err(&pf->pdev->dev,
- "Add VSI failed, err %d\n", ret);
- return -EIO;
+ if (init_vsi) {
+ ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL);
+ if (ret) {
+ dev_err(dev, "Add VSI failed, err %d\n", ret);
+ ret = -EIO;
+ goto out;
+ }
+ } else {
+ ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (ret) {
+ dev_err(dev, "Update VSI failed, err %d\n", ret);
+ ret = -EIO;
+ goto out;
+ }
}
/* keep context for update VSI operations */
@@ -1093,131 +854,9 @@ static int ice_vsi_init(struct ice_vsi *vsi)
/* record VSI number returned */
vsi->vsi_num = ctxt->vsi_num;
- devm_kfree(&pf->pdev->dev, ctxt);
- return ret;
-}
-
-/**
- * ice_free_q_vector - Free memory allocated for a specific interrupt vector
- * @vsi: VSI having the memory freed
- * @v_idx: index of the vector to be freed
- */
-static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
-{
- struct ice_q_vector *q_vector;
- struct ice_pf *pf = vsi->back;
- struct ice_ring *ring;
-
- if (!vsi->q_vectors[v_idx]) {
- dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n",
- v_idx);
- return;
- }
- q_vector = vsi->q_vectors[v_idx];
-
- ice_for_each_ring(ring, q_vector->tx)
- ring->q_vector = NULL;
- ice_for_each_ring(ring, q_vector->rx)
- ring->q_vector = NULL;
-
- /* only VSI with an associated netdev is set up with NAPI */
- if (vsi->netdev)
- netif_napi_del(&q_vector->napi);
-
- devm_kfree(&pf->pdev->dev, q_vector);
- vsi->q_vectors[v_idx] = NULL;
-}
-
-/**
- * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
- * @vsi: the VSI having memory freed
- */
-void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
-{
- int v_idx;
-
- ice_for_each_q_vector(vsi, v_idx)
- ice_free_q_vector(vsi, v_idx);
-}
-
-/**
- * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
- * @vsi: the VSI being configured
- * @v_idx: index of the vector in the VSI struct
- *
- * We allocate one q_vector. If allocation fails we return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
-{
- struct ice_pf *pf = vsi->back;
- struct ice_q_vector *q_vector;
-
- /* allocate q_vector */
- q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
- if (!q_vector)
- return -ENOMEM;
-
- q_vector->vsi = vsi;
- q_vector->v_idx = v_idx;
- if (vsi->type == ICE_VSI_VF)
- goto out;
- /* only set affinity_mask if the CPU is online */
- if (cpu_online(v_idx))
- cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
-
- /* This will not be called in the driver load path because the netdev
- * will not be created yet. All other cases with register the NAPI
- * handler here (i.e. resume, reset/rebuild, etc.)
- */
- if (vsi->netdev)
- netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
- NAPI_POLL_WEIGHT);
-
out:
- /* tie q_vector and VSI together */
- vsi->q_vectors[v_idx] = q_vector;
-
- return 0;
-}
-
-/**
- * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
- * @vsi: the VSI being configured
- *
- * We allocate one q_vector per queue interrupt. If allocation fails we
- * return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
-{
- struct ice_pf *pf = vsi->back;
- int v_idx = 0, num_q_vectors;
- int err;
-
- if (vsi->q_vectors[0]) {
- dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
- vsi->vsi_num);
- return -EEXIST;
- }
-
- num_q_vectors = vsi->num_q_vectors;
-
- for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
- err = ice_vsi_alloc_q_vector(vsi, v_idx);
- if (err)
- goto err_out;
- }
-
- return 0;
-
-err_out:
- while (v_idx--)
- ice_free_q_vector(vsi, v_idx);
-
- dev_err(&pf->pdev->dev,
- "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
- vsi->num_q_vectors, vsi->vsi_num, err);
- vsi->num_q_vectors = 0;
- return err;
+ kfree(ctxt);
+ return ret;
}
/**
@@ -1233,14 +872,16 @@ err_out:
static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
{
struct ice_pf *pf = vsi->back;
+ struct device *dev;
u16 num_q_vectors;
+ dev = ice_pf_to_dev(pf);
/* SRIOV doesn't grab irq_tracker entries for each VSI */
if (vsi->type == ICE_VSI_VF)
return 0;
if (vsi->base_vector) {
- dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n",
+ dev_dbg(dev, "VSI %d has non-zero base vector %d\n",
vsi->vsi_num, vsi->base_vector);
return -EEXIST;
}
@@ -1250,7 +891,7 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
vsi->idx);
if (vsi->base_vector < 0) {
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"Failed to get tracking for %d vectors for VSI %d, err=%d\n",
num_q_vectors, vsi->vsi_num, vsi->base_vector);
return -ENOENT;
@@ -1293,8 +934,10 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi)
static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
{
struct ice_pf *pf = vsi->back;
+ struct device *dev;
int i;
+ dev = ice_pf_to_dev(pf);
/* Allocate Tx rings */
for (i = 0; i < vsi->alloc_txq; i++) {
struct ice_ring *ring;
@@ -1309,7 +952,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
ring->reg_idx = vsi->txq_map[i];
ring->ring_active = false;
ring->vsi = vsi;
- ring->dev = &pf->pdev->dev;
+ ring->dev = dev;
ring->count = vsi->num_tx_desc;
vsi->tx_rings[i] = ring;
}
@@ -1328,7 +971,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
ring->ring_active = false;
ring->vsi = vsi;
ring->netdev = vsi->netdev;
- ring->dev = &pf->pdev->dev;
+ ring->dev = dev;
ring->count = vsi->num_rx_desc;
vsi->rx_rings[i] = ring;
}
@@ -1341,66 +984,6 @@ err_out:
}
/**
- * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
- * @vsi: the VSI being configured
- *
- * This function maps descriptor rings to the queue-specific vectors allotted
- * through the MSI-X enabling code. On a constrained vector budget, we map Tx
- * and Rx rings to the vector as "efficiently" as possible.
- */
-#ifdef CONFIG_DCB
-void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
-#else
-static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
-#endif /* CONFIG_DCB */
-{
- int q_vectors = vsi->num_q_vectors;
- int tx_rings_rem, rx_rings_rem;
- int v_id;
-
- /* initially assigning remaining rings count to VSIs num queue value */
- tx_rings_rem = vsi->num_txq;
- rx_rings_rem = vsi->num_rxq;
-
- for (v_id = 0; v_id < q_vectors; v_id++) {
- struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
- int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
-
- /* Tx rings mapping to vector */
- tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
- q_vector->num_ring_tx = tx_rings_per_v;
- q_vector->tx.ring = NULL;
- q_vector->tx.itr_idx = ICE_TX_ITR;
- q_base = vsi->num_txq - tx_rings_rem;
-
- for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
- struct ice_ring *tx_ring = vsi->tx_rings[q_id];
-
- tx_ring->q_vector = q_vector;
- tx_ring->next = q_vector->tx.ring;
- q_vector->tx.ring = tx_ring;
- }
- tx_rings_rem -= tx_rings_per_v;
-
- /* Rx rings mapping to vector */
- rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
- q_vector->num_ring_rx = rx_rings_per_v;
- q_vector->rx.ring = NULL;
- q_vector->rx.itr_idx = ICE_RX_ITR;
- q_base = vsi->num_rxq - rx_rings_rem;
-
- for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
- struct ice_ring *rx_ring = vsi->rx_rings[q_id];
-
- rx_ring->q_vector = q_vector;
- rx_ring->next = q_vector->rx.ring;
- q_vector->rx.ring = rx_ring;
- }
- rx_rings_rem -= rx_rings_per_v;
- }
-}
-
-/**
* ice_vsi_manage_rss_lut - disable/enable RSS
* @vsi: the VSI being changed
* @ena: boolean value indicating if this is an enable or disable request
@@ -1414,8 +997,7 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
int err = 0;
u8 *lut;
- lut = devm_kzalloc(&vsi->back->pdev->dev, vsi->rss_table_size,
- GFP_KERNEL);
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
if (!lut)
return -ENOMEM;
@@ -1428,7 +1010,7 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
}
err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size);
- devm_kfree(&vsi->back->pdev->dev, lut);
+ kfree(lut);
return err;
}
@@ -1441,12 +1023,14 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
struct ice_aqc_get_set_rss_keys *key;
struct ice_pf *pf = vsi->back;
enum ice_status status;
+ struct device *dev;
int err = 0;
u8 *lut;
+ dev = ice_pf_to_dev(pf);
vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq);
- lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL);
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
if (!lut)
return -ENOMEM;
@@ -1459,13 +1043,12 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
vsi->rss_table_size);
if (status) {
- dev_err(&pf->pdev->dev,
- "set_rss_lut failed, error %d\n", status);
+ dev_err(dev, "set_rss_lut failed, error %d\n", status);
err = -EIO;
goto ice_vsi_cfg_rss_exit;
}
- key = devm_kzalloc(&pf->pdev->dev, sizeof(*key), GFP_KERNEL);
+ key = kzalloc(sizeof(*key), GFP_KERNEL);
if (!key) {
err = -ENOMEM;
goto ice_vsi_cfg_rss_exit;
@@ -1482,14 +1065,13 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key);
if (status) {
- dev_err(&pf->pdev->dev, "set_rss_key failed, error %d\n",
- status);
+ dev_err(dev, "set_rss_key failed, error %d\n", status);
err = -EIO;
}
- devm_kfree(&pf->pdev->dev, key);
+ kfree(key);
ice_vsi_cfg_rss_exit:
- devm_kfree(&pf->pdev->dev, lut);
+ kfree(lut);
return err;
}
@@ -1509,7 +1091,7 @@ int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
struct ice_fltr_list_entry *tmp;
struct ice_pf *pf = vsi->back;
- tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_ATOMIC);
+ tmp = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*tmp), GFP_ATOMIC);
if (!tmp)
return -ENOMEM;
@@ -1601,9 +1183,11 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
struct ice_pf *pf = vsi->back;
LIST_HEAD(tmp_add_list);
enum ice_status status;
+ struct device *dev;
int err = 0;
- tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_KERNEL);
+ dev = ice_pf_to_dev(pf);
+ tmp = devm_kzalloc(dev, sizeof(*tmp), GFP_KERNEL);
if (!tmp)
return -ENOMEM;
@@ -1620,11 +1204,11 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
status = ice_add_vlan(&pf->hw, &tmp_add_list);
if (status) {
err = -ENODEV;
- dev_err(&pf->pdev->dev, "Failure Adding VLAN %d on VSI %i\n",
- vid, vsi->vsi_num);
+ dev_err(dev, "Failure Adding VLAN %d on VSI %i\n", vid,
+ vsi->vsi_num);
}
- ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+ ice_free_fltr_list(dev, &tmp_add_list);
return err;
}
@@ -1641,9 +1225,11 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
struct ice_pf *pf = vsi->back;
LIST_HEAD(tmp_add_list);
enum ice_status status;
+ struct device *dev;
int err = 0;
- list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+ dev = ice_pf_to_dev(pf);
+ list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL);
if (!list)
return -ENOMEM;
@@ -1659,21 +1245,46 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
status = ice_remove_vlan(&pf->hw, &tmp_add_list);
if (status == ICE_ERR_DOES_NOT_EXIST) {
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(dev,
"Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n",
vid, vsi->vsi_num, status);
} else if (status) {
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"Error removing VLAN %d on vsi %i error: %d\n",
vid, vsi->vsi_num, status);
err = -EIO;
}
- ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+ ice_free_fltr_list(dev, &tmp_add_list);
return err;
}
/**
+ * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
+ * @vsi: VSI
+ */
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
+{
+ if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
+ vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+ vsi->rx_buf_len = ICE_RXBUF_2048;
+#if (PAGE_SIZE < 8192)
+ } else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
+ (vsi->netdev->mtu <= ETH_DATA_LEN)) {
+ vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
+ vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
+#endif
+ } else {
+ vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+#if (PAGE_SIZE < 8192)
+ vsi->rx_buf_len = ICE_RXBUF_3072;
+#else
+ vsi->rx_buf_len = ICE_RXBUF_2048;
+#endif
+ }
+}
+
+/**
* ice_vsi_cfg_rxqs - Configure the VSI for Rx
* @vsi: the VSI being configured
*
@@ -1687,13 +1298,7 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
if (vsi->type == ICE_VSI_VF)
goto setup_rings;
- if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN)
- vsi->max_frame = vsi->netdev->mtu +
- ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
- else
- vsi->max_frame = ICE_RXBUF_2048;
-
- vsi->rx_buf_len = ICE_RXBUF_2048;
+ ice_vsi_cfg_frame_size(vsi);
setup_rings:
/* set up individual rings */
for (i = 0; i < vsi->num_rxq; i++) {
@@ -1712,101 +1317,34 @@ setup_rings:
}
/**
- * ice_vsi_cfg_txq - Configure single Tx queue
- * @vsi: the VSI that queue belongs to
- * @ring: Tx ring to be configured
- * @tc_q_idx: queue index within given TC
- * @qg_buf: queue group buffer
- * @tc: TC that Tx ring belongs to
- */
-static int
-ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, u16 tc_q_idx,
- struct ice_aqc_add_tx_qgrp *qg_buf, u8 tc)
-{
- struct ice_tlan_ctx tlan_ctx = { 0 };
- struct ice_aqc_add_txqs_perq *txq;
- struct ice_pf *pf = vsi->back;
- u8 buf_len = sizeof(*qg_buf);
- enum ice_status status;
- u16 pf_q;
-
- pf_q = ring->reg_idx;
- ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
- /* copy context contents into the qg_buf */
- qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
- ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
- ice_tlan_ctx_info);
-
- /* init queue specific tail reg. It is referred as
- * transmit comm scheduler queue doorbell.
- */
- ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
-
- /* Add unique software queue handle of the Tx queue per
- * TC into the VSI Tx ring
- */
- ring->q_handle = tc_q_idx;
-
- status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
- 1, qg_buf, buf_len, NULL);
- if (status) {
- dev_err(&pf->pdev->dev,
- "Failed to set LAN Tx queue context, error: %d\n",
- status);
- return -ENODEV;
- }
-
- /* Add Tx Queue TEID into the VSI Tx ring from the
- * response. This will complete configuring and
- * enabling the queue.
- */
- txq = &qg_buf->txqs[0];
- if (pf_q == le16_to_cpu(txq->txq_id))
- ring->txq_teid = le32_to_cpu(txq->q_teid);
-
- return 0;
-}
-
-/**
* ice_vsi_cfg_txqs - Configure the VSI for Tx
* @vsi: the VSI being configured
* @rings: Tx ring array to be configured
- * @offset: offset within vsi->txq_map
*
* Return 0 on success and a negative value on error
* Configure the Tx VSI for operation.
*/
static int
-ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset)
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
{
struct ice_aqc_add_tx_qgrp *qg_buf;
- struct ice_pf *pf = vsi->back;
- u16 q_idx = 0, i;
+ u16 q_idx = 0;
int err = 0;
- u8 tc;
- qg_buf = devm_kzalloc(&pf->pdev->dev, sizeof(*qg_buf), GFP_KERNEL);
+ qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL);
if (!qg_buf)
return -ENOMEM;
qg_buf->num_txqs = 1;
- /* set up and configure the Tx queues for each enabled TC */
- ice_for_each_traffic_class(tc) {
- if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
- break;
-
- for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
- err = ice_vsi_cfg_txq(vsi, rings[q_idx], i + offset,
- qg_buf, tc);
- if (err)
- goto err_cfg_txqs;
-
- q_idx++;
- }
+ for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+ err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
+ if (err)
+ goto err_cfg_txqs;
}
+
err_cfg_txqs:
- devm_kfree(&pf->pdev->dev, qg_buf);
+ kfree(qg_buf);
return err;
}
@@ -1819,159 +1357,46 @@ err_cfg_txqs:
*/
int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
{
- return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, 0);
-}
-
-/**
- * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
- * @intrl: interrupt rate limit in usecs
- * @gran: interrupt rate limit granularity in usecs
- *
- * This function converts a decimal interrupt rate limit in usecs to the format
- * expected by firmware.
- */
-u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
-{
- u32 val = intrl / gran;
-
- if (val)
- return val | GLINT_RATE_INTRL_ENA_M;
- return 0;
-}
-
-/**
- * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
- * @hw: board specific structure
- */
-static void ice_cfg_itr_gran(struct ice_hw *hw)
-{
- u32 regval = rd32(hw, GLINT_CTL);
-
- /* no need to update global register if ITR gran is already set */
- if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
- (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
- GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
- (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
- GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
- (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
- GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
- (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
- GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
- return;
-
- regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
- GLINT_CTL_ITR_GRAN_200_M) |
- ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
- GLINT_CTL_ITR_GRAN_100_M) |
- ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
- GLINT_CTL_ITR_GRAN_50_M) |
- ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
- GLINT_CTL_ITR_GRAN_25_M);
- wr32(hw, GLINT_CTL, regval);
-}
-
-/**
- * ice_cfg_itr - configure the initial interrupt throttle values
- * @hw: pointer to the HW structure
- * @q_vector: interrupt vector that's being configured
- *
- * Configure interrupt throttling values for the ring containers that are
- * associated with the interrupt vector passed in.
- */
-static void
-ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
-{
- ice_cfg_itr_gran(hw);
-
- if (q_vector->num_ring_rx) {
- struct ice_ring_container *rc = &q_vector->rx;
-
- /* if this value is set then don't overwrite with default */
- if (!rc->itr_setting)
- rc->itr_setting = ICE_DFLT_RX_ITR;
-
- rc->target_itr = ITR_TO_REG(rc->itr_setting);
- rc->next_update = jiffies + 1;
- rc->current_itr = rc->target_itr;
- wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
- ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
- }
-
- if (q_vector->num_ring_tx) {
- struct ice_ring_container *rc = &q_vector->tx;
-
- /* if this value is set then don't overwrite with default */
- if (!rc->itr_setting)
- rc->itr_setting = ICE_DFLT_TX_ITR;
-
- rc->target_itr = ITR_TO_REG(rc->itr_setting);
- rc->next_update = jiffies + 1;
- rc->current_itr = rc->target_itr;
- wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
- ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
- }
+ return ice_vsi_cfg_txqs(vsi, vsi->tx_rings);
}
/**
- * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
* @vsi: the VSI being configured
- * @txq: Tx queue being mapped to MSI-X vector
- * @msix_idx: MSI-X vector index within the function
- * @itr_idx: ITR index of the interrupt cause
*
- * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
- * within the function space.
+ * Return 0 on success and a negative value on error
+ * Configure the Tx queues dedicated for XDP in given VSI for operation.
*/
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
-#else
-static void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
-#endif /* CONFIG_PCI_IOV */
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
{
- struct ice_pf *pf = vsi->back;
- struct ice_hw *hw = &pf->hw;
- u32 val;
+ int ret;
+ int i;
- itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+ ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
+ if (ret)
+ return ret;
- val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
- ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]);
- wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+ return ret;
}
/**
- * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
- * @vsi: the VSI being configured
- * @rxq: Rx queue being mapped to MSI-X vector
- * @msix_idx: MSI-X vector index within the function
- * @itr_idx: ITR index of the interrupt cause
+ * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
+ * @intrl: interrupt rate limit in usecs
+ * @gran: interrupt rate limit granularity in usecs
*
- * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
- * within the function space.
+ * This function converts a decimal interrupt rate limit in usecs to the format
+ * expected by firmware.
*/
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
-#else
-static void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
-#endif /* CONFIG_PCI_IOV */
+u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
{
- struct ice_pf *pf = vsi->back;
- struct ice_hw *hw = &pf->hw;
- u32 val;
-
- itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
-
- val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
- ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
-
- wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+ u32 val = intrl / gran;
- ice_flush(hw);
+ if (val)
+ return val | GLINT_RATE_INTRL_ENA_M;
+ return 0;
}
/**
@@ -2028,13 +1453,12 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi)
*/
int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
{
- struct device *dev = &vsi->back->pdev->dev;
struct ice_hw *hw = &vsi->back->hw;
struct ice_vsi_ctx *ctxt;
enum ice_status status;
int ret = 0;
- ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
if (!ctxt)
return -ENOMEM;
@@ -2052,7 +1476,7 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
if (status) {
- dev_err(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n",
+ dev_err(&vsi->back->pdev->dev, "update VSI for VLAN insert failed, err %d aq_err %d\n",
status, hw->adminq.sq_last_status);
ret = -EIO;
goto out;
@@ -2060,7 +1484,7 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
vsi->info.vlan_flags = ctxt->info.vlan_flags;
out:
- devm_kfree(dev, ctxt);
+ kfree(ctxt);
return ret;
}
@@ -2071,13 +1495,12 @@ out:
*/
int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
{
- struct device *dev = &vsi->back->pdev->dev;
struct ice_hw *hw = &vsi->back->hw;
struct ice_vsi_ctx *ctxt;
enum ice_status status;
int ret = 0;
- ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
if (!ctxt)
return -ENOMEM;
@@ -2099,7 +1522,7 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
if (status) {
- dev_err(dev, "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n",
+ dev_err(&vsi->back->pdev->dev, "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n",
ena, status, hw->adminq.sq_last_status);
ret = -EIO;
goto out;
@@ -2107,7 +1530,7 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
vsi->info.vlan_flags = ctxt->info.vlan_flags;
out:
- devm_kfree(dev, ctxt);
+ kfree(ctxt);
return ret;
}
@@ -2134,109 +1557,6 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
}
/**
- * ice_trigger_sw_intr - trigger a software interrupt
- * @hw: pointer to the HW structure
- * @q_vector: interrupt vector to trigger the software interrupt for
- */
-void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
-{
- wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
- (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
- GLINT_DYN_CTL_SWINT_TRIG_M |
- GLINT_DYN_CTL_INTENA_M);
-}
-
-/**
- * ice_vsi_stop_tx_ring - Disable single Tx ring
- * @vsi: the VSI being configured
- * @rst_src: reset source
- * @rel_vmvf_num: Relative ID of VF/VM
- * @ring: Tx ring to be stopped
- * @txq_meta: Meta data of Tx ring to be stopped
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-int
-ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
- u16 rel_vmvf_num, struct ice_ring *ring,
- struct ice_txq_meta *txq_meta)
-{
- struct ice_pf *pf = vsi->back;
- struct ice_q_vector *q_vector;
- struct ice_hw *hw = &pf->hw;
- enum ice_status status;
- u32 val;
-
- /* clear cause_ena bit for disabled queues */
- val = rd32(hw, QINT_TQCTL(ring->reg_idx));
- val &= ~QINT_TQCTL_CAUSE_ENA_M;
- wr32(hw, QINT_TQCTL(ring->reg_idx), val);
-
- /* software is expected to wait for 100 ns */
- ndelay(100);
-
- /* trigger a software interrupt for the vector
- * associated to the queue to schedule NAPI handler
- */
- q_vector = ring->q_vector;
- if (q_vector)
- ice_trigger_sw_intr(hw, q_vector);
-
- status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
- txq_meta->tc, 1, &txq_meta->q_handle,
- &txq_meta->q_id, &txq_meta->q_teid, rst_src,
- rel_vmvf_num, NULL);
-
- /* if the disable queue command was exercised during an
- * active reset flow, ICE_ERR_RESET_ONGOING is returned.
- * This is not an error as the reset operation disables
- * queues at the hardware level anyway.
- */
- if (status == ICE_ERR_RESET_ONGOING) {
- dev_dbg(&vsi->back->pdev->dev,
- "Reset in progress. LAN Tx queues already disabled\n");
- } else if (status == ICE_ERR_DOES_NOT_EXIST) {
- dev_dbg(&vsi->back->pdev->dev,
- "LAN Tx queues do not exist, nothing to disable\n");
- } else if (status) {
- dev_err(&vsi->back->pdev->dev,
- "Failed to disable LAN Tx queues, error: %d\n", status);
- return -ENODEV;
- }
-
- return 0;
-}
-
-/**
- * ice_fill_txq_meta - Prepare the Tx queue's meta data
- * @vsi: VSI that ring belongs to
- * @ring: ring that txq_meta will be based on
- * @txq_meta: a helper struct that wraps Tx queue's information
- *
- * Set up a helper struct that will contain all the necessary fields that
- * are needed for stopping Tx queue
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-void
-ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
- struct ice_txq_meta *txq_meta)
-{
- u8 tc = 0;
-
-#ifdef CONFIG_DCB
- tc = ring->dcb_tc;
-#endif /* CONFIG_DCB */
- txq_meta->q_id = ring->reg_idx;
- txq_meta->q_teid = ring->txq_teid;
- txq_meta->q_handle = ring->q_handle;
- txq_meta->vsi_idx = vsi->idx;
- txq_meta->tc = tc;
-}
-
-/**
* ice_vsi_stop_tx_rings - Disable Tx rings
* @vsi: the VSI being configured
* @rst_src: reset source
@@ -2247,34 +1567,24 @@ static int
ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
u16 rel_vmvf_num, struct ice_ring **rings)
{
- u16 i, q_idx = 0;
- int status;
- u8 tc;
+ u16 q_idx;
if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
return -EINVAL;
- /* set up the Tx queue list to be disabled for each enabled TC */
- ice_for_each_traffic_class(tc) {
- if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
- break;
-
- for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
- struct ice_txq_meta txq_meta = { };
-
- if (!rings || !rings[q_idx])
- return -EINVAL;
+ for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+ struct ice_txq_meta txq_meta = { };
+ int status;
- ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
- status = ice_vsi_stop_tx_ring(vsi, rst_src,
- rel_vmvf_num,
- rings[q_idx], &txq_meta);
+ if (!rings || !rings[q_idx])
+ return -EINVAL;
- if (status)
- return status;
+ ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
+ status = ice_vsi_stop_tx_ring(vsi, rst_src, rel_vmvf_num,
+ rings[q_idx], &txq_meta);
- q_idx++;
- }
+ if (status)
+ return status;
}
return 0;
@@ -2294,6 +1604,15 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
}
/**
+ * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings
+ * @vsi: the VSI being configured
+ */
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
+{
+ return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings);
+}
+
+/**
* ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
* @vsi: VSI to enable or disable VLAN pruning on
* @ena: set to true to enable VLAN pruning and false to disable it
@@ -2304,7 +1623,6 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc)
{
struct ice_vsi_ctx *ctxt;
- struct device *dev;
struct ice_pf *pf;
int status;
@@ -2312,8 +1630,7 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc)
return -EINVAL;
pf = vsi->back;
- dev = &pf->pdev->dev;
- ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
if (!ctxt)
return -ENOMEM;
@@ -2347,11 +1664,11 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc)
vsi->info.sec_flags = ctxt->info.sec_flags;
vsi->info.sw_flags2 = ctxt->info.sw_flags2;
- devm_kfree(dev, ctxt);
+ kfree(ctxt);
return 0;
err_out:
- devm_kfree(dev, ctxt);
+ kfree(ctxt);
return -EIO;
}
@@ -2420,8 +1737,10 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule)
struct ice_pf *pf = vsi->back;
LIST_HEAD(tmp_add_list);
enum ice_status status;
+ struct device *dev;
- list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+ dev = ice_pf_to_dev(pf);
+ list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL);
if (!list)
return;
@@ -2441,11 +1760,11 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule)
status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
if (status)
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"Failure Adding or Removing Ethertype on VSI %i error: %d\n",
vsi->vsi_num, status);
- ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+ ice_free_fltr_list(dev, &tmp_add_list);
}
/**
@@ -2460,8 +1779,10 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
struct ice_pf *pf = vsi->back;
LIST_HEAD(tmp_add_list);
enum ice_status status;
+ struct device *dev;
- list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+ dev = ice_pf_to_dev(pf);
+ list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL);
if (!list)
return;
@@ -2488,12 +1809,11 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
if (status)
- dev_err(&pf->pdev->dev,
- "Fail %s %s LLDP rule on VSI %i error: %d\n",
+ dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %d\n",
create ? "adding" : "removing", tx ? "TX" : "RX",
vsi->vsi_num, status);
- ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+ ice_free_fltr_list(dev, &tmp_add_list);
}
/**
@@ -2515,7 +1835,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
enum ice_vsi_type type, u16 vf_id)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
- struct device *dev = &pf->pdev->dev;
+ struct device *dev = ice_pf_to_dev(pf);
enum ice_status status;
struct ice_vsi *vsi;
int ret, i;
@@ -2551,7 +1871,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
ice_vsi_set_tc_cfg(vsi);
/* create the VSI */
- ret = ice_vsi_init(vsi);
+ ret = ice_vsi_init(vsi, true);
if (ret)
goto unroll_get_qs;
@@ -2624,8 +1944,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
max_txqs);
if (status) {
- dev_err(&pf->pdev->dev,
- "VSI %d failed lan queue config, error %d\n",
+ dev_err(dev, "VSI %d failed lan queue config, error %d\n",
vsi->vsi_num, status);
goto unroll_vector_base;
}
@@ -2635,23 +1954,17 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
* out PAUSE or PFC frames. If enabled, FW can still send FC frames.
* The rule is added once for PF VSI in order to create appropriate
* recipe, since VSI/VSI list is ignored with drop action...
- * Also add rules to handle LLDP Tx and Rx packets. Tx LLDP packets
- * need to be dropped so that VFs cannot send LLDP packets to reconfig
- * DCB settings in the HW. Also, if the FW DCBX engine is not running
- * then Rx LLDP packets need to be redirected up the stack.
+ * Also add rules to handle LLDP Tx packets. Tx LLDP packets need to
+ * be dropped so that VFs cannot send LLDP packets to reconfig DCB
+ * settings in the HW.
*/
- if (!ice_is_safe_mode(pf)) {
+ if (!ice_is_safe_mode(pf))
if (vsi->type == ICE_VSI_PF) {
ice_vsi_add_rem_eth_mac(vsi, true);
/* Tx LLDP packets */
ice_cfg_sw_lldp(vsi, true, true);
-
- /* Rx LLDP packets */
- if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
- ice_cfg_sw_lldp(vsi, false, true);
}
- }
return vsi;
@@ -2690,6 +2003,11 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0);
for (q = 0; q < q_vector->num_ring_tx; q++) {
wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0);
+ }
txq++;
}
@@ -2738,8 +2056,7 @@ void ice_vsi_free_irq(struct ice_vsi *vsi)
/* clear the affinity_mask in the IRQ descriptor */
irq_set_affinity_hint(irq_num, NULL);
synchronize_irq(irq_num);
- devm_free_irq(&pf->pdev->dev, irq_num,
- vsi->q_vectors[i]);
+ devm_free_irq(ice_pf_to_dev(pf), irq_num, vsi->q_vectors[i]);
}
}
@@ -2790,6 +2107,62 @@ void ice_vsi_close(struct ice_vsi *vsi)
}
/**
+ * ice_ena_vsi - resume a VSI
+ * @vsi: the VSI being resume
+ * @locked: is the rtnl_lock already held
+ */
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
+{
+ int err = 0;
+
+ if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
+ return 0;
+
+ clear_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+ if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+ if (netif_running(vsi->netdev)) {
+ if (!locked)
+ rtnl_lock();
+
+ err = ice_open(vsi->netdev);
+
+ if (!locked)
+ rtnl_unlock();
+ }
+ }
+
+ return err;
+}
+
+/**
+ * ice_dis_vsi - pause a VSI
+ * @vsi: the VSI being paused
+ * @locked: is the rtnl_lock already held
+ */
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
+{
+ if (test_bit(__ICE_DOWN, vsi->state))
+ return;
+
+ set_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+ if (vsi->type == ICE_VSI_PF && vsi->netdev) {
+ if (netif_running(vsi->netdev)) {
+ if (!locked)
+ rtnl_lock();
+
+ ice_stop(vsi->netdev);
+
+ if (!locked)
+ rtnl_unlock();
+ } else {
+ ice_vsi_close(vsi);
+ }
+ }
+}
+
+/**
* ice_free_res - free a block of resources
* @res: pointer to the resource
* @index: starting index previously returned by ice_get_res
@@ -2869,7 +2242,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
return -EINVAL;
if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) {
- dev_err(&pf->pdev->dev,
+ dev_err(ice_pf_to_dev(pf),
"param err: needed=%d, num_entries = %d id=0x%04x\n",
needed, res->num_entries, id);
return -EINVAL;
@@ -3031,10 +2404,11 @@ int ice_vsi_release(struct ice_vsi *vsi)
/**
* ice_vsi_rebuild - Rebuild VSI after reset
* @vsi: VSI to be rebuild
+ * @init_vsi: is this an initialization or a reconfigure of the VSI
*
* Returns 0 on success and negative value on failure
*/
-int ice_vsi_rebuild(struct ice_vsi *vsi)
+int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
struct ice_vf *vf = NULL;
@@ -3064,6 +2438,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
vsi->base_vector = 0;
}
+ if (ice_is_xdp_ena_vsi(vsi))
+ /* return value check can be skipped here, it always returns
+ * 0 if reset is in progress
+ */
+ ice_destroy_xdp_rings(vsi);
ice_vsi_put_qs(vsi);
ice_vsi_clear_rings(vsi);
ice_vsi_free_arrays(vsi);
@@ -3081,11 +2460,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
ice_vsi_set_tc_cfg(vsi);
/* Initialize VSI struct elements and create VSI in FW */
- ret = ice_vsi_init(vsi);
+ ret = ice_vsi_init(vsi, init_vsi);
if (ret < 0)
goto err_vsi;
-
switch (vsi->type) {
case ICE_VSI_PF:
ret = ice_vsi_alloc_q_vectors(vsi);
@@ -3105,6 +2483,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
goto err_vectors;
ice_vsi_map_rings_to_vectors(vsi);
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ vsi->num_xdp_txq = vsi->alloc_txq;
+ ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
+ if (ret)
+ goto err_vectors;
+ }
/* Do not exit if configuring RSS had an issue, at least
* receive traffic on first queue. Hence no need to capture
* return value
@@ -3131,16 +2515,25 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
}
/* configure VSI nodes based on number of queues and TC's */
- for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ for (i = 0; i < vsi->tc_cfg.numtc; i++) {
max_txqs[i] = vsi->alloc_txq;
+ if (ice_is_xdp_ena_vsi(vsi))
+ max_txqs[i] += vsi->num_xdp_txq;
+ }
+
status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
max_txqs);
if (status) {
- dev_err(&pf->pdev->dev,
+ dev_err(ice_pf_to_dev(pf),
"VSI %d failed lan queue config, error %d\n",
vsi->vsi_num, status);
- goto err_vectors;
+ if (init_vsi) {
+ ret = -EIO;
+ goto err_vectors;
+ } else {
+ return ice_schedule_reset(pf, ICE_RESET_PFR);
+ }
}
return 0;
@@ -3166,6 +2559,7 @@ err_vsi:
bool ice_is_reset_in_progress(unsigned long *state)
{
return test_bit(__ICE_RESET_OICR_RECV, state) ||
+ test_bit(__ICE_DCBNL_DEVRESET, state) ||
test_bit(__ICE_PFR_REQ, state) ||
test_bit(__ICE_CORER_REQ, state) ||
test_bit(__ICE_GLOBR_REQ, state);
@@ -3199,9 +2593,12 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
struct ice_vsi_ctx *ctx;
struct ice_pf *pf = vsi->back;
enum ice_status status;
+ struct device *dev;
int i, ret = 0;
u8 num_tc = 0;
+ dev = ice_pf_to_dev(pf);
+
ice_for_each_traffic_class(i) {
/* build bitmap of enabled TCs */
if (ena_tc & BIT(i))
@@ -3213,7 +2610,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
vsi->tc_cfg.ena_tc = ena_tc;
vsi->tc_cfg.numtc = num_tc;
- ctx = devm_kzalloc(&pf->pdev->dev, sizeof(*ctx), GFP_KERNEL);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
@@ -3226,7 +2623,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
if (status) {
- dev_info(&pf->pdev->dev, "Failed VSI Update\n");
+ dev_info(dev, "Failed VSI Update\n");
ret = -EIO;
goto out;
}
@@ -3235,8 +2632,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
max_txqs);
if (status) {
- dev_err(&pf->pdev->dev,
- "VSI %d failed TC config, error %d\n",
+ dev_err(dev, "VSI %d failed TC config, error %d\n",
vsi->vsi_num, status);
ret = -EIO;
goto out;
@@ -3246,7 +2642,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
ice_vsi_cfg_netdev_tc(vsi, ena_tc);
out:
- devm_kfree(&pf->pdev->dev, ctx);
+ kfree(ctx);
return ret;
}
#endif /* CONFIG_DCB */
@@ -3271,6 +2667,51 @@ char *ice_nvm_version_str(struct ice_hw *hw)
}
/**
+ * ice_update_ring_stats - Update ring statistics
+ * @ring: ring to update
+ * @cont: used to increment per-vector counters
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ *
+ * This function assumes that caller has acquired a u64_stats_sync lock.
+ */
+static void
+ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont,
+ u64 pkts, u64 bytes)
+{
+ ring->stats.bytes += bytes;
+ ring->stats.pkts += pkts;
+ cont->total_bytes += bytes;
+ cont->total_pkts += pkts;
+}
+
+/**
+ * ice_update_tx_ring_stats - Update Tx ring specific counters
+ * @tx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes)
+{
+ u64_stats_update_begin(&tx_ring->syncp);
+ ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes);
+ u64_stats_update_end(&tx_ring->syncp);
+}
+
+/**
+ * ice_update_rx_ring_stats - Update Rx ring specific counters
+ * @rx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
+{
+ u64_stats_update_begin(&rx_ring->syncp);
+ ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes);
+ u64_stats_update_end(&rx_ring->syncp);
+}
+
+/**
* ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI
* @vsi: the VSI being configured MAC filter
* @macaddr: the MAC address to be added.
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 47bc033fff20..6e31e30aba39 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -6,18 +6,7 @@
#include "ice.h"
-struct ice_txq_meta {
- /* Tx-scheduler element identifier */
- u32 q_teid;
- /* Entry in VSI's txq_map bitmap */
- u16 q_id;
- /* Relative index of Tx queue within TC */
- u16 q_handle;
- /* VSI index that Tx queue belongs to */
- u16 vsi_idx;
- /* TC number that Tx queue belongs to */
- u8 tc;
-};
+const char *ice_vsi_type_str(enum ice_vsi_type type);
int
ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
@@ -33,24 +22,6 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
void ice_vsi_cfg_msix(struct ice_vsi *vsi);
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
-
-void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
-
-int
-ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
- u16 rel_vmvf_num, struct ice_ring *ring,
- struct ice_txq_meta *txq_meta);
-
-void ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
- struct ice_txq_meta *txq_meta);
-
-int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
-#endif /* CONFIG_PCI_IOV */
-
int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid);
int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
@@ -67,6 +38,10 @@ int
ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
u16 rel_vmvf_num);
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
+
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
+
int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
@@ -89,25 +64,21 @@ int ice_vsi_release(struct ice_vsi *vsi);
void ice_vsi_close(struct ice_vsi *vsi);
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked);
+
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked);
+
int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id);
int
ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id);
-int ice_vsi_rebuild(struct ice_vsi *vsi);
+int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi);
bool ice_is_reset_in_progress(unsigned long *state);
-void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
-
-void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
-
void ice_vsi_put_qs(struct ice_vsi *vsi);
-#ifdef CONFIG_DCB
-void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
-#endif /* CONFIG_DCB */
-
void ice_vsi_dis_irq(struct ice_vsi *vsi);
void ice_vsi_free_irq(struct ice_vsi *vsi);
@@ -118,6 +89,12 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
+void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
+
u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
char *ice_nvm_version_str(struct ice_hw *hw);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 214cd6eca405..69bff085acf7 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -6,8 +6,10 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include "ice.h"
+#include "ice_base.h"
#include "ice_lib.h"
#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
#define DRV_VERSION_MAJOR 0
#define DRV_VERSION_MINOR 8
@@ -42,6 +44,7 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
static struct workqueue_struct *ice_wq;
static const struct net_device_ops ice_netdev_safe_mode_ops;
static const struct net_device_ops ice_netdev_ops;
+static int ice_vsi_open(struct ice_vsi *vsi);
static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
@@ -159,7 +162,7 @@ unregister:
* had an error
*/
if (status && vsi->netdev->reg_state == NETREG_REGISTERED) {
- dev_err(&pf->pdev->dev,
+ dev_err(ice_pf_to_dev(pf),
"Could not add MAC filters error %d. Unregistering device\n",
status);
unregister_netdev(vsi->netdev);
@@ -435,42 +438,11 @@ static void ice_sync_fltr_subtask(struct ice_pf *pf)
}
/**
- * ice_dis_vsi - pause a VSI
- * @vsi: the VSI being paused
- * @locked: is the rtnl_lock already held
- */
-static void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
-{
- if (test_bit(__ICE_DOWN, vsi->state))
- return;
-
- set_bit(__ICE_NEEDS_RESTART, vsi->state);
-
- if (vsi->type == ICE_VSI_PF && vsi->netdev) {
- if (netif_running(vsi->netdev)) {
- if (!locked)
- rtnl_lock();
-
- ice_stop(vsi->netdev);
-
- if (!locked)
- rtnl_unlock();
- } else {
- ice_vsi_close(vsi);
- }
- }
-}
-
-/**
* ice_pf_dis_all_vsi - Pause all VSIs on a PF
* @pf: the PF
* @locked: is the rtnl_lock already held
*/
-#ifdef CONFIG_DCB
-void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
-#else
static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
-#endif /* CONFIG_DCB */
{
int v;
@@ -524,7 +496,7 @@ ice_prepare_for_reset(struct ice_pf *pf)
*/
static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
- struct device *dev = &pf->pdev->dev;
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
@@ -636,8 +608,14 @@ static void ice_print_topo_conflict(struct ice_vsi *vsi)
switch (vsi->port_info->phy.link_info.topo_media_conflict) {
case ICE_AQ_LINK_TOPO_CONFLICT:
case ICE_AQ_LINK_MEDIA_CONFLICT:
+ case ICE_AQ_LINK_TOPO_UNREACH_PRT:
+ case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
+ case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n");
break;
+ case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
+ netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+ break;
default:
break;
}
@@ -747,7 +725,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
an = "False";
/* Get FEC mode requested based on PHY caps last SW configuration */
- caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
if (!caps) {
fec_req = "Unknown";
goto done;
@@ -767,7 +745,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
else
fec_req = "NONE";
- devm_kfree(&vsi->back->pdev->dev, caps);
+ kfree(caps);
done:
netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Autoneg: %s, Flow Control: %s\n",
@@ -815,6 +793,7 @@ static int
ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
u16 link_speed)
{
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_phy_info *phy_info;
struct ice_vsi *vsi;
u16 old_link_speed;
@@ -832,7 +811,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
*/
result = ice_update_link_info(pi);
if (result)
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(dev,
"Failed to update link status and re-enable link events for port %d\n",
pi->lport);
@@ -851,7 +830,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
result = ice_aq_set_link_restart_an(pi, false, NULL);
if (result) {
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(dev,
"Failed to set link down, VSI %d error %d\n",
vsi->vsi_num, result);
return result;
@@ -947,7 +926,7 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
!!(link_data->link_info & ICE_AQ_LINK_UP),
le16_to_cpu(link_data->link_speed));
if (status)
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(ice_pf_to_dev(pf),
"Could not process link event, error %d\n", status);
return status;
@@ -960,6 +939,7 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
*/
static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
{
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_rq_event_info event;
struct ice_hw *hw = &pf->hw;
struct ice_ctl_q_info *cq;
@@ -981,8 +961,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
qtype = "Mailbox";
break;
default:
- dev_warn(&pf->pdev->dev, "Unknown control queue type 0x%x\n",
- q_type);
+ dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
return 0;
}
@@ -994,15 +973,15 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
PF_FW_ARQLEN_ARQCRIT_M)) {
oldval = val;
if (val & PF_FW_ARQLEN_ARQVFE_M)
- dev_dbg(&pf->pdev->dev,
- "%s Receive Queue VF Error detected\n", qtype);
+ dev_dbg(dev, "%s Receive Queue VF Error detected\n",
+ qtype);
if (val & PF_FW_ARQLEN_ARQOVFL_M) {
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(dev,
"%s Receive Queue Overflow Error detected\n",
qtype);
}
if (val & PF_FW_ARQLEN_ARQCRIT_M)
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(dev,
"%s Receive Queue Critical Error detected\n",
qtype);
val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
@@ -1016,16 +995,14 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
PF_FW_ATQLEN_ATQCRIT_M)) {
oldval = val;
if (val & PF_FW_ATQLEN_ATQVFE_M)
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(dev,
"%s Send Queue VF Error detected\n", qtype);
if (val & PF_FW_ATQLEN_ATQOVFL_M) {
- dev_dbg(&pf->pdev->dev,
- "%s Send Queue Overflow Error detected\n",
+ dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
qtype);
}
if (val & PF_FW_ATQLEN_ATQCRIT_M)
- dev_dbg(&pf->pdev->dev,
- "%s Send Queue Critical Error detected\n",
+ dev_dbg(dev, "%s Send Queue Critical Error detected\n",
qtype);
val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
PF_FW_ATQLEN_ATQCRIT_M);
@@ -1034,8 +1011,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
}
event.buf_len = cq->rq_buf_size;
- event.msg_buf = devm_kzalloc(&pf->pdev->dev, event.buf_len,
- GFP_KERNEL);
+ event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
if (!event.msg_buf)
return 0;
@@ -1047,8 +1023,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
if (ret == ICE_ERR_AQ_NO_WORK)
break;
if (ret) {
- dev_err(&pf->pdev->dev,
- "%s Receive Queue event error %d\n", qtype,
+ dev_err(dev, "%s Receive Queue event error %d\n", qtype,
ret);
break;
}
@@ -1058,8 +1033,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
switch (opcode) {
case ice_aqc_opc_get_link_status:
if (ice_handle_link_event(pf, &event))
- dev_err(&pf->pdev->dev,
- "Could not handle link event\n");
+ dev_err(dev, "Could not handle link event\n");
break;
case ice_mbx_opc_send_msg_to_pf:
ice_vc_process_vf_msg(pf, &event);
@@ -1071,14 +1045,14 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
ice_dcb_process_lldp_set_mib_change(pf, &event);
break;
default:
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(dev,
"%s Receive Queue unknown event 0x%04x ignored\n",
qtype, opcode);
break;
}
} while (pending && (i++ < ICE_DFLT_IRQ_WORK));
- devm_kfree(&pf->pdev->dev, event.msg_buf);
+ kfree(event.msg_buf);
return pending && (i == ICE_DFLT_IRQ_WORK);
}
@@ -1222,6 +1196,7 @@ static void ice_service_timer(struct timer_list *t)
*/
static void ice_handle_mdd_event(struct ice_pf *pf)
{
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
bool mdd_detected = false;
u32 reg;
@@ -1243,7 +1218,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
GL_MDET_TX_PQM_QNUM_S);
if (netif_msg_tx_err(pf))
- dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+ dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
event, queue, pf_num, vf_num);
wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
mdd_detected = true;
@@ -1261,7 +1236,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
GL_MDET_TX_TCLAN_QNUM_S);
if (netif_msg_rx_err(pf))
- dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+ dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
event, queue, pf_num, vf_num);
wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
mdd_detected = true;
@@ -1279,7 +1254,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
GL_MDET_RX_QNUM_S);
if (netif_msg_rx_err(pf))
- dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
+ dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
event, queue, pf_num, vf_num);
wr32(hw, GL_MDET_RX, 0xffffffff);
mdd_detected = true;
@@ -1291,21 +1266,21 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
reg = rd32(hw, PF_MDET_TX_PQM);
if (reg & PF_MDET_TX_PQM_VALID_M) {
wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
- dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n");
+ dev_info(dev, "TX driver issue detected, PF reset issued\n");
pf_mdd_detected = true;
}
reg = rd32(hw, PF_MDET_TX_TCLAN);
if (reg & PF_MDET_TX_TCLAN_VALID_M) {
wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF);
- dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n");
+ dev_info(dev, "TX driver issue detected, PF reset issued\n");
pf_mdd_detected = true;
}
reg = rd32(hw, PF_MDET_RX);
if (reg & PF_MDET_RX_VALID_M) {
wr32(hw, PF_MDET_RX, 0xFFFF);
- dev_info(&pf->pdev->dev, "RX driver issue detected, PF reset issued\n");
+ dev_info(dev, "RX driver issue detected, PF reset issued\n");
pf_mdd_detected = true;
}
/* Queue belongs to the PF initiate a reset */
@@ -1325,7 +1300,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
if (reg & VP_MDET_TX_PQM_VALID_M) {
wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF);
vf_mdd_detected = true;
- dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
+ dev_info(dev, "TX driver issue detected on VF %d\n",
i);
}
@@ -1333,7 +1308,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
if (reg & VP_MDET_TX_TCLAN_VALID_M) {
wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF);
vf_mdd_detected = true;
- dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
+ dev_info(dev, "TX driver issue detected on VF %d\n",
i);
}
@@ -1341,7 +1316,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
if (reg & VP_MDET_TX_TDPU_VALID_M) {
wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF);
vf_mdd_detected = true;
- dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
+ dev_info(dev, "TX driver issue detected on VF %d\n",
i);
}
@@ -1349,7 +1324,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
if (reg & VP_MDET_RX_VALID_M) {
wr32(hw, VP_MDET_RX(i), 0xFFFF);
vf_mdd_detected = true;
- dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
+ dev_info(dev, "RX driver issue detected on VF %d\n",
i);
}
@@ -1357,7 +1332,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
vf->num_mdd_events++;
if (vf->num_mdd_events &&
vf->num_mdd_events <= ICE_MDD_EVENTS_THRESHOLD)
- dev_info(&pf->pdev->dev,
+ dev_info(dev,
"VF %d has had %llu MDD events since last boot, Admin might need to reload AVF driver with this number of events\n",
i, vf->num_mdd_events);
}
@@ -1393,7 +1368,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
pi = vsi->port_info;
- pcaps = devm_kzalloc(dev, sizeof(*pcaps), GFP_KERNEL);
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
return -ENOMEM;
@@ -1412,7 +1387,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
goto out;
- cfg = devm_kzalloc(dev, sizeof(*cfg), GFP_KERNEL);
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
if (!cfg) {
retcode = -ENOMEM;
goto out;
@@ -1437,9 +1412,9 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
retcode = -EIO;
}
- devm_kfree(dev, cfg);
+ kfree(cfg);
out:
- devm_kfree(dev, pcaps);
+ kfree(pcaps);
return retcode;
}
@@ -1551,6 +1526,44 @@ static void ice_set_ctrlq_len(struct ice_hw *hw)
}
/**
+ * ice_schedule_reset - schedule a reset
+ * @pf: board private structure
+ * @reset: reset being requested
+ */
+int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+
+ /* bail out if earlier reset has failed */
+ if (test_bit(__ICE_RESET_FAILED, pf->state)) {
+ dev_dbg(dev, "earlier reset has failed\n");
+ return -EIO;
+ }
+ /* bail if reset/recovery already in progress */
+ if (ice_is_reset_in_progress(pf->state)) {
+ dev_dbg(dev, "Reset already in progress\n");
+ return -EBUSY;
+ }
+
+ switch (reset) {
+ case ICE_RESET_PFR:
+ set_bit(__ICE_PFR_REQ, pf->state);
+ break;
+ case ICE_RESET_CORER:
+ set_bit(__ICE_CORER_REQ, pf->state);
+ break;
+ case ICE_RESET_GLOBR:
+ set_bit(__ICE_GLOBR_REQ, pf->state);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ice_service_task_schedule(pf);
+ return 0;
+}
+
+/**
* ice_irq_affinity_notify - Callback for affinity changes
* @notify: context as to what irq was changed
* @mask: the new affinity mask
@@ -1604,11 +1617,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
int q_vectors = vsi->num_q_vectors;
struct ice_pf *pf = vsi->back;
int base = vsi->base_vector;
+ struct device *dev;
int rx_int_idx = 0;
int tx_int_idx = 0;
int vector, err;
int irq_num;
+ dev = ice_pf_to_dev(pf);
for (vector = 0; vector < q_vectors; vector++) {
struct ice_q_vector *q_vector = vsi->q_vectors[vector];
@@ -1628,8 +1643,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
/* skip this unused q_vector */
continue;
}
- err = devm_request_irq(&pf->pdev->dev, irq_num,
- vsi->irq_handler, 0,
+ err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0,
q_vector->name, q_vector);
if (err) {
netdev_err(vsi->netdev,
@@ -1655,12 +1669,331 @@ free_q_irqs:
irq_num = pf->msix_entries[base + vector].vector,
irq_set_affinity_notifier(irq_num, NULL);
irq_set_affinity_hint(irq_num, NULL);
- devm_free_irq(&pf->pdev->dev, irq_num, &vsi->q_vectors[vector]);
+ devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]);
}
return err;
}
/**
+ * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
+ * @vsi: VSI to setup Tx rings used by XDP
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
+{
+ struct device *dev = &vsi->back->pdev->dev;
+ int i;
+
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ u16 xdp_q_idx = vsi->alloc_txq + i;
+ struct ice_ring *xdp_ring;
+
+ xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
+
+ if (!xdp_ring)
+ goto free_xdp_rings;
+
+ xdp_ring->q_index = xdp_q_idx;
+ xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
+ xdp_ring->ring_active = false;
+ xdp_ring->vsi = vsi;
+ xdp_ring->netdev = NULL;
+ xdp_ring->dev = dev;
+ xdp_ring->count = vsi->num_tx_desc;
+ vsi->xdp_rings[i] = xdp_ring;
+ if (ice_setup_tx_ring(xdp_ring))
+ goto free_xdp_rings;
+ ice_set_ring_xdp(xdp_ring);
+ xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+ }
+
+ return 0;
+
+free_xdp_rings:
+ for (; i >= 0; i--)
+ if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ return -ENOMEM;
+}
+
+/**
+ * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
+ * @vsi: VSI to set the bpf prog on
+ * @prog: the bpf prog pointer
+ */
+static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+ struct bpf_prog *old_prog;
+ int i;
+
+ old_prog = xchg(&vsi->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ ice_for_each_rxq(vsi, i)
+ WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+}
+
+/**
+ * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
+ * @vsi: VSI to bring up Tx rings used by XDP
+ * @prog: bpf program that will be assigned to VSI
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ int xdp_rings_rem = vsi->num_xdp_txq;
+ struct ice_pf *pf = vsi->back;
+ struct ice_qs_cfg xdp_qs_cfg = {
+ .qs_mutex = &pf->avail_q_mutex,
+ .pf_map = pf->avail_txqs,
+ .pf_map_size = pf->max_pf_txqs,
+ .q_count = vsi->num_xdp_txq,
+ .scatter_count = ICE_MAX_SCATTER_TXQS,
+ .vsi_map = vsi->txq_map,
+ .vsi_map_offset = vsi->alloc_txq,
+ .mapping_mode = ICE_VSI_MAP_CONTIG
+ };
+ enum ice_status status;
+ struct device *dev;
+ int i, v_idx;
+
+ dev = ice_pf_to_dev(pf);
+ vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
+ sizeof(*vsi->xdp_rings), GFP_KERNEL);
+ if (!vsi->xdp_rings)
+ return -ENOMEM;
+
+ vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
+ if (__ice_vsi_get_qs(&xdp_qs_cfg))
+ goto err_map_xdp;
+
+ if (ice_xdp_alloc_setup_rings(vsi))
+ goto clear_xdp_rings;
+
+ /* follow the logic from ice_vsi_map_rings_to_vectors */
+ ice_for_each_q_vector(vsi, v_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+ int xdp_rings_per_v, q_id, q_base;
+
+ xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+ vsi->num_q_vectors - v_idx);
+ q_base = vsi->num_xdp_txq - xdp_rings_rem;
+
+ for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+ struct ice_ring *xdp_ring = vsi->xdp_rings[q_id];
+
+ xdp_ring->q_vector = q_vector;
+ xdp_ring->next = q_vector->tx.ring;
+ q_vector->tx.ring = xdp_ring;
+ }
+ xdp_rings_rem -= xdp_rings_per_v;
+ }
+
+ /* omit the scheduler update if in reset path; XDP queues will be
+ * taken into account at the end of ice_vsi_rebuild, where
+ * ice_cfg_vsi_lan is being called
+ */
+ if (ice_is_reset_in_progress(pf->state))
+ return 0;
+
+ /* tell the Tx scheduler that right now we have
+ * additional queues
+ */
+ for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
+
+ status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_txqs);
+ if (status) {
+ dev_err(dev, "Failed VSI LAN queue config for XDP, error:%d\n",
+ status);
+ goto clear_xdp_rings;
+ }
+ ice_vsi_assign_bpf_prog(vsi, prog);
+
+ return 0;
+clear_xdp_rings:
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ if (vsi->xdp_rings[i]) {
+ kfree_rcu(vsi->xdp_rings[i], rcu);
+ vsi->xdp_rings[i] = NULL;
+ }
+
+err_map_xdp:
+ mutex_lock(&pf->avail_q_mutex);
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+ vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+ }
+ mutex_unlock(&pf->avail_q_mutex);
+
+ devm_kfree(dev, vsi->xdp_rings);
+ return -ENOMEM;
+}
+
+/**
+ * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
+ * @vsi: VSI to remove XDP rings
+ *
+ * Detach XDP rings from irq vectors, clean up the PF bitmap and free
+ * resources
+ */
+int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+{
+ u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ struct ice_pf *pf = vsi->back;
+ int i, v_idx;
+
+ /* q_vectors are freed in reset path so there's no point in detaching
+ * rings; in case of rebuild being triggered not from reset reset bits
+ * in pf->state won't be set, so additionally check first q_vector
+ * against NULL
+ */
+ if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+ goto free_qmap;
+
+ ice_for_each_q_vector(vsi, v_idx) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+ struct ice_ring *ring;
+
+ ice_for_each_ring(ring, q_vector->tx)
+ if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+ break;
+
+ /* restore the value of last node prior to XDP setup */
+ q_vector->tx.ring = ring;
+ }
+
+free_qmap:
+ mutex_lock(&pf->avail_q_mutex);
+ for (i = 0; i < vsi->num_xdp_txq; i++) {
+ clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+ vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+ }
+ mutex_unlock(&pf->avail_q_mutex);
+
+ for (i = 0; i < vsi->num_xdp_txq; i++)
+ if (vsi->xdp_rings[i]) {
+ if (vsi->xdp_rings[i]->desc)
+ ice_free_tx_ring(vsi->xdp_rings[i]);
+ kfree_rcu(vsi->xdp_rings[i], rcu);
+ vsi->xdp_rings[i] = NULL;
+ }
+
+ devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings);
+ vsi->xdp_rings = NULL;
+
+ if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+ return 0;
+
+ ice_vsi_assign_bpf_prog(vsi, NULL);
+
+ /* notify Tx scheduler that we destroyed XDP queues and bring
+ * back the old number of child nodes
+ */
+ for (i = 0; i < vsi->tc_cfg.numtc; i++)
+ max_txqs[i] = vsi->num_txq;
+
+ return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_txqs);
+}
+
+/**
+ * ice_xdp_setup_prog - Add or remove XDP eBPF program
+ * @vsi: VSI to setup XDP for
+ * @prog: XDP program
+ * @extack: netlink extended ack
+ */
+static int
+ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+ bool if_running = netif_running(vsi->netdev);
+ int ret = 0, xdp_ring_err = 0;
+
+ if (frame_size > vsi->rx_buf_len) {
+ NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
+ return -EOPNOTSUPP;
+ }
+
+ /* need to stop netdev while setting up the program for Rx rings */
+ if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
+ ret = ice_down(vsi);
+ if (ret) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Preparing device for XDP attach failed");
+ return ret;
+ }
+ }
+
+ if (!ice_is_xdp_ena_vsi(vsi) && prog) {
+ vsi->num_xdp_txq = vsi->alloc_txq;
+ xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Setting up XDP Tx resources failed");
+ } else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+ xdp_ring_err = ice_destroy_xdp_rings(vsi);
+ if (xdp_ring_err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Freeing XDP Tx resources failed");
+ } else {
+ ice_vsi_assign_bpf_prog(vsi, prog);
+ }
+
+ if (if_running)
+ ret = ice_up(vsi);
+
+ if (!ret && prog && vsi->xsk_umems) {
+ int i;
+
+ ice_for_each_rxq(vsi, i) {
+ struct ice_ring *rx_ring = vsi->rx_rings[i];
+
+ if (rx_ring->xsk_umem)
+ napi_schedule(&rx_ring->q_vector->napi);
+ }
+ }
+
+ return (ret || xdp_ring_err) ? -ENOMEM : 0;
+}
+
+/**
+ * ice_xdp - implements XDP handler
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ struct ice_vsi *vsi = np->vsi;
+
+ if (vsi->type != ICE_VSI_PF) {
+ NL_SET_ERR_MSG_MOD(xdp->extack,
+ "XDP can be loaded only on PF VSI");
+ return -EINVAL;
+ }
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
+ return 0;
+ case XDP_SETUP_XSK_UMEM:
+ return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
+ xdp->xsk.queue_id);
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
* ice_ena_misc_vector - enable the non-queue interrupts
* @pf: board private structure
*/
@@ -1698,8 +2031,10 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
struct ice_pf *pf = (struct ice_pf *)data;
struct ice_hw *hw = &pf->hw;
irqreturn_t ret = IRQ_NONE;
+ struct device *dev;
u32 oicr, ena_mask;
+ dev = ice_pf_to_dev(pf);
set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state);
@@ -1735,8 +2070,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
else if (reset == ICE_RESET_EMPR)
pf->empr_count++;
else
- dev_dbg(&pf->pdev->dev, "Invalid reset type %d\n",
- reset);
+ dev_dbg(dev, "Invalid reset type %d\n", reset);
/* If a reset cycle isn't already in progress, we set a bit in
* pf->state so that the service task can start a reset/rebuild.
@@ -1770,8 +2104,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
if (oicr & PFINT_OICR_HMC_ERR_M) {
ena_mask &= ~PFINT_OICR_HMC_ERR_M;
- dev_dbg(&pf->pdev->dev,
- "HMC Error interrupt - info 0x%x, data 0x%x\n",
+ dev_dbg(dev, "HMC Error interrupt - info 0x%x, data 0x%x\n",
rd32(hw, PFHMC_ERRORINFO),
rd32(hw, PFHMC_ERRORDATA));
}
@@ -1779,8 +2112,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
/* Report any remaining unexpected interrupts */
oicr &= ena_mask;
if (oicr) {
- dev_dbg(&pf->pdev->dev, "unhandled interrupt oicr=0x%08x\n",
- oicr);
+ dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr);
/* If a critical error is pending there is no choice but to
* reset the device.
*/
@@ -1838,7 +2170,7 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf)
if (pf->msix_entries) {
synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
- devm_free_irq(&pf->pdev->dev,
+ devm_free_irq(ice_pf_to_dev(pf),
pf->msix_entries[pf->oicr_idx].vector, pf);
}
@@ -1882,13 +2214,13 @@ static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
*/
static int ice_req_irq_msix_misc(struct ice_pf *pf)
{
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
int oicr_idx, err = 0;
if (!pf->int_name[0])
snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
- dev_driver_string(&pf->pdev->dev),
- dev_name(&pf->pdev->dev));
+ dev_driver_string(dev), dev_name(dev));
/* Do not request IRQ but do enable OICR interrupt since settings are
* lost during reset. Note that this function is called only during
@@ -1905,12 +2237,10 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
pf->num_avail_sw_msix -= 1;
pf->oicr_idx = oicr_idx;
- err = devm_request_irq(&pf->pdev->dev,
- pf->msix_entries[pf->oicr_idx].vector,
+ err = devm_request_irq(dev, pf->msix_entries[pf->oicr_idx].vector,
ice_misc_intr, 0, pf->int_name, pf);
if (err) {
- dev_err(&pf->pdev->dev,
- "devm_request_irq for %s failed: %d\n",
+ dev_err(dev, "devm_request_irq for %s failed: %d\n",
pf->int_name, err);
ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
pf->num_avail_sw_msix += 1;
@@ -2043,7 +2373,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
ice_set_ops(netdev);
if (vsi->type == ICE_VSI_PF) {
- SET_NETDEV_DEV(netdev, &pf->pdev->dev);
+ SET_NETDEV_DEV(netdev, ice_pf_to_dev(pf));
ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
ether_addr_copy(netdev->dev_addr, mac_addr);
ether_addr_copy(netdev->perm_addr, mac_addr);
@@ -2219,6 +2549,11 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
status = -ENODEV;
goto unroll_vsi_setup;
}
+ /* netdev has to be configured before setting frame size */
+ ice_vsi_cfg_frame_size(vsi);
+
+ /* Setup DCB netlink interface */
+ ice_dcbnl_setup(vsi);
/* registering the NAPI handler requires both the queues and
* netdev to be created, which are done in ice_pf_vsi_setup()
@@ -2300,6 +2635,7 @@ static void ice_deinit_pf(struct ice_pf *pf)
{
ice_service_task_stop(pf);
mutex_destroy(&pf->sw_mutex);
+ mutex_destroy(&pf->tc_mutex);
mutex_destroy(&pf->avail_q_mutex);
if (pf->avail_txqs) {
@@ -2349,6 +2685,7 @@ static int ice_init_pf(struct ice_pf *pf)
ice_set_pf_caps(pf);
mutex_init(&pf->sw_mutex);
+ mutex_init(&pf->tc_mutex);
/* setup service timer and periodic service task */
timer_setup(&pf->serv_tmr, ice_service_timer, 0);
@@ -2363,7 +2700,7 @@ static int ice_init_pf(struct ice_pf *pf)
pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
if (!pf->avail_rxqs) {
- devm_kfree(&pf->pdev->dev, pf->avail_txqs);
+ devm_kfree(ice_pf_to_dev(pf), pf->avail_txqs);
pf->avail_txqs = NULL;
return -ENOMEM;
}
@@ -2380,6 +2717,7 @@ static int ice_init_pf(struct ice_pf *pf)
*/
static int ice_ena_msix_range(struct ice_pf *pf)
{
+ struct device *dev = ice_pf_to_dev(pf);
int v_left, v_actual, v_budget = 0;
int needed, err, i;
@@ -2400,7 +2738,7 @@ static int ice_ena_msix_range(struct ice_pf *pf)
v_budget += needed;
v_left -= needed;
- pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget,
+ pf->msix_entries = devm_kcalloc(dev, v_budget,
sizeof(*pf->msix_entries), GFP_KERNEL);
if (!pf->msix_entries) {
@@ -2416,13 +2754,13 @@ static int ice_ena_msix_range(struct ice_pf *pf)
ICE_MIN_MSIX, v_budget);
if (v_actual < 0) {
- dev_err(&pf->pdev->dev, "unable to reserve MSI-X vectors\n");
+ dev_err(dev, "unable to reserve MSI-X vectors\n");
err = v_actual;
goto msix_err;
}
if (v_actual < v_budget) {
- dev_warn(&pf->pdev->dev,
+ dev_warn(dev,
"not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
v_budget, v_actual);
/* 2 vectors for LAN (traffic + OICR) */
@@ -2441,11 +2779,11 @@ static int ice_ena_msix_range(struct ice_pf *pf)
return v_actual;
msix_err:
- devm_kfree(&pf->pdev->dev, pf->msix_entries);
+ devm_kfree(dev, pf->msix_entries);
goto exit_err;
no_hw_vecs_left_err:
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"not enough device MSI-X vectors. requested = %d, available = %d\n",
needed, v_left);
err = -ERANGE;
@@ -2461,7 +2799,7 @@ exit_err:
static void ice_dis_msix(struct ice_pf *pf)
{
pci_disable_msix(pf->pdev);
- devm_kfree(&pf->pdev->dev, pf->msix_entries);
+ devm_kfree(ice_pf_to_dev(pf), pf->msix_entries);
pf->msix_entries = NULL;
}
@@ -2474,7 +2812,7 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf)
ice_dis_msix(pf);
if (pf->irq_tracker) {
- devm_kfree(&pf->pdev->dev, pf->irq_tracker);
+ devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker);
pf->irq_tracker = NULL;
}
}
@@ -2494,7 +2832,7 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf)
/* set up vector assignment tracking */
pf->irq_tracker =
- devm_kzalloc(&pf->pdev->dev, sizeof(*pf->irq_tracker) +
+ devm_kzalloc(ice_pf_to_dev(pf), sizeof(*pf->irq_tracker) +
(sizeof(u16) * vectors), GFP_KERNEL);
if (!pf->irq_tracker) {
ice_dis_msix(pf);
@@ -2510,6 +2848,52 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf)
}
/**
+ * ice_vsi_recfg_qs - Change the number of queues on a VSI
+ * @vsi: VSI being changed
+ * @new_rx: new number of Rx queues
+ * @new_tx: new number of Tx queues
+ *
+ * Only change the number of queues if new_tx, or new_rx is non-0.
+ *
+ * Returns 0 on success.
+ */
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
+{
+ struct ice_pf *pf = vsi->back;
+ int err = 0, timeout = 50;
+
+ if (!new_rx && !new_tx)
+ return -EINVAL;
+
+ while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
+ timeout--;
+ if (!timeout)
+ return -EBUSY;
+ usleep_range(1000, 2000);
+ }
+
+ if (new_tx)
+ vsi->req_txq = new_tx;
+ if (new_rx)
+ vsi->req_rxq = new_rx;
+
+ /* set for the next time the netdev is started */
+ if (!netif_running(vsi->netdev)) {
+ ice_vsi_rebuild(vsi, false);
+ dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
+ goto done;
+ }
+
+ ice_vsi_close(vsi);
+ ice_vsi_rebuild(vsi, false);
+ ice_pf_dcb_recfg(pf);
+ ice_vsi_open(vsi);
+done:
+ clear_bit(__ICE_CFG_BUSY, pf->state);
+ return err;
+}
+
+/**
* ice_log_pkg_init - log result of DDP package load
* @hw: pointer to hardware info
* @status: status of package load
@@ -2518,7 +2902,7 @@ static void
ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
{
struct ice_pf *pf = (struct ice_pf *)hw->back;
- struct device *dev = &pf->pdev->dev;
+ struct device *dev = ice_pf_to_dev(pf);
switch (*status) {
case ICE_SUCCESS:
@@ -2598,7 +2982,7 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
break;
case ICE_ERR_AQ_ERROR:
- switch (hw->adminq.sq_last_status) {
+ switch (hw->pkg_dwnld_status) {
case ICE_AQ_RC_ENOSEC:
case ICE_AQ_RC_EBADSIG:
dev_err(dev,
@@ -2637,7 +3021,7 @@ static void
ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
{
enum ice_status status = ICE_ERR_PARAM;
- struct device *dev = &pf->pdev->dev;
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
/* Load DDP Package */
@@ -2677,7 +3061,7 @@ ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
static void ice_verify_cacheline_size(struct ice_pf *pf)
{
if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
- dev_warn(&pf->pdev->dev,
+ dev_warn(ice_pf_to_dev(pf),
"%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
ICE_CACHE_LINE_BYTES);
}
@@ -2747,7 +3131,7 @@ static void ice_request_fw(struct ice_pf *pf)
{
char *opt_fw_filename = ice_get_opt_fw_name(pf);
const struct firmware *firmware = NULL;
- struct device *dev = &pf->pdev->dev;
+ struct device *dev = ice_pf_to_dev(pf);
int err = 0;
/* optional device-specific DDP (if present) overrides the default DDP
@@ -2831,6 +3215,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
hw = &pf->hw;
hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
+ pci_save_state(pdev);
+
hw->back = pf;
hw->vendor_id = pdev->vendor;
hw->device_id = pdev->device;
@@ -2936,7 +3322,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
err = ice_setup_pf_sw(pf);
if (err) {
- dev_err(dev, "probe failed due to setup PF switch:%d\n", err);
+ dev_err(dev, "probe failed due to setup PF switch: %d\n", err);
goto err_alloc_sw_unroll;
}
@@ -2976,12 +3362,15 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
ice_cfg_lldp_mib_change(&pf->hw, true);
}
+ /* print PCI link speed and width */
+ pcie_print_link_status(pf->pdev);
+
return 0;
err_alloc_sw_unroll:
set_bit(__ICE_SERVICE_DIS, pf->state);
set_bit(__ICE_DOWN, pf->state);
- devm_kfree(&pf->pdev->dev, pf->first_sw);
+ devm_kfree(dev, pf->first_sw);
err_msix_misc_unroll:
ice_free_irq_msix_misc(pf);
err_init_interrupt_unroll:
@@ -3027,12 +3416,13 @@ static void ice_remove(struct pci_dev *pdev)
}
ice_deinit_pf(pf);
ice_deinit_hw(&pf->hw);
- ice_clear_interrupt_scheme(pf);
/* Issue a PFR as part of the prescribed driver unload flow. Do not
* do it via ice_schedule_reset() since there is no need to rebuild
* and the service task is already stopped.
*/
ice_reset(&pf->hw, ICE_RESET_PFR);
+ pci_wait_for_pending_transaction(pdev);
+ ice_clear_interrupt_scheme(pf);
pci_disable_pcie_error_reporting(pdev);
}
@@ -3347,6 +3737,48 @@ static void ice_set_rx_mode(struct net_device *netdev)
}
/**
+ * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
+ * @netdev: network interface device structure
+ * @queue_index: Queue ID
+ * @maxrate: maximum bandwidth in Mbps
+ */
+static int
+ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ enum ice_status status;
+ u16 q_handle;
+ u8 tc;
+
+ /* Validate maxrate requested is within permitted range */
+ if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) {
+ netdev_err(netdev,
+ "Invalid max rate %d specified for the queue %d\n",
+ maxrate, queue_index);
+ return -EINVAL;
+ }
+
+ q_handle = vsi->tx_rings[queue_index]->q_handle;
+ tc = ice_dcb_get_tc(vsi, queue_index);
+
+ /* Set BW back to default, when user set maxrate to 0 */
+ if (!maxrate)
+ status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
+ q_handle, ICE_MAX_BW);
+ else
+ status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc,
+ q_handle, ICE_MAX_BW, maxrate * 1000);
+ if (status) {
+ netdev_err(netdev,
+ "Unable to set Tx max rate, error %d\n", status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
* ice_fdb_add - add an entry to the hardware database
* @ndm: the input from the stack
* @tb: pointer to array of nladdr (unused)
@@ -3426,6 +3858,7 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
int ret = 0;
/* Don't set any netdev advanced features with device in Safe Mode */
@@ -3435,6 +3868,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
return ret;
}
+ /* Do not change setting during reset */
+ if (ice_is_reset_in_progress(pf->state)) {
+ dev_err(&vsi->back->pdev->dev,
+ "Device is resetting, changing advanced netdev features temporarily unavailable.\n");
+ return -EBUSY;
+ }
+
/* Multiple features can be changed in one call so keep features in
* separate if/else statements to guarantee each feature is checked
*/
@@ -3505,6 +3945,8 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
ice_vsi_cfg_dcb_rings(vsi);
err = ice_vsi_cfg_lan_txqs(vsi);
+ if (!err && ice_is_xdp_ena_vsi(vsi))
+ err = ice_vsi_cfg_xdp_txqs(vsi);
if (!err)
err = ice_vsi_cfg_rxqs(vsi);
@@ -3920,6 +4362,13 @@ int ice_down(struct ice_vsi *vsi)
netdev_err(vsi->netdev,
"Failed stop Tx rings, VSI %d error %d\n",
vsi->vsi_num, tx_err);
+ if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
+ tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
+ if (tx_err)
+ netdev_err(vsi->netdev,
+ "Failed stop XDP rings, VSI %d error %d\n",
+ vsi->vsi_num, tx_err);
+ }
rx_err = ice_vsi_stop_rx_rings(vsi);
if (rx_err)
@@ -3970,8 +4419,13 @@ int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
}
ice_for_each_txq(vsi, i) {
- vsi->tx_rings[i]->netdev = vsi->netdev;
- err = ice_setup_tx_ring(vsi->tx_rings[i]);
+ struct ice_ring *ring = vsi->tx_rings[i];
+
+ if (!ring)
+ return -EINVAL;
+
+ ring->netdev = vsi->netdev;
+ err = ice_setup_tx_ring(ring);
if (err)
break;
}
@@ -3996,8 +4450,13 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
}
ice_for_each_rxq(vsi, i) {
- vsi->rx_rings[i]->netdev = vsi->netdev;
- err = ice_setup_rx_ring(vsi->rx_rings[i]);
+ struct ice_ring *ring = vsi->rx_rings[i];
+
+ if (!ring)
+ return -EINVAL;
+
+ ring->netdev = vsi->netdev;
+ err = ice_setup_rx_ring(ring);
if (err)
break;
}
@@ -4033,7 +4492,7 @@ static int ice_vsi_open(struct ice_vsi *vsi)
goto err_setup_rx;
snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
- dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
+ dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name);
err = ice_vsi_req_irq_msix(vsi, int_name);
if (err)
goto err_setup_rx;
@@ -4082,61 +4541,13 @@ static void ice_vsi_release_all(struct ice_pf *pf)
err = ice_vsi_release(pf->vsi[i]);
if (err)
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(ice_pf_to_dev(pf),
"Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
i, err, pf->vsi[i]->vsi_num);
}
}
/**
- * ice_ena_vsi - resume a VSI
- * @vsi: the VSI being resume
- * @locked: is the rtnl_lock already held
- */
-static int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
-{
- int err = 0;
-
- if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
- return 0;
-
- clear_bit(__ICE_NEEDS_RESTART, vsi->state);
-
- if (vsi->netdev && vsi->type == ICE_VSI_PF) {
- if (netif_running(vsi->netdev)) {
- if (!locked)
- rtnl_lock();
-
- err = ice_open(vsi->netdev);
-
- if (!locked)
- rtnl_unlock();
- }
- }
-
- return err;
-}
-
-/**
- * ice_pf_ena_all_vsi - Resume all VSIs on a PF
- * @pf: the PF
- * @locked: is the rtnl_lock already held
- */
-#ifdef CONFIG_DCB
-int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked)
-{
- int v;
-
- ice_for_each_vsi(pf, v)
- if (pf->vsi[v])
- if (ice_ena_vsi(pf->vsi[v], locked))
- return -EIO;
-
- return 0;
-}
-#endif /* CONFIG_DCB */
-
-/**
* ice_vsi_rebuild_by_type - Rebuild VSI of a given type
* @pf: pointer to the PF instance
* @type: VSI type to rebuild
@@ -4145,6 +4556,7 @@ int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked)
*/
static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
{
+ struct device *dev = ice_pf_to_dev(pf);
enum ice_status status;
int i, err;
@@ -4155,20 +4567,20 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
continue;
/* rebuild the VSI */
- err = ice_vsi_rebuild(vsi);
+ err = ice_vsi_rebuild(vsi, true);
if (err) {
- dev_err(&pf->pdev->dev,
- "rebuild VSI failed, err %d, VSI index %d, type %d\n",
- err, vsi->idx, type);
+ dev_err(dev,
+ "rebuild VSI failed, err %d, VSI index %d, type %s\n",
+ err, vsi->idx, ice_vsi_type_str(type));
return err;
}
/* replay filters for the VSI */
status = ice_replay_vsi(&pf->hw, vsi->idx);
if (status) {
- dev_err(&pf->pdev->dev,
- "replay VSI failed, status %d, VSI index %d, type %d\n",
- status, vsi->idx, type);
+ dev_err(dev,
+ "replay VSI failed, status %d, VSI index %d, type %s\n",
+ status, vsi->idx, ice_vsi_type_str(type));
return -EIO;
}
@@ -4180,14 +4592,14 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
/* enable the VSI */
err = ice_ena_vsi(vsi, false);
if (err) {
- dev_err(&pf->pdev->dev,
- "enable VSI failed, err %d, VSI index %d, type %d\n",
- err, vsi->idx, type);
+ dev_err(dev,
+ "enable VSI failed, err %d, VSI index %d, type %s\n",
+ err, vsi->idx, ice_vsi_type_str(type));
return err;
}
- dev_info(&pf->pdev->dev, "VSI rebuilt. VSI index %d, type %d\n",
- vsi->idx, type);
+ dev_info(dev, "VSI rebuilt. VSI index %d, type %s\n", vsi->idx,
+ ice_vsi_type_str(type));
}
return 0;
@@ -4226,7 +4638,7 @@ static void ice_update_pf_netdev_link(struct ice_pf *pf)
*/
static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
{
- struct device *dev = &pf->pdev->dev;
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
enum ice_status ret;
int err;
@@ -4272,7 +4684,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
err = ice_update_link_info(hw->port_info);
if (err)
- dev_err(&pf->pdev->dev, "Get link status error %d\n", err);
+ dev_err(dev, "Get link status error %d\n", err);
/* start misc vector */
err = ice_req_irq_msix_misc(pf);
@@ -4329,6 +4741,18 @@ clear_recovery:
}
/**
+ * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: Pointer to VSI structure
+ */
+static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
+{
+ if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+ return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
+ else
+ return ICE_RXBUF_3072;
+}
+
+/**
* ice_change_mtu - NDO callback to change the MTU
* @netdev: network interface device structure
* @new_mtu: new value for maximum frame size
@@ -4347,6 +4771,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
return 0;
}
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ int frame_size = ice_max_xdp_frame_size(vsi);
+
+ if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
+ netdev_err(netdev, "max MTU for XDP usage is %d\n",
+ frame_size - ICE_ETH_PKT_HDR_PAD);
+ return -EINVAL;
+ }
+ }
+
if (new_mtu < netdev->min_mtu) {
netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
netdev->min_mtu);
@@ -4409,7 +4843,9 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
struct ice_pf *pf = vsi->back;
struct ice_hw *hw = &pf->hw;
enum ice_status status;
+ struct device *dev;
+ dev = ice_pf_to_dev(pf);
if (seed) {
struct ice_aqc_get_set_rss_keys *buf =
(struct ice_aqc_get_set_rss_keys *)seed;
@@ -4417,8 +4853,7 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
status = ice_aq_set_rss_key(hw, vsi->idx, buf);
if (status) {
- dev_err(&pf->pdev->dev,
- "Cannot set RSS key, err %d aq_err %d\n",
+ dev_err(dev, "Cannot set RSS key, err %d aq_err %d\n",
status, hw->adminq.rq_last_status);
return -EIO;
}
@@ -4428,8 +4863,7 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
lut, lut_size);
if (status) {
- dev_err(&pf->pdev->dev,
- "Cannot set RSS lut, err %d aq_err %d\n",
+ dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n",
status, hw->adminq.rq_last_status);
return -EIO;
}
@@ -4452,15 +4886,16 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
struct ice_pf *pf = vsi->back;
struct ice_hw *hw = &pf->hw;
enum ice_status status;
+ struct device *dev;
+ dev = ice_pf_to_dev(pf);
if (seed) {
struct ice_aqc_get_set_rss_keys *buf =
(struct ice_aqc_get_set_rss_keys *)seed;
status = ice_aq_get_rss_key(hw, vsi->idx, buf);
if (status) {
- dev_err(&pf->pdev->dev,
- "Cannot get RSS key, err %d aq_err %d\n",
+ dev_err(dev, "Cannot get RSS key, err %d aq_err %d\n",
status, hw->adminq.rq_last_status);
return -EIO;
}
@@ -4470,8 +4905,7 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
lut, lut_size);
if (status) {
- dev_err(&pf->pdev->dev,
- "Cannot get RSS lut, err %d aq_err %d\n",
+ dev_err(dev, "Cannot get RSS lut, err %d aq_err %d\n",
status, hw->adminq.rq_last_status);
return -EIO;
}
@@ -4515,7 +4949,6 @@ ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
*/
static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
{
- struct device *dev = &vsi->back->pdev->dev;
struct ice_aqc_vsi_props *vsi_props;
struct ice_hw *hw = &vsi->back->hw;
struct ice_vsi_ctx *ctxt;
@@ -4524,7 +4957,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
vsi_props = &vsi->info;
- ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
if (!ctxt)
return -ENOMEM;
@@ -4540,7 +4973,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
if (status) {
- dev_err(dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n",
+ dev_err(&vsi->back->pdev->dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n",
bmode, status, hw->adminq.sq_last_status);
ret = -EIO;
goto out;
@@ -4549,7 +4982,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
vsi_props->sw_flags = ctxt->info.sw_flags;
out:
- devm_kfree(dev, ctxt);
+ kfree(ctxt);
return ret;
}
@@ -4864,12 +5297,14 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ice_change_mtu,
.ndo_get_stats64 = ice_get_stats64,
+ .ndo_set_tx_maxrate = ice_set_tx_maxrate,
.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
.ndo_set_vf_mac = ice_set_vf_mac,
.ndo_get_vf_config = ice_get_vf_cfg,
.ndo_set_vf_trust = ice_set_vf_trust,
.ndo_set_vf_vlan = ice_set_vf_port_vlan,
.ndo_set_vf_link_state = ice_set_vf_link_state,
+ .ndo_get_vf_stats = ice_get_vf_stats,
.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
.ndo_set_features = ice_set_features,
@@ -4878,4 +5313,7 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_fdb_add = ice_fdb_add,
.ndo_fdb_del = ice_fdb_del,
.ndo_tx_timeout = ice_tx_timeout,
+ .ndo_bpf = ice_xdp,
+ .ndo_xdp_xmit = ice_xdp_xmit,
+ .ndo_xsk_wakeup = ice_xsk_wakeup,
};
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index bcb431f1bd92..57c73f613f32 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -219,8 +219,7 @@ static void ice_release_nvm(struct ice_hw *hw)
*
* Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq.
*/
-static enum ice_status
-ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
+enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
{
enum ice_status status;
@@ -242,9 +241,10 @@ ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
*/
enum ice_status ice_init_nvm(struct ice_hw *hw)
{
+ u16 oem_hi, oem_lo, boot_cfg_tlv, boot_cfg_tlv_len;
struct ice_nvm_info *nvm = &hw->nvm;
u16 eetrack_lo, eetrack_hi;
- enum ice_status status = 0;
+ enum ice_status status;
u32 fla, gens_stat;
u8 sr_size;
@@ -261,15 +261,15 @@ enum ice_status ice_init_nvm(struct ice_hw *hw)
fla = rd32(hw, GLNVM_FLA);
if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */
nvm->blank_nvm_mode = false;
- } else { /* Blank programming mode */
+ } else {
+ /* Blank programming mode */
nvm->blank_nvm_mode = true;
- status = ICE_ERR_NVM_BLANK_MODE;
ice_debug(hw, ICE_DBG_NVM,
"NVM init error: unsupported blank mode.\n");
- return status;
+ return ICE_ERR_NVM_BLANK_MODE;
}
- status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &hw->nvm.ver);
+ status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &nvm->ver);
if (status) {
ice_debug(hw, ICE_DBG_INIT,
"Failed to read DEV starter version.\n");
@@ -287,9 +287,42 @@ enum ice_status ice_init_nvm(struct ice_hw *hw)
return status;
}
- hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo;
+ nvm->eetrack = (eetrack_hi << 16) | eetrack_lo;
- return status;
+ status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len,
+ ICE_SR_BOOT_CFG_PTR);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "Failed to read Boot Configuration Block TLV.\n");
+ return status;
+ }
+
+ /* Boot Configuration Block must have length at least 2 words
+ * (Combo Image Version High and Combo Image Version Low)
+ */
+ if (boot_cfg_tlv_len < 2) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "Invalid Boot Configuration Block TLV size.\n");
+ return ICE_ERR_INVAL_SIZE;
+ }
+
+ status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF),
+ &oem_hi);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER hi.\n");
+ return status;
+ }
+
+ status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF + 1),
+ &oem_lo);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER lo.\n");
+ return status;
+ }
+
+ nvm->oem_ver = ((u32)oem_hi << 16) | oem_lo;
+
+ return 0;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h
new file mode 100644
index 000000000000..a9fa011c22c6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_NVM_H_
+#define _ICE_NVM_H_
+
+enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data);
+#endif /* _ICE_NVM_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index fc624b73d05d..eae707ddf8e8 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -411,6 +411,27 @@ ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
}
/**
+ * ice_aq_cfg_sched_elems - configures scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to configure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_cfgd: returns total number of elements configured
+ * @cd: pointer to command details structure or NULL
+ *
+ * Configure scheduling elements (0x0403)
+ */
+static enum ice_status
+ice_aq_cfg_sched_elems(struct ice_hw *hw, u16 elems_req,
+ struct ice_aqc_conf_elem *buf, u16 buf_size,
+ u16 *elems_cfgd, struct ice_sq_cd *cd)
+{
+ return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_cfg_sched_elems,
+ elems_req, (void *)buf, buf_size,
+ elems_cfgd, cd);
+}
+
+/**
* ice_aq_suspend_sched_elems - suspend scheduler elements
* @hw: pointer to the HW struct
* @elems_req: number of elements to suspend
@@ -557,6 +578,149 @@ ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
}
/**
+ * ice_aq_rl_profile - performs a rate limiting task
+ * @hw: pointer to the HW struct
+ * @opcode:opcode for add, query, or remove profile(s)
+ * @num_profiles: the number of profiles
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_processed: number of processed add or remove profile(s) to return
+ * @cd: pointer to command details structure
+ *
+ * RL profile function to add, query, or remove profile(s)
+ */
+static enum ice_status
+ice_aq_rl_profile(struct ice_hw *hw, enum ice_adminq_opc opcode,
+ u16 num_profiles, struct ice_aqc_rl_profile_generic_elem *buf,
+ u16 buf_size, u16 *num_processed, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_rl_profile *cmd;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ cmd = &desc.params.rl_profile;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, opcode);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+ cmd->num_profiles = cpu_to_le16(num_profiles);
+ status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+ if (!status && num_processed)
+ *num_processed = le16_to_cpu(cmd->num_processed);
+ return status;
+}
+
+/**
+ * ice_aq_add_rl_profile - adds rate limiting profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to be add
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_added: total number of profiles added to return
+ * @cd: pointer to command details structure
+ *
+ * Add RL profile (0x0410)
+ */
+static enum ice_status
+ice_aq_add_rl_profile(struct ice_hw *hw, u16 num_profiles,
+ struct ice_aqc_rl_profile_generic_elem *buf,
+ u16 buf_size, u16 *num_profiles_added,
+ struct ice_sq_cd *cd)
+{
+ return ice_aq_rl_profile(hw, ice_aqc_opc_add_rl_profiles,
+ num_profiles, buf,
+ buf_size, num_profiles_added, cd);
+}
+
+/**
+ * ice_aq_remove_rl_profile - removes RL profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to remove
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_removed: total number of profiles removed to return
+ * @cd: pointer to command details structure or NULL
+ *
+ * Remove RL profile (0x0415)
+ */
+static enum ice_status
+ice_aq_remove_rl_profile(struct ice_hw *hw, u16 num_profiles,
+ struct ice_aqc_rl_profile_generic_elem *buf,
+ u16 buf_size, u16 *num_profiles_removed,
+ struct ice_sq_cd *cd)
+{
+ return ice_aq_rl_profile(hw, ice_aqc_opc_remove_rl_profiles,
+ num_profiles, buf,
+ buf_size, num_profiles_removed, cd);
+}
+
+/**
+ * ice_sched_del_rl_profile - remove RL profile
+ * @hw: pointer to the HW struct
+ * @rl_info: rate limit profile information
+ *
+ * If the profile ID is not referenced anymore, it removes profile ID with
+ * its associated parameters from HW DB,and locally. The caller needs to
+ * hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_del_rl_profile(struct ice_hw *hw,
+ struct ice_aqc_rl_profile_info *rl_info)
+{
+ struct ice_aqc_rl_profile_generic_elem *buf;
+ u16 num_profiles_removed;
+ enum ice_status status;
+ u16 num_profiles = 1;
+
+ if (rl_info->prof_id_ref != 0)
+ return ICE_ERR_IN_USE;
+
+ /* Safe to remove profile ID */
+ buf = (struct ice_aqc_rl_profile_generic_elem *)
+ &rl_info->profile;
+ status = ice_aq_remove_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+ &num_profiles_removed, NULL);
+ if (status || num_profiles_removed != num_profiles)
+ return ICE_ERR_CFG;
+
+ /* Delete stale entry now */
+ list_del(&rl_info->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), rl_info);
+ return status;
+}
+
+/**
+ * ice_sched_clear_rl_prof - clears RL prof entries
+ * @pi: port information structure
+ *
+ * This function removes all RL profile from HW as well as from SW DB.
+ */
+static void ice_sched_clear_rl_prof(struct ice_port_info *pi)
+{
+ u16 ln;
+
+ for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+ struct ice_aqc_rl_profile_info *rl_prof_elem;
+ struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+ list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+ &pi->rl_prof_list[ln], list_entry) {
+ struct ice_hw *hw = pi->hw;
+ enum ice_status status;
+
+ rl_prof_elem->prof_id_ref = 0;
+ status = ice_sched_del_rl_profile(hw, rl_prof_elem);
+ if (status) {
+ ice_debug(hw, ICE_DBG_SCHED,
+ "Remove rl profile failed\n");
+ /* On error, free mem required */
+ list_del(&rl_prof_elem->list_entry);
+ devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+ }
+ }
+ }
+}
+
+/**
* ice_sched_clear_agg - clears the aggregator related information
* @hw: pointer to the hardware structure
*
@@ -592,6 +756,8 @@ static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
{
if (!pi)
return;
+ /* remove RL profiles related lists */
+ ice_sched_clear_rl_prof(pi);
if (pi->root) {
ice_free_sched_node(pi, pi->root);
pi->root = NULL;
@@ -632,8 +798,7 @@ void ice_sched_cleanup_all(struct ice_hw *hw)
hw->layer_info = NULL;
}
- if (hw->port_info)
- ice_sched_clear_port(hw->port_info);
+ ice_sched_clear_port(hw->port_info);
hw->num_tx_sched_layers = 0;
hw->num_tx_sched_phys_layers = 0;
@@ -1014,6 +1179,8 @@ enum ice_status ice_sched_init_port(struct ice_port_info *pi)
/* initialize the port for handling the scheduler tree */
pi->port_state = ICE_SCHED_PORT_STATE_READY;
mutex_init(&pi->sched_lock);
+ for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
+ INIT_LIST_HEAD(&pi->rl_prof_list[i]);
err_init_port:
if (status && pi->root) {
@@ -1036,7 +1203,7 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
struct ice_aqc_query_txsched_res_resp *buf;
enum ice_status status = 0;
__le16 max_sibl;
- u8 i;
+ u16 i;
if (hw->layer_info)
return status;
@@ -1062,8 +1229,8 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
* and so on. This array will be populated from root (index 0) to
* qgroup layer 7. Leaf node has no children.
*/
- for (i = 0; i < hw->num_tx_sched_layers; i++) {
- max_sibl = buf->layer_props[i].max_sibl_grp_sz;
+ for (i = 0; i < hw->num_tx_sched_layers - 1; i++) {
+ max_sibl = buf->layer_props[i + 1].max_sibl_grp_sz;
hw->max_children[i] = le16_to_cpu(max_sibl);
}
@@ -1670,3 +1837,1095 @@ enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
{
return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
}
+
+/**
+ * ice_sched_rm_unused_rl_prof - remove unused RL profile
+ * @pi: port information structure
+ *
+ * This function removes unused rate limit profiles from the HW and
+ * SW DB. The caller needs to hold scheduler lock.
+ */
+static void ice_sched_rm_unused_rl_prof(struct ice_port_info *pi)
+{
+ u16 ln;
+
+ for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+ struct ice_aqc_rl_profile_info *rl_prof_elem;
+ struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+ list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+ &pi->rl_prof_list[ln], list_entry) {
+ if (!ice_sched_del_rl_profile(pi->hw, rl_prof_elem))
+ ice_debug(pi->hw, ICE_DBG_SCHED,
+ "Removed rl profile\n");
+ }
+ }
+}
+
+/**
+ * ice_sched_update_elem - update element
+ * @hw: pointer to the HW struct
+ * @node: pointer to node
+ * @info: node info to update
+ *
+ * It updates the HW DB, and local SW DB of node. It updates the scheduling
+ * parameters of node from argument info data buffer (Info->data buf) and
+ * returns success or error on config sched element failure. The caller
+ * needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_update_elem(struct ice_hw *hw, struct ice_sched_node *node,
+ struct ice_aqc_txsched_elem_data *info)
+{
+ struct ice_aqc_conf_elem buf;
+ enum ice_status status;
+ u16 elem_cfgd = 0;
+ u16 num_elems = 1;
+
+ buf.generic[0] = *info;
+ /* Parent TEID is reserved field in this aq call */
+ buf.generic[0].parent_teid = 0;
+ /* Element type is reserved field in this aq call */
+ buf.generic[0].data.elem_type = 0;
+ /* Flags is reserved field in this aq call */
+ buf.generic[0].data.flags = 0;
+
+ /* Update HW DB */
+ /* Configure element node */
+ status = ice_aq_cfg_sched_elems(hw, num_elems, &buf, sizeof(buf),
+ &elem_cfgd, NULL);
+ if (status || elem_cfgd != num_elems) {
+ ice_debug(hw, ICE_DBG_SCHED, "Config sched elem error\n");
+ return ICE_ERR_CFG;
+ }
+
+ /* Config success case */
+ /* Now update local SW DB */
+ /* Only copy the data portion of info buffer */
+ node->info.data = info->data;
+ return status;
+}
+
+/**
+ * ice_sched_cfg_node_bw_alloc - configure node BW weight/alloc params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @bw_alloc: BW weight/allocation
+ *
+ * This function configures node element's BW allocation.
+ */
+static enum ice_status
+ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
+ enum ice_rl_type rl_type, u8 bw_alloc)
+{
+ struct ice_aqc_txsched_elem_data buf;
+ struct ice_aqc_txsched_elem *data;
+ enum ice_status status;
+
+ buf = node->info;
+ data = &buf.data;
+ if (rl_type == ICE_MIN_BW) {
+ data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+ data->cir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+ } else if (rl_type == ICE_MAX_BW) {
+ data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+ data->eir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+ } else {
+ return ICE_ERR_PARAM;
+ }
+
+ /* Configure element */
+ status = ice_sched_update_elem(hw, node, &buf);
+ return status;
+}
+
+/**
+ * ice_set_clear_cir_bw - set or clear CIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear CIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void
+ice_set_clear_cir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+ if (bw == ICE_SCHED_DFLT_BW) {
+ clear_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+ bw_t_info->cir_bw.bw = 0;
+ } else {
+ /* Save type of BW information */
+ set_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+ bw_t_info->cir_bw.bw = bw;
+ }
+}
+
+/**
+ * ice_set_clear_eir_bw - set or clear EIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear EIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void
+ice_set_clear_eir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+ if (bw == ICE_SCHED_DFLT_BW) {
+ clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+ bw_t_info->eir_bw.bw = 0;
+ } else {
+ /* EIR BW and Shared BW profiles are mutually exclusive and
+ * hence only one of them may be set for any given element.
+ * First clear earlier saved shared BW information.
+ */
+ clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+ bw_t_info->shared_bw = 0;
+ /* save EIR BW information */
+ set_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+ bw_t_info->eir_bw.bw = bw;
+ }
+}
+
+/**
+ * ice_set_clear_shared_bw - set or clear shared BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear shared bandwidth (BW) in the passed param bw_t_info.
+ */
+static void
+ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+ if (bw == ICE_SCHED_DFLT_BW) {
+ clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+ bw_t_info->shared_bw = 0;
+ } else {
+ /* EIR BW and Shared BW profiles are mutually exclusive and
+ * hence only one of them may be set for any given element.
+ * First clear earlier saved EIR BW information.
+ */
+ clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+ bw_t_info->eir_bw.bw = 0;
+ /* save shared BW information */
+ set_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+ bw_t_info->shared_bw = bw;
+ }
+}
+
+/**
+ * ice_sched_calc_wakeup - calculate RL profile wakeup parameter
+ * @bw: bandwidth in Kbps
+ *
+ * This function calculates the wakeup parameter of RL profile.
+ */
+static u16 ice_sched_calc_wakeup(s32 bw)
+{
+ s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f;
+ s32 wakeup_f_int;
+ u16 wakeup = 0;
+
+ /* Get the wakeup integer value */
+ bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+ wakeup_int = div64_long(ICE_RL_PROF_FREQUENCY, bytes_per_sec);
+ if (wakeup_int > 63) {
+ wakeup = (u16)((1 << 15) | wakeup_int);
+ } else {
+ /* Calculate fraction value up to 4 decimals
+ * Convert Integer value to a constant multiplier
+ */
+ wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int;
+ wakeup_a = div64_long((s64)ICE_RL_PROF_MULTIPLIER *
+ ICE_RL_PROF_FREQUENCY,
+ bytes_per_sec);
+
+ /* Get Fraction value */
+ wakeup_f = wakeup_a - wakeup_b;
+
+ /* Round up the Fractional value via Ceil(Fractional value) */
+ if (wakeup_f > div64_long(ICE_RL_PROF_MULTIPLIER, 2))
+ wakeup_f += 1;
+
+ wakeup_f_int = (s32)div64_long(wakeup_f * ICE_RL_PROF_FRACTION,
+ ICE_RL_PROF_MULTIPLIER);
+ wakeup |= (u16)(wakeup_int << 9);
+ wakeup |= (u16)(0x1ff & wakeup_f_int);
+ }
+
+ return wakeup;
+}
+
+/**
+ * ice_sched_bw_to_rl_profile - convert BW to profile parameters
+ * @bw: bandwidth in Kbps
+ * @profile: profile parameters to return
+ *
+ * This function converts the BW to profile structure format.
+ */
+static enum ice_status
+ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile)
+{
+ enum ice_status status = ICE_ERR_PARAM;
+ s64 bytes_per_sec, ts_rate, mv_tmp;
+ bool found = false;
+ s32 encode = 0;
+ s64 mv = 0;
+ s32 i;
+
+ /* Bw settings range is from 0.5Mb/sec to 100Gb/sec */
+ if (bw < ICE_SCHED_MIN_BW || bw > ICE_SCHED_MAX_BW)
+ return status;
+
+ /* Bytes per second from Kbps */
+ bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+
+ /* encode is 6 bits but really useful are 5 bits */
+ for (i = 0; i < 64; i++) {
+ u64 pow_result = BIT_ULL(i);
+
+ ts_rate = div64_long((s64)ICE_RL_PROF_FREQUENCY,
+ pow_result * ICE_RL_PROF_TS_MULTIPLIER);
+ if (ts_rate <= 0)
+ continue;
+
+ /* Multiplier value */
+ mv_tmp = div64_long(bytes_per_sec * ICE_RL_PROF_MULTIPLIER,
+ ts_rate);
+
+ /* Round to the nearest ICE_RL_PROF_MULTIPLIER */
+ mv = round_up_64bit(mv_tmp, ICE_RL_PROF_MULTIPLIER);
+
+ /* First multiplier value greater than the given
+ * accuracy bytes
+ */
+ if (mv > ICE_RL_PROF_ACCURACY_BYTES) {
+ encode = i;
+ found = true;
+ break;
+ }
+ }
+ if (found) {
+ u16 wm;
+
+ wm = ice_sched_calc_wakeup(bw);
+ profile->rl_multiply = cpu_to_le16(mv);
+ profile->wake_up_calc = cpu_to_le16(wm);
+ profile->rl_encode = cpu_to_le16(encode);
+ status = 0;
+ } else {
+ status = ICE_ERR_DOES_NOT_EXIST;
+ }
+
+ return status;
+}
+
+/**
+ * ice_sched_add_rl_profile - add RL profile
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: specifies in which layer to create profile
+ *
+ * This function first checks the existing list for corresponding BW
+ * parameter. If it exists, it returns the associated profile otherwise
+ * it creates a new rate limit profile for requested BW, and adds it to
+ * the HW DB and local list. It returns the new profile or null on error.
+ * The caller needs to hold the scheduler lock.
+ */
+static struct ice_aqc_rl_profile_info *
+ice_sched_add_rl_profile(struct ice_port_info *pi,
+ enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+ struct ice_aqc_rl_profile_generic_elem *buf;
+ struct ice_aqc_rl_profile_info *rl_prof_elem;
+ u16 profiles_added = 0, num_profiles = 1;
+ enum ice_status status;
+ struct ice_hw *hw;
+ u8 profile_type;
+
+ if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+ return NULL;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+ break;
+ case ICE_MAX_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+ break;
+ case ICE_SHARED_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+ break;
+ default:
+ return NULL;
+ }
+
+ if (!pi)
+ return NULL;
+ hw = pi->hw;
+ list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+ list_entry)
+ if (rl_prof_elem->profile.flags == profile_type &&
+ rl_prof_elem->bw == bw)
+ /* Return existing profile ID info */
+ return rl_prof_elem;
+
+ /* Create new profile ID */
+ rl_prof_elem = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rl_prof_elem),
+ GFP_KERNEL);
+
+ if (!rl_prof_elem)
+ return NULL;
+
+ status = ice_sched_bw_to_rl_profile(bw, &rl_prof_elem->profile);
+ if (status)
+ goto exit_add_rl_prof;
+
+ rl_prof_elem->bw = bw;
+ /* layer_num is zero relative, and fw expects level from 1 to 9 */
+ rl_prof_elem->profile.level = layer_num + 1;
+ rl_prof_elem->profile.flags = profile_type;
+ rl_prof_elem->profile.max_burst_size = cpu_to_le16(hw->max_burst_size);
+
+ /* Create new entry in HW DB */
+ buf = (struct ice_aqc_rl_profile_generic_elem *)
+ &rl_prof_elem->profile;
+ status = ice_aq_add_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+ &profiles_added, NULL);
+ if (status || profiles_added != num_profiles)
+ goto exit_add_rl_prof;
+
+ /* Good entry - add in the list */
+ rl_prof_elem->prof_id_ref = 0;
+ list_add(&rl_prof_elem->list_entry, &pi->rl_prof_list[layer_num]);
+ return rl_prof_elem;
+
+exit_add_rl_prof:
+ devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+ return NULL;
+}
+
+/**
+ * ice_sched_cfg_node_bw_lmt - configure node sched params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @rl_prof_id: rate limit profile ID
+ *
+ * This function configures node element's BW limit.
+ */
+static enum ice_status
+ice_sched_cfg_node_bw_lmt(struct ice_hw *hw, struct ice_sched_node *node,
+ enum ice_rl_type rl_type, u16 rl_prof_id)
+{
+ struct ice_aqc_txsched_elem_data buf;
+ struct ice_aqc_txsched_elem *data;
+
+ buf = node->info;
+ data = &buf.data;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+ data->cir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+ break;
+ case ICE_MAX_BW:
+ /* EIR BW and Shared BW profiles are mutually exclusive and
+ * hence only one of them may be set for any given element
+ */
+ if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+ return ICE_ERR_CFG;
+ data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+ data->eir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+ break;
+ case ICE_SHARED_BW:
+ /* Check for removing shared BW */
+ if (rl_prof_id == ICE_SCHED_NO_SHARED_RL_PROF_ID) {
+ /* remove shared profile */
+ data->valid_sections &= ~ICE_AQC_ELEM_VALID_SHARED;
+ data->srl_id = 0; /* clear SRL field */
+
+ /* enable back EIR to default profile */
+ data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+ data->eir_bw.bw_profile_idx =
+ cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+ break;
+ }
+ /* EIR BW and Shared BW profiles are mutually exclusive and
+ * hence only one of them may be set for any given element
+ */
+ if ((data->valid_sections & ICE_AQC_ELEM_VALID_EIR) &&
+ (le16_to_cpu(data->eir_bw.bw_profile_idx) !=
+ ICE_SCHED_DFLT_RL_PROF_ID))
+ return ICE_ERR_CFG;
+ /* EIR BW is set to default, disable it */
+ data->valid_sections &= ~ICE_AQC_ELEM_VALID_EIR;
+ /* Okay to enable shared BW now */
+ data->valid_sections |= ICE_AQC_ELEM_VALID_SHARED;
+ data->srl_id = cpu_to_le16(rl_prof_id);
+ break;
+ default:
+ /* Unknown rate limit type */
+ return ICE_ERR_PARAM;
+ }
+
+ /* Configure element */
+ return ice_sched_update_elem(hw, node, &buf);
+}
+
+/**
+ * ice_sched_get_node_rl_prof_id - get node's rate limit profile ID
+ * @node: sched node
+ * @rl_type: rate limit type
+ *
+ * If existing profile matches, it returns the corresponding rate
+ * limit profile ID, otherwise it returns an invalid ID as error.
+ */
+static u16
+ice_sched_get_node_rl_prof_id(struct ice_sched_node *node,
+ enum ice_rl_type rl_type)
+{
+ u16 rl_prof_id = ICE_SCHED_INVAL_PROF_ID;
+ struct ice_aqc_txsched_elem *data;
+
+ data = &node->info.data;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ if (data->valid_sections & ICE_AQC_ELEM_VALID_CIR)
+ rl_prof_id = le16_to_cpu(data->cir_bw.bw_profile_idx);
+ break;
+ case ICE_MAX_BW:
+ if (data->valid_sections & ICE_AQC_ELEM_VALID_EIR)
+ rl_prof_id = le16_to_cpu(data->eir_bw.bw_profile_idx);
+ break;
+ case ICE_SHARED_BW:
+ if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+ rl_prof_id = le16_to_cpu(data->srl_id);
+ break;
+ default:
+ break;
+ }
+
+ return rl_prof_id;
+}
+
+/**
+ * ice_sched_get_rl_prof_layer - selects rate limit profile creation layer
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @layer_index: layer index
+ *
+ * This function returns requested profile creation layer.
+ */
+static u8
+ice_sched_get_rl_prof_layer(struct ice_port_info *pi, enum ice_rl_type rl_type,
+ u8 layer_index)
+{
+ struct ice_hw *hw = pi->hw;
+
+ if (layer_index >= hw->num_tx_sched_layers)
+ return ICE_SCHED_INVAL_LAYER_NUM;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ if (hw->layer_info[layer_index].max_cir_rl_profiles)
+ return layer_index;
+ break;
+ case ICE_MAX_BW:
+ if (hw->layer_info[layer_index].max_eir_rl_profiles)
+ return layer_index;
+ break;
+ case ICE_SHARED_BW:
+ /* if current layer doesn't support SRL profile creation
+ * then try a layer up or down.
+ */
+ if (hw->layer_info[layer_index].max_srl_profiles)
+ return layer_index;
+ else if (layer_index < hw->num_tx_sched_layers - 1 &&
+ hw->layer_info[layer_index + 1].max_srl_profiles)
+ return layer_index + 1;
+ else if (layer_index > 0 &&
+ hw->layer_info[layer_index - 1].max_srl_profiles)
+ return layer_index - 1;
+ break;
+ default:
+ break;
+ }
+ return ICE_SCHED_INVAL_LAYER_NUM;
+}
+
+/**
+ * ice_sched_get_srl_node - get shared rate limit node
+ * @node: tree node
+ * @srl_layer: shared rate limit layer
+ *
+ * This function returns SRL node to be used for shared rate limit purpose.
+ * The caller needs to hold scheduler lock.
+ */
+static struct ice_sched_node *
+ice_sched_get_srl_node(struct ice_sched_node *node, u8 srl_layer)
+{
+ if (srl_layer > node->tx_sched_layer)
+ return node->children[0];
+ else if (srl_layer < node->tx_sched_layer)
+ /* Node can't be created without a parent. It will always
+ * have a valid parent except root node.
+ */
+ return node->parent;
+ else
+ return node;
+}
+
+/**
+ * ice_sched_rm_rl_profile - remove RL profile ID
+ * @pi: port information structure
+ * @layer_num: layer number where profiles are saved
+ * @profile_type: profile type like EIR, CIR, or SRL
+ * @profile_id: profile ID to remove
+ *
+ * This function removes rate limit profile from layer 'layer_num' of type
+ * 'profile_type' and profile ID as 'profile_id'. The caller needs to hold
+ * scheduler lock.
+ */
+static enum ice_status
+ice_sched_rm_rl_profile(struct ice_port_info *pi, u8 layer_num, u8 profile_type,
+ u16 profile_id)
+{
+ struct ice_aqc_rl_profile_info *rl_prof_elem;
+ enum ice_status status = 0;
+
+ if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+ return ICE_ERR_PARAM;
+ /* Check the existing list for RL profile */
+ list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+ list_entry)
+ if (rl_prof_elem->profile.flags == profile_type &&
+ le16_to_cpu(rl_prof_elem->profile.profile_id) ==
+ profile_id) {
+ if (rl_prof_elem->prof_id_ref)
+ rl_prof_elem->prof_id_ref--;
+
+ /* Remove old profile ID from database */
+ status = ice_sched_del_rl_profile(pi->hw, rl_prof_elem);
+ if (status && status != ICE_ERR_IN_USE)
+ ice_debug(pi->hw, ICE_DBG_SCHED,
+ "Remove rl profile failed\n");
+ break;
+ }
+ if (status == ICE_ERR_IN_USE)
+ status = 0;
+ return status;
+}
+
+/**
+ * ice_sched_set_node_bw_dflt - set node's bandwidth limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ * @layer_num: layer number where RL profiles are saved
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_node_bw_dflt(struct ice_port_info *pi,
+ struct ice_sched_node *node,
+ enum ice_rl_type rl_type, u8 layer_num)
+{
+ enum ice_status status;
+ struct ice_hw *hw;
+ u8 profile_type;
+ u16 rl_prof_id;
+ u16 old_id;
+
+ hw = pi->hw;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+ rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+ break;
+ case ICE_MAX_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+ rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+ break;
+ case ICE_SHARED_BW:
+ profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+ /* No SRL is configured for default case */
+ rl_prof_id = ICE_SCHED_NO_SHARED_RL_PROF_ID;
+ break;
+ default:
+ return ICE_ERR_PARAM;
+ }
+ /* Save existing RL prof ID for later clean up */
+ old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+ /* Configure BW scheduling parameters */
+ status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+ if (status)
+ return status;
+
+ /* Remove stale RL profile ID */
+ if (old_id == ICE_SCHED_DFLT_RL_PROF_ID ||
+ old_id == ICE_SCHED_INVAL_PROF_ID)
+ return 0;
+
+ return ice_sched_rm_rl_profile(pi, layer_num, profile_type, old_id);
+}
+
+/**
+ * ice_sched_set_eir_srl_excl - set EIR/SRL exclusiveness
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @layer_num: layer number where rate limit profiles are saved
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth value
+ *
+ * This function prepares node element's bandwidth to SRL or EIR exclusively.
+ * EIR BW and Shared BW profiles are mutually exclusive and hence only one of
+ * them may be set for any given element. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_eir_srl_excl(struct ice_port_info *pi,
+ struct ice_sched_node *node,
+ u8 layer_num, enum ice_rl_type rl_type, u32 bw)
+{
+ if (rl_type == ICE_SHARED_BW) {
+ /* SRL node passed in this case, it may be different node */
+ if (bw == ICE_SCHED_DFLT_BW)
+ /* SRL being removed, ice_sched_cfg_node_bw_lmt()
+ * enables EIR to default. EIR is not set in this
+ * case, so no additional action is required.
+ */
+ return 0;
+
+ /* SRL being configured, set EIR to default here.
+ * ice_sched_cfg_node_bw_lmt() disables EIR when it
+ * configures SRL
+ */
+ return ice_sched_set_node_bw_dflt(pi, node, ICE_MAX_BW,
+ layer_num);
+ } else if (rl_type == ICE_MAX_BW &&
+ node->info.data.valid_sections & ICE_AQC_ELEM_VALID_SHARED) {
+ /* Remove Shared profile. Set default shared BW call
+ * removes shared profile for a node.
+ */
+ return ice_sched_set_node_bw_dflt(pi, node,
+ ICE_SHARED_BW,
+ layer_num);
+ }
+ return 0;
+}
+
+/**
+ * ice_sched_set_node_bw - set node's bandwidth
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: layer number
+ *
+ * This function adds new profile corresponding to requested BW, configures
+ * node's RL profile ID of type CIR, EIR, or SRL, and removes old profile
+ * ID from local database. The caller needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node,
+ enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+ struct ice_aqc_rl_profile_info *rl_prof_info;
+ enum ice_status status = ICE_ERR_PARAM;
+ struct ice_hw *hw = pi->hw;
+ u16 old_id, rl_prof_id;
+
+ rl_prof_info = ice_sched_add_rl_profile(pi, rl_type, bw, layer_num);
+ if (!rl_prof_info)
+ return status;
+
+ rl_prof_id = le16_to_cpu(rl_prof_info->profile.profile_id);
+
+ /* Save existing RL prof ID for later clean up */
+ old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+ /* Configure BW scheduling parameters */
+ status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+ if (status)
+ return status;
+
+ /* New changes has been applied */
+ /* Increment the profile ID reference count */
+ rl_prof_info->prof_id_ref++;
+
+ /* Check for old ID removal */
+ if ((old_id == ICE_SCHED_DFLT_RL_PROF_ID && rl_type != ICE_SHARED_BW) ||
+ old_id == ICE_SCHED_INVAL_PROF_ID || old_id == rl_prof_id)
+ return 0;
+
+ return ice_sched_rm_rl_profile(pi, layer_num,
+ rl_prof_info->profile.flags,
+ old_id);
+}
+
+/**
+ * ice_sched_set_node_bw_lmt - set node's BW limit
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * It updates node's BW limit parameters like BW RL profile ID of type CIR,
+ * EIR, or SRL. The caller needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node,
+ enum ice_rl_type rl_type, u32 bw)
+{
+ struct ice_sched_node *cfg_node = node;
+ enum ice_status status;
+
+ struct ice_hw *hw;
+ u8 layer_num;
+
+ if (!pi)
+ return ICE_ERR_PARAM;
+ hw = pi->hw;
+ /* Remove unused RL profile IDs from HW and SW DB */
+ ice_sched_rm_unused_rl_prof(pi);
+ layer_num = ice_sched_get_rl_prof_layer(pi, rl_type,
+ node->tx_sched_layer);
+ if (layer_num >= hw->num_tx_sched_layers)
+ return ICE_ERR_PARAM;
+
+ if (rl_type == ICE_SHARED_BW) {
+ /* SRL node may be different */
+ cfg_node = ice_sched_get_srl_node(node, layer_num);
+ if (!cfg_node)
+ return ICE_ERR_CFG;
+ }
+ /* EIR BW and Shared BW profiles are mutually exclusive and
+ * hence only one of them may be set for any given element
+ */
+ status = ice_sched_set_eir_srl_excl(pi, cfg_node, layer_num, rl_type,
+ bw);
+ if (status)
+ return status;
+ if (bw == ICE_SCHED_DFLT_BW)
+ return ice_sched_set_node_bw_dflt(pi, cfg_node, rl_type,
+ layer_num);
+ return ice_sched_set_node_bw(pi, cfg_node, rl_type, bw, layer_num);
+}
+
+/**
+ * ice_sched_set_node_bw_dflt_lmt - set node's BW limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_node_bw_dflt_lmt(struct ice_port_info *pi,
+ struct ice_sched_node *node,
+ enum ice_rl_type rl_type)
+{
+ return ice_sched_set_node_bw_lmt(pi, node, rl_type,
+ ICE_SCHED_DFLT_BW);
+}
+
+/**
+ * ice_sched_validate_srl_node - Check node for SRL applicability
+ * @node: sched node to configure
+ * @sel_layer: selected SRL layer
+ *
+ * This function checks if the SRL can be applied to a selected layer node on
+ * behalf of the requested node (first argument). This function needs to be
+ * called with scheduler lock held.
+ */
+static enum ice_status
+ice_sched_validate_srl_node(struct ice_sched_node *node, u8 sel_layer)
+{
+ /* SRL profiles are not available on all layers. Check if the
+ * SRL profile can be applied to a node above or below the
+ * requested node. SRL configuration is possible only if the
+ * selected layer's node has single child.
+ */
+ if (sel_layer == node->tx_sched_layer ||
+ ((sel_layer == node->tx_sched_layer + 1) &&
+ node->num_children == 1) ||
+ ((sel_layer == node->tx_sched_layer - 1) &&
+ (node->parent && node->parent->num_children == 1)))
+ return 0;
+
+ return ICE_ERR_CFG;
+}
+
+/**
+ * ice_sched_save_q_bw - save queue node's BW information
+ * @q_ctx: queue context structure
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save BW information of queue type node for post replay use.
+ */
+static enum ice_status
+ice_sched_save_q_bw(struct ice_q_ctx *q_ctx, enum ice_rl_type rl_type, u32 bw)
+{
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ ice_set_clear_cir_bw(&q_ctx->bw_t_info, bw);
+ break;
+ case ICE_MAX_BW:
+ ice_set_clear_eir_bw(&q_ctx->bw_t_info, bw);
+ break;
+ case ICE_SHARED_BW:
+ ice_set_clear_shared_bw(&q_ctx->bw_t_info, bw);
+ break;
+ default:
+ return ICE_ERR_PARAM;
+ }
+ return 0;
+}
+
+/**
+ * ice_sched_set_q_bw_lmt - sets queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function sets BW limit of queue scheduling node.
+ */
+static enum ice_status
+ice_sched_set_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+ enum ice_status status = ICE_ERR_PARAM;
+ struct ice_sched_node *node;
+ struct ice_q_ctx *q_ctx;
+
+ if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+ return ICE_ERR_PARAM;
+ mutex_lock(&pi->sched_lock);
+ q_ctx = ice_get_lan_q_ctx(pi->hw, vsi_handle, tc, q_handle);
+ if (!q_ctx)
+ goto exit_q_bw_lmt;
+ node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+ if (!node) {
+ ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong q_teid\n");
+ goto exit_q_bw_lmt;
+ }
+
+ /* Return error if it is not a leaf node */
+ if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF)
+ goto exit_q_bw_lmt;
+
+ /* SRL bandwidth layer selection */
+ if (rl_type == ICE_SHARED_BW) {
+ u8 sel_layer; /* selected layer */
+
+ sel_layer = ice_sched_get_rl_prof_layer(pi, rl_type,
+ node->tx_sched_layer);
+ if (sel_layer >= pi->hw->num_tx_sched_layers) {
+ status = ICE_ERR_PARAM;
+ goto exit_q_bw_lmt;
+ }
+ status = ice_sched_validate_srl_node(node, sel_layer);
+ if (status)
+ goto exit_q_bw_lmt;
+ }
+
+ if (bw == ICE_SCHED_DFLT_BW)
+ status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type);
+ else
+ status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw);
+
+ if (!status)
+ status = ice_sched_save_q_bw(q_ctx, rl_type, bw);
+
+exit_q_bw_lmt:
+ mutex_unlock(&pi->sched_lock);
+ return status;
+}
+
+/**
+ * ice_cfg_q_bw_lmt - configure queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function configures BW limit of queue scheduling node.
+ */
+enum ice_status
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+ return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+ bw);
+}
+
+/**
+ * ice_cfg_q_bw_dflt_lmt - configure queue BW default limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ *
+ * This function configures BW default limit of queue scheduling node.
+ */
+enum ice_status
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 q_handle, enum ice_rl_type rl_type)
+{
+ return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+ ICE_SCHED_DFLT_BW);
+}
+
+/**
+ * ice_cfg_rl_burst_size - Set burst size value
+ * @hw: pointer to the HW struct
+ * @bytes: burst size in bytes
+ *
+ * This function configures/set the burst size to requested new value. The new
+ * burst size value is used for future rate limit calls. It doesn't change the
+ * existing or previously created RL profiles.
+ */
+enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes)
+{
+ u16 burst_size_to_prog;
+
+ if (bytes < ICE_MIN_BURST_SIZE_ALLOWED ||
+ bytes > ICE_MAX_BURST_SIZE_ALLOWED)
+ return ICE_ERR_PARAM;
+ if (ice_round_to_num(bytes, 64) <=
+ ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY) {
+ /* 64 byte granularity case */
+ /* Disable MSB granularity bit */
+ burst_size_to_prog = ICE_64_BYTE_GRANULARITY;
+ /* round number to nearest 64 byte granularity */
+ bytes = ice_round_to_num(bytes, 64);
+ /* The value is in 64 byte chunks */
+ burst_size_to_prog |= (u16)(bytes / 64);
+ } else {
+ /* k bytes granularity case */
+ /* Enable MSB granularity bit */
+ burst_size_to_prog = ICE_KBYTE_GRANULARITY;
+ /* round number to nearest 1024 granularity */
+ bytes = ice_round_to_num(bytes, 1024);
+ /* check rounding doesn't go beyond allowed */
+ if (bytes > ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY)
+ bytes = ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY;
+ /* The value is in k bytes */
+ burst_size_to_prog |= (u16)(bytes / 1024);
+ }
+ hw->max_burst_size = burst_size_to_prog;
+ return 0;
+}
+
+/**
+ * ice_sched_replay_node_prio - re-configure node priority
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @priority: priority value
+ *
+ * This function configures node element's priority value. It
+ * needs to be called with scheduler lock held.
+ */
+static enum ice_status
+ice_sched_replay_node_prio(struct ice_hw *hw, struct ice_sched_node *node,
+ u8 priority)
+{
+ struct ice_aqc_txsched_elem_data buf;
+ struct ice_aqc_txsched_elem *data;
+ enum ice_status status;
+
+ buf = node->info;
+ data = &buf.data;
+ data->valid_sections |= ICE_AQC_ELEM_VALID_GENERIC;
+ data->generic = priority;
+
+ /* Configure element */
+ status = ice_sched_update_elem(hw, node, &buf);
+ return status;
+}
+
+/**
+ * ice_sched_replay_node_bw - replay node(s) BW
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @bw_t_info: BW type information
+ *
+ * This function restores node's BW from bw_t_info. The caller needs
+ * to hold the scheduler lock.
+ */
+static enum ice_status
+ice_sched_replay_node_bw(struct ice_hw *hw, struct ice_sched_node *node,
+ struct ice_bw_type_info *bw_t_info)
+{
+ struct ice_port_info *pi = hw->port_info;
+ enum ice_status status = ICE_ERR_PARAM;
+ u16 bw_alloc;
+
+ if (!node)
+ return status;
+ if (bitmap_empty(bw_t_info->bw_t_bitmap, ICE_BW_TYPE_CNT))
+ return 0;
+ if (test_bit(ICE_BW_TYPE_PRIO, bw_t_info->bw_t_bitmap)) {
+ status = ice_sched_replay_node_prio(hw, node,
+ bw_t_info->generic);
+ if (status)
+ return status;
+ }
+ if (test_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap)) {
+ status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW,
+ bw_t_info->cir_bw.bw);
+ if (status)
+ return status;
+ }
+ if (test_bit(ICE_BW_TYPE_CIR_WT, bw_t_info->bw_t_bitmap)) {
+ bw_alloc = bw_t_info->cir_bw.bw_alloc;
+ status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MIN_BW,
+ bw_alloc);
+ if (status)
+ return status;
+ }
+ if (test_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap)) {
+ status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW,
+ bw_t_info->eir_bw.bw);
+ if (status)
+ return status;
+ }
+ if (test_bit(ICE_BW_TYPE_EIR_WT, bw_t_info->bw_t_bitmap)) {
+ bw_alloc = bw_t_info->eir_bw.bw_alloc;
+ status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MAX_BW,
+ bw_alloc);
+ if (status)
+ return status;
+ }
+ if (test_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap))
+ status = ice_sched_set_node_bw_lmt(pi, node, ICE_SHARED_BW,
+ bw_t_info->shared_bw);
+ return status;
+}
+
+/**
+ * ice_sched_replay_q_bw - replay queue type node BW
+ * @pi: port information structure
+ * @q_ctx: queue context structure
+ *
+ * This function replays queue type node bandwidth. This function needs to be
+ * called with scheduler lock held.
+ */
+enum ice_status
+ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx)
+{
+ struct ice_sched_node *q_node;
+
+ /* Following also checks the presence of node in tree */
+ q_node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+ if (!q_node)
+ return ICE_ERR_PARAM;
+ return ice_sched_replay_node_bw(pi->hw, q_node, &q_ctx->bw_t_info);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index 3902a8ad3025..f0593cfb6521 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -8,6 +8,36 @@
#define ICE_QGRP_LAYER_OFFSET 2
#define ICE_VSI_LAYER_OFFSET 4
+#define ICE_SCHED_INVAL_LAYER_NUM 0xFF
+/* Burst size is a 12 bits register that is configured while creating the RL
+ * profile(s). MSB is a granularity bit and tells the granularity type
+ * 0 - LSB bits are in 64 bytes granularity
+ * 1 - LSB bits are in 1K bytes granularity
+ */
+#define ICE_64_BYTE_GRANULARITY 0
+#define ICE_KBYTE_GRANULARITY BIT(11)
+#define ICE_MIN_BURST_SIZE_ALLOWED 64 /* In Bytes */
+#define ICE_MAX_BURST_SIZE_ALLOWED \
+ ((BIT(11) - 1) * 1024) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY \
+ ((BIT(11) - 1) * 64) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY ICE_MAX_BURST_SIZE_ALLOWED
+
+#define ICE_RL_PROF_FREQUENCY 446000000
+#define ICE_RL_PROF_ACCURACY_BYTES 128
+#define ICE_RL_PROF_MULTIPLIER 10000
+#define ICE_RL_PROF_TS_MULTIPLIER 32
+#define ICE_RL_PROF_FRACTION 512
+
+/* BW rate limit profile parameters list entry along
+ * with bandwidth maintained per layer in port info
+ */
+struct ice_aqc_rl_profile_info {
+ struct ice_aqc_rl_profile_elem profile;
+ struct list_head list_entry;
+ u32 bw; /* requested */
+ u16 prof_id_ref; /* profile ID to node association ref count */
+};
struct ice_sched_agg_vsi_info {
struct list_head list_entry;
@@ -48,4 +78,13 @@ enum ice_status
ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
u8 owner, bool enable);
enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle);
+enum ice_status
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 q_handle, enum ice_rl_type rl_type, u32 bw);
+enum ice_status
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 q_handle, enum ice_rl_type rl_type);
+enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes);
+enum ice_status
+ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx);
#endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 1acdd43a2edd..b5a53f862a83 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -416,8 +416,7 @@ ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
ice_save_vsi_ctx(hw, vsi_handle, tmp_vsi_ctx);
} else {
/* update with new HW VSI num */
- if (tmp_vsi_ctx->vsi_num != vsi_ctx->vsi_num)
- tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num;
+ tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num;
}
return 0;
@@ -2429,7 +2428,7 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
if (!ice_is_vsi_valid(hw, vsi_handle))
return ICE_ERR_PARAM;
- if (vid)
+ if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX))
recipe_id = ICE_SW_LKUP_PROMISC_VLAN;
else
recipe_id = ICE_SW_LKUP_PROMISC;
@@ -2441,13 +2440,18 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
mutex_lock(rule_lock);
list_for_each_entry(itr, rule_head, list_entry) {
+ struct ice_fltr_info *fltr_info;
u8 fltr_promisc_mask = 0;
if (!ice_vsi_uses_fltr(itr, vsi_handle))
continue;
+ fltr_info = &itr->fltr_info;
+
+ if (recipe_id == ICE_SW_LKUP_PROMISC_VLAN &&
+ vid != fltr_info->l_data.mac_vlan.vlan_id)
+ continue;
- fltr_promisc_mask |=
- ice_determine_promisc_mask(&itr->fltr_info);
+ fltr_promisc_mask |= ice_determine_promisc_mask(fltr_info);
/* Skip if filter is not completely specified by given mask */
if (fltr_promisc_mask & ~promisc_mask)
@@ -2455,7 +2459,7 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle,
&remove_list_head,
- &itr->fltr_info);
+ fltr_info);
if (status) {
mutex_unlock(rule_lock);
goto free_fltr_list;
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index cb123fbe30be..fa14b9545dab 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -14,11 +14,6 @@
#define ICE_VSI_INVAL_ID 0xffff
#define ICE_INVAL_Q_HANDLE 0xFFFF
-/* VSI queue context structure */
-struct ice_q_ctx {
- u16 q_handle;
-};
-
/* VSI context structure for add/get/update/free operations */
struct ice_vsi_ctx {
u16 vsi_num;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 33dd103035dc..2c212f64d99f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -5,8 +5,13 @@
#include <linux/prefetch.h>
#include <linux/mm.h>
+#include <linux/bpf_trace.h>
+#include <net/xdp.h>
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
#include "ice.h"
#include "ice_dcb_lib.h"
+#include "ice_xsk.h"
#define ICE_RX_HDR_SIZE 256
@@ -19,7 +24,10 @@ static void
ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
{
if (tx_buf->skb) {
- dev_kfree_skb_any(tx_buf->skb);
+ if (ice_ring_is_xdp(ring))
+ page_frag_free(tx_buf->raw_buf);
+ else
+ dev_kfree_skb_any(tx_buf->skb);
if (dma_unmap_len(tx_buf, len))
dma_unmap_single(ring->dev,
dma_unmap_addr(tx_buf, dma),
@@ -51,6 +59,11 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
{
u16 i;
+ if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
+ ice_xsk_clean_xdp_ring(tx_ring);
+ goto tx_skip_free;
+ }
+
/* ring already cleared, nothing to do */
if (!tx_ring->tx_buf)
return;
@@ -59,6 +72,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
for (i = 0; i < tx_ring->count; i++)
ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
+tx_skip_free:
memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
/* Zero out the descriptor ring */
@@ -136,8 +150,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
total_bytes += tx_buf->bytecount;
total_pkts += tx_buf->gso_segs;
- /* free the skb */
- napi_consume_skb(tx_buf->skb, napi_budget);
+ if (ice_ring_is_xdp(tx_ring))
+ page_frag_free(tx_buf->raw_buf);
+ else
+ /* free the skb */
+ napi_consume_skb(tx_buf->skb, napi_budget);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -188,12 +205,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
i += tx_ring->count;
tx_ring->next_to_clean = i;
- u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->stats.bytes += total_bytes;
- tx_ring->stats.pkts += total_pkts;
- u64_stats_update_end(&tx_ring->syncp);
- tx_ring->q_vector->tx.total_bytes += total_bytes;
- tx_ring->q_vector->tx.total_pkts += total_pkts;
+
+ ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
+
+ if (ice_ring_is_xdp(tx_ring))
+ return !!budget;
netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
total_bytes);
@@ -273,6 +289,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
if (!rx_ring->rx_buf)
return;
+ if (rx_ring->xsk_umem) {
+ ice_xsk_clean_rx_ring(rx_ring);
+ goto rx_skip_free;
+ }
+
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
@@ -289,10 +310,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
*/
dma_sync_single_range_for_cpu(dev, rx_buf->dma,
rx_buf->page_offset,
- ICE_RXBUF_2048, DMA_FROM_DEVICE);
+ rx_ring->rx_buf_len,
+ DMA_FROM_DEVICE);
/* free resources associated with mapping */
- dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE,
+ dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
@@ -300,6 +322,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
rx_buf->page_offset = 0;
}
+rx_skip_free:
memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
/* Zero out the descriptor ring */
@@ -319,6 +342,10 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
void ice_free_rx_ring(struct ice_ring *rx_ring)
{
ice_clean_rx_ring(rx_ring);
+ if (rx_ring->vsi->type == ICE_VSI_PF)
+ if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+ rx_ring->xdp_prog = NULL;
devm_kfree(rx_ring->dev, rx_ring->rx_buf);
rx_ring->rx_buf = NULL;
@@ -363,6 +390,15 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
rx_ring->next_to_use = 0;
rx_ring->next_to_clean = 0;
+
+ if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+ WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
+
+ if (rx_ring->vsi->type == ICE_VSI_PF &&
+ !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+ rx_ring->q_index))
+ goto err;
return 0;
err:
@@ -372,34 +408,110 @@ err:
}
/**
- * ice_release_rx_desc - Store the new tail and head values
- * @rx_ring: ring to bump
- * @val: new head index
+ * ice_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
*/
-static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
{
- u16 prev_ntu = rx_ring->next_to_use;
+ if (ice_ring_uses_build_skb(rx_ring))
+ return ICE_SKB_PAD;
+ else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+ return XDP_PACKET_HEADROOM;
- rx_ring->next_to_use = val;
+ return 0;
+}
- /* update next to alloc since we have filled the ring */
- rx_ring->next_to_alloc = val;
+/**
+ * ice_run_xdp - Executes an XDP program on initialized xdp_buff
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ int err, result = ICE_XDP_PASS;
+ struct ice_ring *xdp_ring;
+ u32 act;
- /* QRX_TAIL will be updated with any tail value, but hardware ignores
- * the lower 3 bits. This makes it so we only bump tail on meaningful
- * boundaries. Also, this allows us to bump tail on intervals of 8 up to
- * the budget depending on the current traffic load.
- */
- val &= ~0x7;
- if (prev_ntu != val) {
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64).
- */
- wmb();
- writel(val, rx_ring->tail);
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
+ result = ice_xmit_xdp_buff(xdp, xdp_ring);
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fallthrough -- not supported action */
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ /* fallthrough -- handle aborts by dropping frame */
+ case XDP_DROP:
+ result = ICE_XDP_CONSUMED;
+ break;
}
+
+ return result;
+}
+
+/**
+ * ice_xdp_xmit - submit packets to XDP ring for transmission
+ * @dev: netdev
+ * @n: number of XDP frames to be transmitted
+ * @frames: XDP frames to be transmitted
+ * @flags: transmit flags
+ *
+ * Returns number of frames successfully sent. Frames that fail are
+ * free'ed via XDP return API.
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
+ */
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct ice_netdev_priv *np = netdev_priv(dev);
+ unsigned int queue_index = smp_processor_id();
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_ring *xdp_ring;
+ int drops = 0, i;
+
+ if (test_bit(__ICE_DOWN, vsi->state))
+ return -ENETDOWN;
+
+ if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
+ return -ENXIO;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ xdp_ring = vsi->xdp_rings[queue_index];
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ int err;
+
+ err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+ if (err != ICE_XDP_TX) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+
+ if (unlikely(flags & XDP_XMIT_FLUSH))
+ ice_xdp_ring_update_tail(xdp_ring);
+
+ return n - drops;
}
/**
@@ -423,28 +535,28 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
}
/* alloc new page for storage */
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_page_failed++;
return false;
}
/* map page for use */
- dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
- __free_pages(page, 0);
+ __free_pages(page, ice_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_page_failed++;
return false;
}
bi->dma = dma;
bi->page = page;
- bi->page_offset = 0;
+ bi->page_offset = ice_rx_offset(rx_ring);
page_ref_add(page, USHRT_MAX - 1);
bi->pagecnt_bias = USHRT_MAX;
@@ -486,7 +598,7 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
- ICE_RXBUF_2048,
+ rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* Refresh the desc even if buffer_addrs didn't change
@@ -557,9 +669,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
*/
static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
{
-#if (PAGE_SIZE >= 8192)
- unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
-#endif
unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
struct page *page = rx_buf->page;
@@ -572,7 +681,9 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
if (unlikely((page_count(page) - pagecnt_bias) > 1))
return false;
#else
- if (rx_buf->page_offset > last_offset)
+#define ICE_LAST_OFFSET \
+ (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048)
+ if (rx_buf->page_offset > ICE_LAST_OFFSET)
return false;
#endif /* PAGE_SIZE < 8192) */
@@ -590,6 +701,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
/**
* ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
+ * @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: buffer containing page to add
* @skb: sk_buff to place the data into
* @size: packet length from rx_desc
@@ -599,13 +711,13 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
* The function will then update the page offset.
*/
static void
-ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
- unsigned int size)
+ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+ struct sk_buff *skb, unsigned int size)
{
#if (PAGE_SIZE >= 8192)
- unsigned int truesize = SKB_DATA_ALIGN(size);
+ unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring));
#else
- unsigned int truesize = ICE_RXBUF_2048;
+ unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
#endif
if (!size)
@@ -679,10 +791,64 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
}
/**
+ * ice_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function builds an skb around an existing Rx buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead.
+ */
+static struct sk_buff *
+ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+ struct xdp_buff *xdp)
+{
+ unsigned int metasize = xdp->data - xdp->data_meta;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+ SKB_DATA_ALIGN(xdp->data_end -
+ xdp->data_hard_start);
+#endif
+ struct sk_buff *skb;
+
+ /* Prefetch first cache line of first page. If xdp->data_meta
+ * is unused, this points exactly as xdp->data, otherwise we
+ * likely have a consumer accessing first few bytes of meta
+ * data, and then actual data.
+ */
+ prefetch(xdp->data_meta);
+#if L1_CACHE_BYTES < 128
+ prefetch((void *)(xdp->data + L1_CACHE_BYTES));
+#endif
+ /* build an skb around the page buffer */
+ skb = build_skb(xdp->data_hard_start, truesize);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* must to record Rx queue, otherwise OS features such as
+ * symmetric queue won't work
+ */
+ skb_record_rx_queue(skb, rx_ring->q_index);
+
+ /* update pointers within the skb to store the data */
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ __skb_put(skb, xdp->data_end - xdp->data);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
+ /* buffer is used by skb, update page_offset */
+ ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+
+ return skb;
+}
+
+/**
* ice_construct_skb - Allocate skb and populate it
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
- * @size: the length of the packet
+ * @xdp: xdp_buff pointing to the data
*
* This function allocates an skb. It then populates it with the page
* data from the current receive descriptor, taking care to set up the
@@ -690,16 +856,16 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
*/
static struct sk_buff *
ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
- unsigned int size)
+ struct xdp_buff *xdp)
{
- void *va = page_address(rx_buf->page) + rx_buf->page_offset;
+ unsigned int size = xdp->data_end - xdp->data;
unsigned int headlen;
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
+ prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
- prefetch((u8 *)va + L1_CACHE_BYTES);
+ prefetch((void *)(xdp->data + L1_CACHE_BYTES));
#endif /* L1_CACHE_BYTES */
/* allocate a skb to store the frags */
@@ -712,10 +878,11 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
/* Determine available headroom for copy */
headlen = size;
if (headlen > ICE_RX_HDR_SIZE)
- headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE);
+ headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+ memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
+ sizeof(long)));
/* if we exhaust the linear part then add what is left as a frag */
size -= headlen;
@@ -723,7 +890,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
#if (PAGE_SIZE >= 8192)
unsigned int truesize = SKB_DATA_ALIGN(size);
#else
- unsigned int truesize = ICE_RXBUF_2048;
+ unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
#endif
skb_add_rx_frag(skb, 0, rx_buf->page,
rx_buf->page_offset + headlen, size, truesize);
@@ -745,11 +912,18 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
*
- * This function will clean up the contents of the rx_buf. It will
- * either recycle the buffer or unmap it and free the associated resources.
+ * This function will update next_to_clean and then clean up the contents
+ * of the rx_buf. It will either recycle the buffer or unmap it and free
+ * the associated resources.
*/
static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
if (!rx_buf)
return;
@@ -759,8 +933,9 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
rx_ring->rx_stats.page_reuse_count++;
} else {
/* we are not reusing the buffer so unmap it */
- dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
- DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+ dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
+ ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+ ICE_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
}
@@ -770,227 +945,31 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
}
/**
- * ice_cleanup_headers - Correct empty headers
- * @skb: pointer to current skb being fixed
- *
- * Also address the case where we are pulling data in on pages only
- * and as such no data is present in the skb header.
- *
- * In addition if skb is not at least 60 bytes we need to pad it so that
- * it is large enough to qualify as a valid Ethernet frame.
- *
- * Returns true if an error was encountered and skb was freed.
- */
-static bool ice_cleanup_headers(struct sk_buff *skb)
-{
- /* if eth_skb_pad returns an error the skb was freed */
- if (eth_skb_pad(skb))
- return true;
-
- return false;
-}
-
-/**
- * ice_test_staterr - tests bits in Rx descriptor status and error fields
- * @rx_desc: pointer to receive descriptor (in le64 format)
- * @stat_err_bits: value to mask
- *
- * This function does some fast chicanery in order to return the
- * value of the mask which is really only used for boolean tests.
- * The status_error_len doesn't need to be shifted because it begins
- * at offset zero.
- */
-static bool
-ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
-{
- return !!(rx_desc->wb.status_error0 &
- cpu_to_le16(stat_err_bits));
-}
-
-/**
* ice_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
* @rx_desc: Rx descriptor for current buffer
* @skb: Current socket buffer containing buffer in progress
*
- * This function updates next to clean. If the buffer is an EOP buffer
- * this function exits returning false, otherwise it will place the
- * sk_buff in the next buffer to be chained and return true indicating
- * that this is in fact a non-EOP buffer.
+ * If the buffer is an EOP buffer, this function exits returning false,
+ * otherwise return true indicating that this is in fact a non-EOP buffer.
*/
static bool
ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
struct sk_buff *skb)
{
- u32 ntc = rx_ring->next_to_clean + 1;
-
- /* fetch, update, and store next to clean */
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
-
- prefetch(ICE_RX_DESC(rx_ring, ntc));
-
/* if we are the last buffer then there is nothing else to do */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
return false;
/* place skb in next buffer to be received */
- rx_ring->rx_buf[ntc].skb = skb;
+ rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb;
rx_ring->rx_stats.non_eop_descs++;
return true;
}
/**
- * ice_ptype_to_htype - get a hash type
- * @ptype: the ptype value from the descriptor
- *
- * Returns a hash type to be used by skb_set_hash
- */
-static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
-{
- return PKT_HASH_TYPE_NONE;
-}
-
-/**
- * ice_rx_hash - set the hash value in the skb
- * @rx_ring: descriptor ring
- * @rx_desc: specific descriptor
- * @skb: pointer to current skb
- * @rx_ptype: the ptype value from the descriptor
- */
-static void
-ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
- struct sk_buff *skb, u8 rx_ptype)
-{
- struct ice_32b_rx_flex_desc_nic *nic_mdid;
- u32 hash;
-
- if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
- return;
-
- if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
- return;
-
- nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
- hash = le32_to_cpu(nic_mdid->rss_hash);
- skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
-}
-
-/**
- * ice_rx_csum - Indicate in skb if checksum is good
- * @ring: the ring we care about
- * @skb: skb currently being received and modified
- * @rx_desc: the receive descriptor
- * @ptype: the packet type decoded by hardware
- *
- * skb->protocol must be set before this function is called
- */
-static void
-ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
- union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
-{
- struct ice_rx_ptype_decoded decoded;
- u32 rx_error, rx_status;
- bool ipv4, ipv6;
-
- rx_status = le16_to_cpu(rx_desc->wb.status_error0);
- rx_error = rx_status;
-
- decoded = ice_decode_rx_desc_ptype(ptype);
-
- /* Start with CHECKSUM_NONE and by default csum_level = 0 */
- skb->ip_summed = CHECKSUM_NONE;
- skb_checksum_none_assert(skb);
-
- /* check if Rx checksum is enabled */
- if (!(ring->netdev->features & NETIF_F_RXCSUM))
- return;
-
- /* check if HW has decoded the packet and checksum */
- if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
- return;
-
- if (!(decoded.known && decoded.outer_ip))
- return;
-
- ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
- (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
- ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
- (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
-
- if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
- BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
- goto checksum_fail;
- else if (ipv6 && (rx_status &
- (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
- goto checksum_fail;
-
- /* check for L4 errors and handle packets that were not able to be
- * checksummed due to arrival speed
- */
- if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
- goto checksum_fail;
-
- /* Only report checksum unnecessary for TCP, UDP, or SCTP */
- switch (decoded.inner_prot) {
- case ICE_RX_PTYPE_INNER_PROT_TCP:
- case ICE_RX_PTYPE_INNER_PROT_UDP:
- case ICE_RX_PTYPE_INNER_PROT_SCTP:
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- default:
- break;
- }
- return;
-
-checksum_fail:
- ring->vsi->back->hw_csum_rx_error++;
-}
-
-/**
- * ice_process_skb_fields - Populate skb header fields from Rx descriptor
- * @rx_ring: Rx descriptor ring packet is being transacted on
- * @rx_desc: pointer to the EOP Rx descriptor
- * @skb: pointer to current skb being populated
- * @ptype: the packet type decoded by hardware
- *
- * This function checks the ring, descriptor, and packet information in
- * order to populate the hash, checksum, VLAN, protocol, and
- * other fields within the skb.
- */
-static void
-ice_process_skb_fields(struct ice_ring *rx_ring,
- union ice_32b_rx_flex_desc *rx_desc,
- struct sk_buff *skb, u8 ptype)
-{
- ice_rx_hash(rx_ring, rx_desc, skb, ptype);
-
- /* modifies the skb - consumes the enet header */
- skb->protocol = eth_type_trans(skb, rx_ring->netdev);
-
- ice_rx_csum(rx_ring, skb, rx_desc, ptype);
-}
-
-/**
- * ice_receive_skb - Send a completed packet up the stack
- * @rx_ring: Rx ring in play
- * @skb: packet to send up
- * @vlan_tag: VLAN tag for packet
- *
- * This function sends the completed packet (via. skb) up the stack using
- * gro receive functions (with/without VLAN tag)
- */
-static void
-ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
-{
- if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
- (vlan_tag & VLAN_VID_MASK))
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
- napi_gro_receive(&rx_ring->q_vector->napi, skb);
-}
-
-/**
* ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: Rx descriptor ring to transact packets on
* @budget: Total limit on number of packets to process
@@ -1006,8 +985,13 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+ unsigned int xdp_res, xdp_xmit = 0;
+ struct bpf_prog *xdp_prog = NULL;
+ struct xdp_buff xdp;
bool failure;
+ xdp.rxq = &rx_ring->xdp_rxq;
+
/* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
@@ -1042,10 +1026,57 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
/* retrieve a buffer from the ring */
rx_buf = ice_get_rx_buf(rx_ring, &skb, size);
+ if (!size) {
+ xdp.data = NULL;
+ xdp.data_end = NULL;
+ xdp.data_hard_start = NULL;
+ xdp.data_meta = NULL;
+ goto construct_skb;
+ }
+
+ xdp.data = page_address(rx_buf->page) + rx_buf->page_offset;
+ xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
+ xdp.data_meta = xdp.data;
+ xdp.data_end = xdp.data + size;
+
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+ if (!xdp_prog) {
+ rcu_read_unlock();
+ goto construct_skb;
+ }
+
+ xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog);
+ rcu_read_unlock();
+ if (!xdp_res)
+ goto construct_skb;
+ if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+ unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+ truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+ truesize = SKB_DATA_ALIGN(ice_rx_offset(rx_ring) +
+ size);
+#endif
+ xdp_xmit |= xdp_res;
+ ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+ } else {
+ rx_buf->pagecnt_bias++;
+ }
+ total_rx_bytes += size;
+ total_rx_pkts++;
+
+ cleaned_count++;
+ ice_put_rx_buf(rx_ring, rx_buf);
+ continue;
+construct_skb:
if (skb)
- ice_add_rx_frag(rx_buf, skb, size);
+ ice_add_rx_frag(rx_ring, rx_buf, skb, size);
+ else if (ice_ring_uses_build_skb(rx_ring))
+ skb = ice_build_skb(rx_ring, rx_buf, &xdp);
else
- skb = ice_construct_skb(rx_ring, rx_buf, size);
+ skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
/* exit if we failed to retrieve a buffer */
if (!skb) {
@@ -1072,10 +1103,8 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
if (ice_test_staterr(rx_desc, stat_err_bits))
vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
- /* correct empty headers and pad skb if needed (to make valid
- * ethernet frame
- */
- if (ice_cleanup_headers(skb)) {
+ /* pad the skb if needed, to make a valid ethernet frame */
+ if (eth_skb_pad(skb)) {
skb = NULL;
continue;
}
@@ -1099,13 +1128,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
/* return up to cleaned_count buffers to hardware */
failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
- /* update queue and vector specific stats */
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->stats.pkts += total_rx_pkts;
- rx_ring->stats.bytes += total_rx_bytes;
- u64_stats_update_end(&rx_ring->syncp);
- rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
- rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+ if (xdp_prog)
+ ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+
+ ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
/* guarantee a trip back through this routine if there was a failure */
return failure ? budget : (int)total_rx_pkts;
@@ -1483,9 +1509,14 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
/* Since the actual Tx work is minimal, we can give the Tx a larger
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
- ice_for_each_ring(ring, q_vector->tx)
- if (!ice_clean_tx_irq(ring, budget))
+ ice_for_each_ring(ring, q_vector->tx) {
+ bool wd = ring->xsk_umem ?
+ ice_clean_tx_irq_zc(ring, budget) :
+ ice_clean_tx_irq(ring, budget);
+
+ if (!wd)
clean_complete = false;
+ }
/* Handle case where we are called by netpoll with a budget of 0 */
if (unlikely(budget <= 0))
@@ -1505,7 +1536,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
ice_for_each_ring(ring, q_vector->rx) {
int cleaned;
- cleaned = ice_clean_rx_irq(ring, budget_per_ring);
+ /* A dedicated path for zero-copy allows making a single
+ * comparison in the irq context instead of many inside the
+ * ice_clean_rx_irq function and makes the codebase cleaner.
+ */
+ cleaned = ring->xsk_umem ?
+ ice_clean_rx_irq_zc(ring, budget_per_ring) :
+ ice_clean_rx_irq(ring, budget_per_ring);
work_done += cleaned;
/* if we clean as many as budgeted, we must not be done */
if (cleaned >= budget_per_ring)
@@ -1527,17 +1564,6 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
return min_t(int, work_done, budget - 1);
}
-/* helper function for building cmd/type/offset */
-static __le64
-build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
-{
- return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
- (td_cmd << ICE_TXD_QW1_CMD_S) |
- (td_offset << ICE_TXD_QW1_OFFSET_S) |
- ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
- (td_tag << ICE_TXD_QW1_L2TAG1_S));
-}
-
/**
* __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
* @tx_ring: the ring to be checked
@@ -1689,9 +1715,9 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
i = 0;
/* write last descriptor with RS and EOP bits */
- td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
- tx_desc->cmd_type_offset_bsz =
- build_ctob(td_cmd, td_offset, size, td_tag);
+ td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
+ tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size,
+ td_tag);
/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch.
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 94a9280193e2..a84cc0e6dd27 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -4,8 +4,12 @@
#ifndef _ICE_TXRX_H_
#define _ICE_TXRX_H_
+#include "ice_type.h"
+
#define ICE_DFLT_IRQ_WORK 256
+#define ICE_RXBUF_3072 3072
#define ICE_RXBUF_2048 2048
+#define ICE_RXBUF_1536 1536
#define ICE_MAX_CHAINED_RX_BUFS 5
#define ICE_MAX_BUF_TXD 8
#define ICE_MIN_TX_LEN 17
@@ -22,6 +26,71 @@
#define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */
#define ICE_MAX_TXQ_PER_TXQG 128
+/* Attempt to maximize the headroom available for incoming frames. We use a 2K
+ * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame.
+ * This leaves us with 512 bytes of room. From that we need to deduct the
+ * space needed for the shared info and the padding needed to IP align the
+ * frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ * up negative. In these cases we should fall back to the legacy
+ * receive path.
+ */
+#if (PAGE_SIZE < 8192)
+#define ICE_2K_TOO_SMALL_WITH_PADDING \
+((NET_SKB_PAD + ICE_RXBUF_1536) > SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
+
+/**
+ * ice_compute_pad - compute the padding
+ * rx_buf_len: buffer length
+ *
+ * Figure out the size of half page based on given buffer length and
+ * then subtract the skb_shared_info followed by subtraction of the
+ * actual buffer length; this in turn results in the actual space that
+ * is left for padding usage
+ */
+static inline int ice_compute_pad(int rx_buf_len)
+{
+ int half_page_size;
+
+ half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+ return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len;
+}
+
+/**
+ * ice_skb_pad - determine the padding that we can supply
+ *
+ * Figure out the right Rx buffer size and based on that calculate the
+ * padding
+ */
+static inline int ice_skb_pad(void)
+{
+ int rx_buf_len;
+
+ /* If a 2K buffer cannot handle a standard Ethernet frame then
+ * optimize padding for a 3K buffer instead of a 1.5K buffer.
+ *
+ * For a 3K buffer we need to add enough padding to allow for
+ * tailroom due to NET_IP_ALIGN possibly shifting us out of
+ * cache-line alignment.
+ */
+ if (ICE_2K_TOO_SMALL_WITH_PADDING)
+ rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+ else
+ rx_buf_len = ICE_RXBUF_1536;
+
+ /* if needed make room for NET_IP_ALIGN */
+ rx_buf_len -= NET_IP_ALIGN;
+
+ return ice_compute_pad(rx_buf_len);
+}
+
+#define ICE_SKB_PAD ice_skb_pad()
+#else
+#define ICE_2K_TOO_SMALL_WITH_PADDING false
+#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
/* We are assuming that the cache line is always 64 Bytes here for ice.
* In order to make sure that is a correct assumption there is a check in probe
* to print a warning if the read from GLPCI_CNF2 tells us that the cache line
@@ -49,12 +118,24 @@
#define ICE_TX_FLAGS_VLAN_PR_S 29
#define ICE_TX_FLAGS_VLAN_S 16
+#define ICE_XDP_PASS 0
+#define ICE_XDP_CONSUMED BIT(0)
+#define ICE_XDP_TX BIT(1)
+#define ICE_XDP_REDIR BIT(2)
+
#define ICE_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+#define ICE_ETH_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
+#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
+
struct ice_tx_buf {
struct ice_tx_desc *next_to_watch;
- struct sk_buff *skb;
+ union {
+ struct sk_buff *skb;
+ void *raw_buf; /* used for XDP */
+ };
unsigned int bytecount;
unsigned short gso_segs;
u32 tx_flags;
@@ -76,9 +157,17 @@ struct ice_tx_offload_params {
struct ice_rx_buf {
struct sk_buff *skb;
dma_addr_t dma;
- struct page *page;
- unsigned int page_offset;
- u16 pagecnt_bias;
+ union {
+ struct {
+ struct page *page;
+ unsigned int page_offset;
+ u16 pagecnt_bias;
+ };
+ struct {
+ void *addr;
+ u64 handle;
+ };
+ };
};
struct ice_q_stats {
@@ -198,18 +287,44 @@ struct ice_ring {
};
struct rcu_head rcu; /* to avoid race on free */
+ struct bpf_prog *xdp_prog;
+ struct xdp_umem *xsk_umem;
+ struct zero_copy_allocator zca;
+ /* CL3 - 3rd cacheline starts here */
+ struct xdp_rxq_info xdp_rxq;
/* CLX - the below items are only accessed infrequently and should be
* in their own cache line if possible
*/
+#define ICE_TX_FLAGS_RING_XDP BIT(0)
+#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1)
+ u8 flags;
dma_addr_t dma; /* physical address of ring */
unsigned int size; /* length of descriptor ring in bytes */
u32 txq_teid; /* Added Tx queue TEID */
u16 rx_buf_len;
-#ifdef CONFIG_DCB
u8 dcb_tc; /* Traffic class of ring */
-#endif /* CONFIG_DCB */
} ____cacheline_internodealigned_in_smp;
+static inline bool ice_ring_uses_build_skb(struct ice_ring *ring)
+{
+ return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB);
+}
+
+static inline void ice_set_ring_build_skb_ena(struct ice_ring *ring)
+{
+ ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline void ice_clear_ring_build_skb_ena(struct ice_ring *ring)
+{
+ ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline bool ice_ring_is_xdp(struct ice_ring *ring)
+{
+ return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
+}
+
struct ice_ring_container {
/* head of linked-list of rings */
struct ice_ring *ring;
@@ -230,6 +345,19 @@ struct ice_ring_container {
#define ice_for_each_ring(pos, head) \
for (pos = (head).ring; pos; pos = pos->next)
+static inline unsigned int ice_rx_pg_order(struct ice_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring->rx_buf_len > (PAGE_SIZE / 2))
+ return 1;
+#endif
+ return 0;
+}
+
+#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring))
+
+union ice_32b_rx_flex_desc;
+
bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);
netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
void ice_clean_tx_ring(struct ice_ring *tx_ring);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
new file mode 100644
index 000000000000..35bbc4ff603c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_txrx_lib.h"
+
+/**
+ * ice_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ */
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+{
+ u16 prev_ntu = rx_ring->next_to_use;
+
+ rx_ring->next_to_use = val;
+
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = val;
+
+ /* QRX_TAIL will be updated with any tail value, but hardware ignores
+ * the lower 3 bits. This makes it so we only bump tail on meaningful
+ * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+ * the budget depending on the current traffic load.
+ */
+ val &= ~0x7;
+ if (prev_ntu != val) {
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(val, rx_ring->tail);
+ }
+}
+
+/**
+ * ice_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ */
+static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
+{
+ return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * ice_rx_hash - set the hash value in the skb
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: pointer to current skb
+ * @rx_ptype: the ptype value from the descriptor
+ */
+static void
+ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u8 rx_ptype)
+{
+ struct ice_32b_rx_flex_desc_nic *nic_mdid;
+ u32 hash;
+
+ if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
+ return;
+
+ if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
+ return;
+
+ nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+ hash = le32_to_cpu(nic_mdid->rss_hash);
+ skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+}
+
+/**
+ * ice_rx_csum - Indicate in skb if checksum is good
+ * @ring: the ring we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void
+ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
+ union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
+{
+ struct ice_rx_ptype_decoded decoded;
+ u32 rx_error, rx_status;
+ bool ipv4, ipv6;
+
+ rx_status = le16_to_cpu(rx_desc->wb.status_error0);
+ rx_error = rx_status;
+
+ decoded = ice_decode_rx_desc_ptype(ptype);
+
+ /* Start with CHECKSUM_NONE and by default csum_level = 0 */
+ skb->ip_summed = CHECKSUM_NONE;
+ skb_checksum_none_assert(skb);
+
+ /* check if Rx checksum is enabled */
+ if (!(ring->netdev->features & NETIF_F_RXCSUM))
+ return;
+
+ /* check if HW has decoded the packet and checksum */
+ if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+ return;
+
+ if (!(decoded.known && decoded.outer_ip))
+ return;
+
+ ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
+ ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
+
+ if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+ BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+ goto checksum_fail;
+ else if (ipv6 && (rx_status &
+ (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+ goto checksum_fail;
+
+ /* check for L4 errors and handle packets that were not able to be
+ * checksummed due to arrival speed
+ */
+ if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+ goto checksum_fail;
+
+ /* Only report checksum unnecessary for TCP, UDP, or SCTP */
+ switch (decoded.inner_prot) {
+ case ICE_RX_PTYPE_INNER_PROT_TCP:
+ case ICE_RX_PTYPE_INNER_PROT_UDP:
+ case ICE_RX_PTYPE_INNER_PROT_SCTP:
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ default:
+ break;
+ }
+ return;
+
+checksum_fail:
+ ring->vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * ice_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: Rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u8 ptype)
+{
+ ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+
+ /* modifies the skb - consumes the enet header */
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+ ice_rx_csum(rx_ring, skb, rx_desc, ptype);
+}
+
+/**
+ * ice_receive_skb - Send a completed packet up the stack
+ * @rx_ring: Rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: VLAN tag for packet
+ *
+ * This function sends the completed packet (via. skb) up the stack using
+ * gro receive functions (with/without VLAN tag)
+ */
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
+{
+ if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ (vlan_tag & VLAN_VID_MASK))
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+ napi_gro_receive(&rx_ring->q_vector->napi, skb);
+}
+
+/**
+ * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
+ * @data: packet data pointer
+ * @size: packet data size
+ * @xdp_ring: XDP ring for transmission
+ */
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
+{
+ u16 i = xdp_ring->next_to_use;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ dma_addr_t dma;
+
+ if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
+ xdp_ring->tx_stats.tx_busy++;
+ return ICE_XDP_CONSUMED;
+ }
+
+ dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(xdp_ring->dev, dma))
+ return ICE_XDP_CONSUMED;
+
+ tx_buf = &xdp_ring->tx_buf[i];
+ tx_buf->bytecount = size;
+ tx_buf->gso_segs = 1;
+ tx_buf->raw_buf = data;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buf, len, size);
+ dma_unmap_addr_set(tx_buf, dma, dma);
+
+ tx_desc = ICE_TX_DESC(xdp_ring, i);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
+ size, 0);
+
+ /* Make certain all of the status bits have been updated
+ * before next_to_watch is written.
+ */
+ smp_wmb();
+
+ i++;
+ if (i == xdp_ring->count)
+ i = 0;
+
+ tx_buf->next_to_watch = tx_desc;
+ xdp_ring->next_to_use = i;
+
+ return ICE_XDP_TX;
+}
+
+/**
+ * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
+ * @xdp: XDP buffer
+ * @xdp_ring: XDP Tx ring
+ *
+ * Returns negative on failure, 0 on success.
+ */
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring)
+{
+ struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+
+ if (unlikely(!xdpf))
+ return ICE_XDP_CONSUMED;
+
+ return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+}
+
+/**
+ * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ */
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res)
+{
+ if (xdp_res & ICE_XDP_REDIR)
+ xdp_do_flush_map();
+
+ if (xdp_res & ICE_XDP_TX) {
+ struct ice_ring *xdp_ring =
+ rx_ring->vsi->xdp_rings[rx_ring->q_index];
+
+ ice_xdp_ring_update_tail(xdp_ring);
+ }
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
new file mode 100644
index 000000000000..ba9164dad9ae
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_TXRX_LIB_H_
+#define _ICE_TXRX_LIB_H_
+#include "ice.h"
+
+/**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool
+ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
+{
+ return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits));
+}
+
+static inline __le64
+build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+ return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+ (td_cmd << ICE_TXD_QW1_CMD_S) |
+ (td_offset << ICE_TXD_QW1_OFFSET_S) |
+ ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+ (td_tag << ICE_TXD_QW1_L2TAG1_S));
+}
+
+/**
+ * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ */
+static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
+{
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch.
+ */
+ wmb();
+ writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res);
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring);
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring);
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val);
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+ union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u8 ptype);
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+#endif /* !_ICE_TXRX_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 6667d17a4206..c4854a987130 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -19,6 +19,17 @@ static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
return test_bit(tc, &bitmap);
}
+static inline u64 round_up_64bit(u64 a, u32 b)
+{
+ return div64_long(((a) + (b) / 2), (b));
+}
+
+static inline u32 ice_round_to_num(u32 N, u32 R)
+{
+ return ((((N) % (R)) < ((R) / 2)) ? (((N) / (R)) * (R)) :
+ ((((N) + (R) - 1) / (R)) * (R)));
+}
+
/* Driver always calls main vsi_handle first */
#define ICE_MAIN_VSI_HANDLE 0
@@ -35,6 +46,8 @@ static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
#define ICE_DBG_PKG BIT_ULL(16)
#define ICE_DBG_RES BIT_ULL(17)
#define ICE_DBG_AQ_MSG BIT_ULL(24)
+#define ICE_DBG_AQ_DESC BIT_ULL(25)
+#define ICE_DBG_AQ_DESC_BUF BIT_ULL(26)
#define ICE_DBG_AQ_CMD BIT_ULL(27)
#define ICE_DBG_USER BIT_ULL(31)
@@ -189,6 +202,7 @@ struct ice_hw_dev_caps {
struct ice_hw_common_caps common_cap;
u32 num_vfs_exposed; /* Total number of VFs exposed */
u32 num_vsi_allocd_to_host; /* Excluding EMP VSI */
+ u32 num_funcs;
};
/* MAC info */
@@ -272,10 +286,56 @@ enum ice_agg_type {
ICE_AGG_TYPE_QG
};
+/* Rate limit types */
+enum ice_rl_type {
+ ICE_UNKNOWN_BW = 0,
+ ICE_MIN_BW, /* for CIR profile */
+ ICE_MAX_BW, /* for EIR profile */
+ ICE_SHARED_BW /* for shared profile */
+};
+
+#define ICE_SCHED_MIN_BW 500 /* in Kbps */
+#define ICE_SCHED_MAX_BW 100000000 /* in Kbps */
+#define ICE_SCHED_DFLT_BW 0xFFFFFFFF /* unlimited */
#define ICE_SCHED_DFLT_RL_PROF_ID 0
+#define ICE_SCHED_NO_SHARED_RL_PROF_ID 0xFFFF
#define ICE_SCHED_DFLT_BW_WT 1
+#define ICE_SCHED_INVAL_PROF_ID 0xFFFF
+#define ICE_SCHED_DFLT_BURST_SIZE (15 * 1024) /* in bytes (15k) */
-/* VSI type list entry to locate corresponding VSI/ag nodes */
+ /* Data structure for saving BW information */
+enum ice_bw_type {
+ ICE_BW_TYPE_PRIO,
+ ICE_BW_TYPE_CIR,
+ ICE_BW_TYPE_CIR_WT,
+ ICE_BW_TYPE_EIR,
+ ICE_BW_TYPE_EIR_WT,
+ ICE_BW_TYPE_SHARED,
+ ICE_BW_TYPE_CNT /* This must be last */
+};
+
+struct ice_bw {
+ u32 bw;
+ u16 bw_alloc;
+};
+
+struct ice_bw_type_info {
+ DECLARE_BITMAP(bw_t_bitmap, ICE_BW_TYPE_CNT);
+ u8 generic;
+ struct ice_bw cir_bw;
+ struct ice_bw eir_bw;
+ u32 shared_bw;
+};
+
+/* VSI queue context structure for given TC */
+struct ice_q_ctx {
+ u16 q_handle;
+ u32 q_teid;
+ /* bw_t_info saves queue BW information */
+ struct ice_bw_type_info bw_t_info;
+};
+
+/* VSI type list entry to locate corresponding VSI/aggregator nodes */
struct ice_sched_vsi_info {
struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
@@ -364,6 +424,8 @@ struct ice_port_info {
struct mutex sched_lock; /* protect access to TXSched tree */
struct ice_sched_node *
sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM];
+ /* List contain profile ID(s) and other params per layer */
+ struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM];
struct ice_dcbx_cfg local_dcbx_cfg; /* Oper/Local Cfg */
/* DCBX info */
struct ice_dcbx_cfg remote_dcbx_cfg; /* Peer Cfg */
@@ -415,6 +477,8 @@ struct ice_hw {
u8 pf_id; /* device profile info */
+ u16 max_burst_size; /* driver sets this value */
+
/* Tx Scheduler values */
u16 num_tx_sched_layers;
u16 num_tx_sched_phys_layers;
@@ -555,6 +619,8 @@ struct ice_hw_port_stats {
};
/* Checksum and Shadow RAM pointers */
+#define ICE_SR_BOOT_CFG_PTR 0x132
+#define ICE_NVM_OEM_VER_OFF 0x02
#define ICE_SR_NVM_DEV_STARTER_VER 0x18
#define ICE_SR_NVM_EETRACK_LO 0x2D
#define ICE_SR_NVM_EETRACK_HI 0x2E
@@ -568,6 +634,7 @@ struct ice_hw_port_stats {
#define ICE_OEM_VER_BUILD_MASK (0xffff << ICE_OEM_VER_BUILD_SHIFT)
#define ICE_OEM_VER_SHIFT 24
#define ICE_OEM_VER_MASK (0xff << ICE_OEM_VER_SHIFT)
+#define ICE_SR_PFA_PTR 0x40
#define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800
#define ICE_SR_WORDS_IN_1KB 512
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index b45797f39b2f..edb374296d1f 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2,9 +2,39 @@
/* Copyright (c) 2018, Intel Corporation. */
#include "ice.h"
+#include "ice_base.h"
#include "ice_lib.h"
/**
+ * ice_validate_vf_id - helper to check if VF ID is valid
+ * @pf: pointer to the PF structure
+ * @vf_id: the ID of the VF to check
+ */
+static int ice_validate_vf_id(struct ice_pf *pf, int vf_id)
+{
+ if (vf_id >= pf->num_alloc_vfs) {
+ dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %d\n", vf_id);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * ice_check_vf_init - helper to check if VF init complete
+ * @pf: pointer to the PF structure
+ * @vf: the pointer to the VF to check
+ */
+static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf)
+{
+ if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+ dev_err(ice_pf_to_dev(pf), "VF ID: %d in reset. Try again.\n",
+ vf->vf_id);
+ return -EBUSY;
+ }
+ return 0;
+}
+
+/**
* ice_err_to_virt err - translate errors for VF return code
* @ice_err: error return code
*/
@@ -184,12 +214,14 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
{
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
+ struct device *dev;
int first, last, v;
struct ice_hw *hw;
hw = &pf->hw;
vsi = pf->vsi[vf->lan_vsi_idx];
+ dev = ice_pf_to_dev(pf);
wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0);
@@ -208,13 +240,12 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG)
wr32(hw, VPLAN_TX_QBASE(vf->vf_id), 0);
else
- dev_err(&pf->pdev->dev,
- "Scattered mode for VF Tx queues is not yet implemented\n");
+ dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG)
wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0);
else
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"Scattered mode for VF Rx queues is not yet implemented\n");
}
@@ -289,6 +320,7 @@ static void ice_dis_vf_qs(struct ice_vf *vf)
*/
void ice_free_vfs(struct ice_pf *pf)
{
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
int tmp, i;
@@ -310,24 +342,24 @@ void ice_free_vfs(struct ice_pf *pf)
if (!pci_vfs_assigned(pf->pdev))
pci_disable_sriov(pf->pdev);
else
- dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n");
+ dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
tmp = pf->num_alloc_vfs;
pf->num_vf_qps = 0;
pf->num_alloc_vfs = 0;
for (i = 0; i < tmp; i++) {
if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) {
- /* disable VF qp mappings */
+ /* disable VF qp mappings and set VF disable state */
ice_dis_vf_mappings(&pf->vf[i]);
+ set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
ice_free_vf_res(&pf->vf[i]);
}
}
if (ice_sriov_free_msix_res(pf))
- dev_err(&pf->pdev->dev,
- "Failed to free MSIX resources used by SR-IOV\n");
+ dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
- devm_kfree(&pf->pdev->dev, pf->vf);
+ devm_kfree(dev, pf->vf);
pf->vf = NULL;
/* This check is for when the driver is unloaded while VFs are
@@ -366,9 +398,11 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
{
struct ice_pf *pf = vf->pf;
u32 reg, reg_idx, bit_idx;
+ struct device *dev;
struct ice_hw *hw;
int vf_abs_id, i;
+ dev = ice_pf_to_dev(pf);
hw = &pf->hw;
vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id;
@@ -389,7 +423,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
* by the time we get here.
*/
if (!is_pfr)
- wr32(hw, VF_MBX_ARQLEN(vf_abs_id), 0);
+ wr32(hw, VF_MBX_ARQLEN(vf->vf_id), 0);
/* In the case of a VFLR, the HW has already reset the VF and we
* just need to clean up, so don't hit the VFRTRIG register.
@@ -414,7 +448,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
if ((reg & VF_TRANS_PENDING_M) == 0)
break;
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"VF %d PCI transactions stuck\n", vf->vf_id);
udelay(ICE_PCI_CIAD_WAIT_DELAY_US);
}
@@ -457,13 +491,12 @@ static void ice_vsi_kill_pvid_fill_ctxt(struct ice_vsi_ctx *ctxt)
*/
static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable)
{
- struct device *dev = &vsi->back->pdev->dev;
struct ice_hw *hw = &vsi->back->hw;
struct ice_vsi_ctx *ctxt;
enum ice_status status;
int ret = 0;
- ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
if (!ctxt)
return -ENOMEM;
@@ -475,7 +508,7 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable)
status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
if (status) {
- dev_info(dev, "update VSI for port VLAN failed, err %d aq_err %d\n",
+ dev_info(&vsi->back->pdev->dev, "update VSI for port VLAN failed, err %d aq_err %d\n",
status, hw->adminq.sq_last_status);
ret = -EIO;
goto out;
@@ -483,7 +516,7 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable)
vsi->info = ctxt->info;
out:
- devm_kfree(dev, ctxt);
+ kfree(ctxt);
return ret;
}
@@ -531,14 +564,16 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
LIST_HEAD(tmp_add_list);
u8 broadcast[ETH_ALEN];
struct ice_vsi *vsi;
+ struct device *dev;
int status = 0;
+ dev = ice_pf_to_dev(pf);
/* first vector index is the VFs OICR index */
vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
if (!vsi) {
- dev_err(&pf->pdev->dev, "Failed to create VF VSI\n");
+ dev_err(dev, "Failed to create VF VSI\n");
return -ENOMEM;
}
@@ -566,8 +601,7 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
status = ice_add_mac(&pf->hw, &tmp_add_list);
if (status)
- dev_err(&pf->pdev->dev,
- "could not add mac filters error %d\n", status);
+ dev_err(dev, "could not add mac filters error %d\n", status);
else
vf->num_mac = 1;
@@ -578,7 +612,7 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
* more vectors.
*/
ice_alloc_vsi_res_exit:
- ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+ ice_free_fltr_list(dev, &tmp_add_list);
return status;
}
@@ -634,10 +668,12 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
int abs_vf_id, abs_first, abs_last;
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
+ struct device *dev;
int first, last, v;
struct ice_hw *hw;
u32 reg;
+ dev = ice_pf_to_dev(pf);
hw = &pf->hw;
vsi = pf->vsi[vf->lan_vsi_idx];
first = vf->first_vector_idx;
@@ -685,8 +721,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
VPLAN_TX_QBASE_VFNUMQ_M));
wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg);
} else {
- dev_err(&pf->pdev->dev,
- "Scattered mode for VF Tx queues is not yet implemented\n");
+ dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
}
/* set regardless of mapping mode */
@@ -704,8 +739,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
VPLAN_RX_QBASE_VFNUMQ_M));
wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg);
} else {
- dev_err(&pf->pdev->dev,
- "Scattered mode for VF Rx queues is not yet implemented\n");
+ dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n");
}
}
@@ -851,6 +885,7 @@ static int ice_check_avail_res(struct ice_pf *pf)
{
int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
u16 num_msix, num_txq, num_rxq, num_avail_msix;
+ struct device *dev = ice_pf_to_dev(pf);
if (!pf->num_alloc_vfs || max_valid_res_idx < 0)
return -EINVAL;
@@ -883,8 +918,7 @@ static int ice_check_avail_res(struct ice_pf *pf)
ICE_DFLT_INTR_PER_VF,
ICE_MIN_INTR_PER_VF);
} else {
- dev_err(&pf->pdev->dev,
- "Number of VFs %d exceeds max VF count %d\n",
+ dev_err(dev, "Number of VFs %d exceeds max VF count %d\n",
pf->num_alloc_vfs, ICE_MAX_VF_COUNT);
return -EIO;
}
@@ -1022,12 +1056,12 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m,
*/
static bool ice_config_res_vfs(struct ice_pf *pf)
{
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
int v;
if (ice_check_avail_res(pf)) {
- dev_err(&pf->pdev->dev,
- "Cannot allocate VF resources, try with fewer number of VFs\n");
+ dev_err(dev, "Cannot allocate VF resources, try with fewer number of VFs\n");
return false;
}
@@ -1040,9 +1074,8 @@ static bool ice_config_res_vfs(struct ice_pf *pf)
struct ice_vf *vf = &pf->vf[v];
vf->num_vf_qs = pf->num_vf_qps;
- dev_dbg(&pf->pdev->dev,
- "VF-id %d has %d queues configured\n",
- vf->vf_id, vf->num_vf_qs);
+ dev_dbg(dev, "VF-id %d has %d queues configured\n", vf->vf_id,
+ vf->num_vf_qs);
ice_cleanup_and_realloc_vf(vf);
}
@@ -1066,6 +1099,7 @@ static bool ice_config_res_vfs(struct ice_pf *pf)
*/
bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
{
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
struct ice_vf *vf;
int v, i;
@@ -1124,7 +1158,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
* time, but continue on with the operation.
*/
if (v < pf->num_alloc_vfs)
- dev_warn(&pf->pdev->dev, "VF reset check timeout\n");
+ dev_warn(dev, "VF reset check timeout\n");
/* free VF resources to begin resetting the VSI state */
for (v = 0; v < pf->num_alloc_vfs; v++) {
@@ -1141,8 +1175,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
}
if (ice_sriov_free_msix_res(pf))
- dev_err(&pf->pdev->dev,
- "Failed to free MSIX resources used by SR-IOV\n");
+ dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
if (!ice_config_res_vfs(pf))
return false;
@@ -1151,6 +1184,25 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
}
/**
+ * ice_is_vf_disabled
+ * @vf: pointer to the VF info
+ *
+ * Returns true if the PF or VF is disabled, false otherwise.
+ */
+static bool ice_is_vf_disabled(struct ice_vf *vf)
+{
+ struct ice_pf *pf = vf->pf;
+
+ /* If the PF has been disabled, there is no need resetting VF until
+ * PF is active again. Similarly, if the VF has been disabled, this
+ * means something else is resetting the VF, so we shouldn't continue.
+ * Otherwise, set disable VF state bit for actual reset, and continue.
+ */
+ return (test_bit(__ICE_VF_DIS, pf->state) ||
+ test_bit(ICE_VF_STATE_DIS, vf->vf_states));
+}
+
+/**
* ice_reset_vf - Reset a particular VF
* @vf: pointer to the VF structure
* @is_vflr: true if VFLR was issued, false if not
@@ -1161,25 +1213,23 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
{
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
+ struct device *dev;
struct ice_hw *hw;
bool rsd = false;
u8 promisc_m;
u32 reg;
int i;
- /* If the PF has been disabled, there is no need resetting VF until
- * PF is active again.
- */
- if (test_bit(__ICE_VF_DIS, pf->state))
- return false;
+ dev = ice_pf_to_dev(pf);
- /* If the VF has been disabled, this means something else is
- * resetting the VF, so we shouldn't continue. Otherwise, set
- * disable VF state bit for actual reset, and continue.
- */
- if (test_and_set_bit(ICE_VF_STATE_DIS, vf->vf_states))
- return false;
+ if (ice_is_vf_disabled(vf)) {
+ dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
+ vf->vf_id);
+ return true;
+ }
+ /* Set VF disable bit state here, before triggering reset */
+ set_bit(ICE_VF_STATE_DIS, vf->vf_states);
ice_trigger_vf_reset(vf, is_vflr, false);
vsi = pf->vsi[vf->lan_vsi_idx];
@@ -1216,8 +1266,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
* continue on with the operation.
*/
if (!rsd)
- dev_warn(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
- vf->vf_id);
+ dev_warn(dev, "VF reset check timeout on VF %d\n", vf->vf_id);
/* disable promiscuous modes in case they were enabled
* ignore any error if disabling process failed
@@ -1231,7 +1280,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
vsi = pf->vsi[vf->lan_vsi_idx];
if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true))
- dev_err(&pf->pdev->dev, "disabling promiscuous mode failed\n");
+ dev_err(dev, "disabling promiscuous mode failed\n");
}
/* free VF resources to begin resetting the VSI state */
@@ -1282,19 +1331,26 @@ void ice_vc_notify_reset(struct ice_pf *pf)
static void ice_vc_notify_vf_reset(struct ice_vf *vf)
{
struct virtchnl_pf_event pfe;
+ struct ice_pf *pf;
+
+ if (!vf)
+ return;
- /* validate the request */
- if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs)
+ pf = vf->pf;
+ if (ice_validate_vf_id(pf, vf->vf_id))
return;
- /* verify if the VF is in either init or active before proceeding */
- if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
- !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+ /* Bail out if VF is in disabled state, neither initialized, nor active
+ * state - otherwise proceed with notifications
+ */
+ if ((!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
+ !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) ||
+ test_bit(ICE_VF_STATE_DIS, vf->vf_states))
return;
pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
- ice_aq_send_msg_to_vf(&vf->pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT,
+ ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT,
VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe),
NULL);
}
@@ -1306,6 +1362,7 @@ static void ice_vc_notify_vf_reset(struct ice_vf *vf)
*/
static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
{
+ struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
struct ice_vf *vfs;
int i, ret;
@@ -1322,8 +1379,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
goto err_unroll_intr;
}
/* allocate memory */
- vfs = devm_kcalloc(&pf->pdev->dev, num_alloc_vfs, sizeof(*vfs),
- GFP_KERNEL);
+ vfs = devm_kcalloc(dev, num_alloc_vfs, sizeof(*vfs), GFP_KERNEL);
if (!vfs) {
ret = -ENOMEM;
goto err_pci_disable_sriov;
@@ -1352,7 +1408,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
err_unroll_sriov:
pf->vf = NULL;
- devm_kfree(&pf->pdev->dev, vfs);
+ devm_kfree(dev, vfs);
vfs = NULL;
pf->num_alloc_vfs = 0;
err_pci_disable_sriov:
@@ -1397,7 +1453,7 @@ static bool ice_pf_state_is_nominal(struct ice_pf *pf)
static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
{
int pre_existing_vfs = pci_num_vf(pf->pdev);
- struct device *dev = &pf->pdev->dev;
+ struct device *dev = ice_pf_to_dev(pf);
int err;
if (!ice_pf_state_is_nominal(pf)) {
@@ -1407,7 +1463,7 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) {
dev_err(dev, "This device is not capable of SR-IOV\n");
- return -ENODEV;
+ return -EOPNOTSUPP;
}
if (pre_existing_vfs && pre_existing_vfs != num_vfs)
@@ -1442,10 +1498,10 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
struct ice_pf *pf = pci_get_drvdata(pdev);
+ struct device *dev = ice_pf_to_dev(pf);
if (ice_is_safe_mode(pf)) {
- dev_err(&pf->pdev->dev,
- "SR-IOV cannot be configured - Device is in Safe Mode\n");
+ dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n");
return -EOPNOTSUPP;
}
@@ -1455,8 +1511,7 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (!pci_vfs_assigned(pdev)) {
ice_free_vfs(pf);
} else {
- dev_err(&pf->pdev->dev,
- "can't free VFs because some are assigned to VMs.\n");
+ dev_err(dev, "can't free VFs because some are assigned to VMs.\n");
return -EBUSY;
}
@@ -1495,12 +1550,10 @@ void ice_process_vflr_event(struct ice_pf *pf)
}
/**
- * ice_vc_dis_vf - Disable a given VF via SW reset
+ * ice_vc_reset_vf - Perform software reset on the VF after informing the AVF
* @vf: pointer to the VF info
- *
- * Disable the VF through a SW reset
*/
-static void ice_vc_dis_vf(struct ice_vf *vf)
+static void ice_vc_reset_vf(struct ice_vf *vf)
{
ice_vc_notify_vf_reset(vf);
ice_reset_vf(vf, false);
@@ -1521,24 +1574,28 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
{
enum ice_status aq_ret;
+ struct device *dev;
struct ice_pf *pf;
- /* validate the request */
- if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs)
+ if (!vf)
return -EINVAL;
pf = vf->pf;
+ if (ice_validate_vf_id(pf, vf->vf_id))
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(pf);
/* single place to detect unsuccessful return values */
if (v_retval) {
vf->num_inval_msgs++;
- dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n",
- vf->vf_id, v_opcode, v_retval);
+ dev_info(dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id,
+ v_opcode, v_retval);
if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) {
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"Number of invalid messages exceeded for VF %d\n",
vf->vf_id);
- dev_err(&pf->pdev->dev, "Use PF Control I/F to enable the VF\n");
+ dev_err(dev, "Use PF Control I/F to enable the VF\n");
set_bit(ICE_VF_STATE_DIS, vf->vf_states);
return -EIO;
}
@@ -1551,7 +1608,7 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
msg, msglen, NULL);
if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) {
- dev_info(&pf->pdev->dev,
+ dev_info(dev,
"Unable to send the message to VF %d ret %d aq_err %d\n",
vf->vf_id, aq_ret, pf->hw.mailboxq.sq_last_status);
return -EIO;
@@ -1599,14 +1656,14 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
int len = 0;
int ret;
- if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+ if (ice_check_vf_init(pf, vf)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto err;
}
len = sizeof(struct virtchnl_vf_resource);
- vfres = devm_kzalloc(&pf->pdev->dev, len, GFP_KERNEL);
+ vfres = kzalloc(len, GFP_KERNEL);
if (!vfres) {
v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
len = 0;
@@ -1672,6 +1729,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
vf->dflt_lan_addr.addr);
+ /* match guest capabilities */
+ vf->driver_caps = vfres->vf_cap_flags;
+
set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
err:
@@ -1679,7 +1739,7 @@ err:
ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES, v_ret,
(u8 *)vfres, len);
- devm_kfree(&pf->pdev->dev, vfres);
+ kfree(vfres);
return ret;
}
@@ -1775,7 +1835,7 @@ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
struct virtchnl_rss_key *vrk =
(struct virtchnl_rss_key *)msg;
struct ice_pf *pf = vf->pf;
- struct ice_vsi *vsi = NULL;
+ struct ice_vsi *vsi;
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -1822,7 +1882,7 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg;
enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
struct ice_pf *pf = vf->pf;
- struct ice_vsi *vsi = NULL;
+ struct ice_vsi *vsi;
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -1869,8 +1929,8 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg)
enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
struct virtchnl_queue_select *vqs =
(struct virtchnl_queue_select *)msg;
+ struct ice_eth_stats stats = { 0 };
struct ice_pf *pf = vf->pf;
- struct ice_eth_stats stats;
struct ice_vsi *vsi;
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
@@ -1889,7 +1949,6 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg)
goto error_param;
}
- memset(&stats, 0, sizeof(struct ice_eth_stats));
ice_update_eth_stats(vsi);
stats = vsi->eth_stats;
@@ -2159,9 +2218,11 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
vector_id = map->vector_id;
vsi_id = map->vsi_id;
- /* validate msg params */
- if (!(vector_id < pf->hw.func_caps.common_cap
- .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
+ /* vector_id is always 0-based for each VF, and can never be
+ * larger than or equal to the max allowed interrupts per VF
+ */
+ if (!(vector_id < ICE_MAX_INTR_PER_VF) ||
+ !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
(!vector_id && (map->rxq_map || map->txq_map))) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
@@ -2252,7 +2313,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF ||
qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
- dev_err(&pf->pdev->dev,
+ dev_err(ice_pf_to_dev(pf),
"VF-%d requesting more than supported number of queues: %d\n",
vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -2365,9 +2426,12 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
enum virtchnl_ops vc_op;
enum ice_status status;
struct ice_vsi *vsi;
+ struct device *dev;
int mac_count = 0;
int i;
+ dev = ice_pf_to_dev(pf);
+
if (set)
vc_op = VIRTCHNL_OP_ADD_ETH_ADDR;
else
@@ -2381,7 +2445,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
if (set && !ice_is_vf_trusted(vf) &&
(vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) {
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n",
vf->vf_id);
/* There is no need to let VF know about not being trusted
@@ -2406,13 +2470,13 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
/* VF is trying to add filters that the PF
* already added. Just continue.
*/
- dev_info(&pf->pdev->dev,
+ dev_info(dev,
"MAC %pM already set for VF %d\n",
maddr, vf->vf_id);
continue;
} else {
/* VF can't remove dflt_lan_addr/bcast MAC */
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"VF can't remove default MAC address or MAC %pM programmed by PF for VF %d\n",
maddr, vf->vf_id);
continue;
@@ -2421,7 +2485,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
/* check for the invalid cases and bail if necessary */
if (is_zero_ether_addr(maddr)) {
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"invalid MAC %pM provided for VF %d\n",
maddr, vf->vf_id);
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -2430,7 +2494,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
if (is_unicast_ether_addr(maddr) &&
!ice_can_vf_change_mac(vf)) {
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"can't change unicast MAC for untrusted VF %d\n",
vf->vf_id);
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -2441,12 +2505,12 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
status = ice_vsi_cfg_mac_fltr(vsi, maddr, set);
if (status == ICE_ERR_DOES_NOT_EXIST ||
status == ICE_ERR_ALREADY_EXISTS) {
- dev_info(&pf->pdev->dev,
+ dev_info(dev,
"can't %s MAC filters %pM for VF %d, error %d\n",
set ? "add" : "remove", maddr, vf->vf_id,
status);
} else if (status) {
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"can't %s MAC filters for VF %d, error %d\n",
set ? "add" : "remove", vf->vf_id, status);
v_ret = ice_err_to_virt_err(status);
@@ -2511,7 +2575,9 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
u16 max_allowed_vf_queues;
u16 tx_rx_queue_left;
u16 cur_queues;
+ struct device *dev;
+ dev = ice_pf_to_dev(pf);
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
@@ -2522,17 +2588,15 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
ice_get_avail_rxq_count(pf));
max_allowed_vf_queues = tx_rx_queue_left + cur_queues;
if (!req_queues) {
- dev_err(&pf->pdev->dev,
- "VF %d tried to request 0 queues. Ignoring.\n",
+ dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n",
vf->vf_id);
} else if (req_queues > ICE_MAX_BASE_QS_PER_VF) {
- dev_err(&pf->pdev->dev,
- "VF %d tried to request more than %d queues.\n",
+ dev_err(dev, "VF %d tried to request more than %d queues.\n",
vf->vf_id, ICE_MAX_BASE_QS_PER_VF);
vfres->num_queue_pairs = ICE_MAX_BASE_QS_PER_VF;
} else if (req_queues > cur_queues &&
req_queues - cur_queues > tx_rx_queue_left) {
- dev_warn(&pf->pdev->dev,
+ dev_warn(dev,
"VF %d requested %u more queues, but only %u left.\n",
vf->vf_id, req_queues - cur_queues, tx_rx_queue_left);
vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues,
@@ -2540,9 +2604,8 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
} else {
/* request is successful, then reset VF */
vf->num_req_qs = req_queues;
- ice_vc_dis_vf(vf);
- dev_info(&pf->pdev->dev,
- "VF %d granted request of %u queues.\n",
+ ice_vc_reset_vf(vf);
+ dev_info(dev, "VF %d granted request of %u queues.\n",
vf->vf_id, req_queues);
return 0;
}
@@ -2568,39 +2631,34 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
__be16 vlan_proto)
{
u16 vlanprio = vlan_id | (qos << ICE_VLAN_PRIORITY_S);
- struct ice_netdev_priv *np = netdev_priv(netdev);
- struct ice_pf *pf = np->vsi->back;
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
struct ice_vsi *vsi;
+ struct device *dev;
struct ice_vf *vf;
int ret = 0;
- /* validate the request */
- if (vf_id >= pf->num_alloc_vfs) {
- dev_err(&pf->pdev->dev, "invalid VF id: %d\n", vf_id);
+ dev = ice_pf_to_dev(pf);
+ if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
- }
if (vlan_id > ICE_MAX_VLANID || qos > 7) {
- dev_err(&pf->pdev->dev, "Invalid VF Parameters\n");
+ dev_err(dev, "Invalid VF Parameters\n");
return -EINVAL;
}
if (vlan_proto != htons(ETH_P_8021Q)) {
- dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n");
+ dev_err(dev, "VF VLAN protocol is not supported\n");
return -EPROTONOSUPPORT;
}
vf = &pf->vf[vf_id];
vsi = pf->vsi[vf->lan_vsi_idx];
- if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
- dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id);
+ if (ice_check_vf_init(pf, vf))
return -EBUSY;
- }
if (le16_to_cpu(vsi->info.pvid) == vlanprio) {
/* duplicate request, so just return success */
- dev_info(&pf->pdev->dev,
- "Duplicate pvid %d request\n", vlanprio);
+ dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio);
return ret;
}
@@ -2619,7 +2677,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
}
if (vlan_id) {
- dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n",
+ dev_info(dev, "Setting VLAN %d, QoS 0x%x on VF %d\n",
vlan_id, qos, vf_id);
/* add new VLAN filter for each MAC */
@@ -2638,6 +2696,17 @@ error_set_pvid:
}
/**
+ * ice_vf_vlan_offload_ena - determine if capabilities support VLAN offloads
+ * @caps: VF driver negotiated capabilities
+ *
+ * Return true if VIRTCHNL_VF_OFFLOAD_VLAN capability is set, else return false
+ */
+static bool ice_vf_vlan_offload_ena(u32 caps)
+{
+ return !!(caps & VIRTCHNL_VF_OFFLOAD_VLAN);
+}
+
+/**
* ice_vc_process_vlan_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -2653,16 +2722,23 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
struct ice_pf *pf = vf->pf;
bool vlan_promisc = false;
struct ice_vsi *vsi;
+ struct device *dev;
struct ice_hw *hw;
int status = 0;
u8 promisc_m;
int i;
+ dev = ice_pf_to_dev(pf);
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
+ if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
if (!ice_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
@@ -2670,7 +2746,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
if (add_v && !ice_is_vf_trusted(vf) &&
vf->num_vlan >= ICE_MAX_VLAN_PER_VF) {
- dev_info(&pf->pdev->dev,
+ dev_info(dev,
"VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
vf->vf_id);
/* There is no need to let VF know about being not trusted,
@@ -2682,7 +2758,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
for (i = 0; i < vfl->num_elements; i++) {
if (vfl->vlan_id[i] > ICE_MAX_VLANID) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"invalid VF VLAN id %d\n", vfl->vlan_id[i]);
goto error_param;
}
@@ -2700,14 +2776,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
goto error_param;
}
- if (ice_vsi_manage_vlan_stripping(vsi, add_v)) {
- dev_err(&pf->pdev->dev,
- "%sable VLAN stripping failed for VSI %i\n",
- add_v ? "en" : "dis", vsi->vsi_num);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto error_param;
- }
-
if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
vlan_promisc = true;
@@ -2718,7 +2786,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
if (!ice_is_vf_trusted(vf) &&
vf->num_vlan >= ICE_MAX_VLAN_PER_VF) {
- dev_info(&pf->pdev->dev,
+ dev_info(dev,
"VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
vf->vf_id);
/* There is no need to let VF know about being
@@ -2739,7 +2807,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
status = ice_cfg_vlan_pruning(vsi, true, false);
if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"Enable VLAN pruning on VLAN ID: %d failed error-%d\n",
vid, status);
goto error_param;
@@ -2753,7 +2821,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
promisc_m, vid);
if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- dev_err(&pf->pdev->dev,
+ dev_err(dev,
"Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n",
vid, status);
}
@@ -2848,6 +2916,11 @@ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
goto error_param;
}
+ if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
vsi = pf->vsi[vf->lan_vsi_idx];
if (ice_vsi_manage_vlan_stripping(vsi, true))
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -2874,6 +2947,11 @@ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
goto error_param;
}
+ if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
vsi = pf->vsi[vf->lan_vsi_idx];
if (!vsi) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -2889,6 +2967,33 @@ error_param:
}
/**
+ * ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization
+ * @vf: VF to enable/disable VLAN stripping for on initialization
+ *
+ * If the VIRTCHNL_VF_OFFLOAD_VLAN flag is set enable VLAN stripping, else if
+ * the flag is cleared then we want to disable stripping. For example, the flag
+ * will be cleared when port VLANs are configured by the administrator before
+ * passing the VF to the guest or if the AVF driver doesn't support VLAN
+ * offloads.
+ */
+static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
+{
+ struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+
+ if (!vsi)
+ return -EINVAL;
+
+ /* don't modify stripping if port VLAN is configured */
+ if (vsi->info.pvid)
+ return 0;
+
+ if (ice_vf_vlan_offload_ena(vf->driver_caps))
+ return ice_vsi_manage_vlan_stripping(vsi, true);
+ else
+ return ice_vsi_manage_vlan_stripping(vsi, false);
+}
+
+/**
* ice_vc_process_vf_msg - Process request from VF
* @pf: pointer to the PF structure
* @event: pointer to the AQ event
@@ -2903,9 +3008,11 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
u16 msglen = event->msg_len;
u8 *msg = event->msg_buf;
struct ice_vf *vf = NULL;
+ struct device *dev;
int err = 0;
- if (vf_id >= pf->num_alloc_vfs) {
+ dev = ice_pf_to_dev(pf);
+ if (ice_validate_vf_id(pf, vf_id)) {
err = -EINVAL;
goto error_handler;
}
@@ -2931,7 +3038,7 @@ error_handler:
if (err) {
ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM,
NULL, 0);
- dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n",
+ dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n",
vf_id, v_opcode, msglen, err);
return;
}
@@ -2942,6 +3049,10 @@ error_handler:
break;
case VIRTCHNL_OP_GET_VF_RESOURCES:
err = ice_vc_get_vf_res_msg(vf, msg);
+ if (ice_vf_init_vlan_stripping(vf))
+ dev_err(dev,
+ "Failed to initialize VLAN stripping for VF %d\n",
+ vf->vf_id);
ice_vc_notify_vf_link_state(vf);
break;
case VIRTCHNL_OP_RESET_VF:
@@ -2992,8 +3103,8 @@ error_handler:
break;
case VIRTCHNL_OP_UNKNOWN:
default:
- dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
- v_opcode, vf_id);
+ dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode,
+ vf_id);
err = ice_vc_send_msg_to_vf(vf, v_opcode,
VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
NULL, 0);
@@ -3003,8 +3114,7 @@ error_handler:
/* Helper function cares less about error return values here
* as it is busy with pending work.
*/
- dev_info(&pf->pdev->dev,
- "PF failed to honor VF %d, opcode %d, error %d\n",
+ dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n",
vf_id, v_opcode, err);
}
}
@@ -3020,24 +3130,18 @@ error_handler:
int
ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
{
- struct ice_netdev_priv *np = netdev_priv(netdev);
- struct ice_vsi *vsi = np->vsi;
- struct ice_pf *pf = vsi->back;
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_vsi *vsi;
struct ice_vf *vf;
- /* validate the request */
- if (vf_id >= pf->num_alloc_vfs) {
- netdev_err(netdev, "invalid VF id: %d\n", vf_id);
+ if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
- }
vf = &pf->vf[vf_id];
vsi = pf->vsi[vf->lan_vsi_idx];
- if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
- netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id);
+ if (ice_check_vf_init(pf, vf))
return -EBUSY;
- }
ivi->vf = vf_id;
ether_addr_copy(ivi->mac, vf->dflt_lan_addr.addr);
@@ -3070,33 +3174,29 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
*/
int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
{
- struct ice_netdev_priv *np = netdev_priv(netdev);
- struct ice_vsi *vsi = np->vsi;
- struct ice_pf *pf = vsi->back;
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_vsi *vsi = pf->vsi[0];
struct ice_vsi_ctx *ctx;
enum ice_status status;
+ struct device *dev;
struct ice_vf *vf;
int ret = 0;
- /* validate the request */
- if (vf_id >= pf->num_alloc_vfs) {
- netdev_err(netdev, "invalid VF id: %d\n", vf_id);
+ dev = ice_pf_to_dev(pf);
+ if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
- }
vf = &pf->vf[vf_id];
- if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
- netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id);
+ if (ice_check_vf_init(pf, vf))
return -EBUSY;
- }
if (ena == vf->spoofchk) {
- dev_dbg(&pf->pdev->dev, "VF spoofchk already %s\n",
+ dev_dbg(dev, "VF spoofchk already %s\n",
ena ? "ON" : "OFF");
return 0;
}
- ctx = devm_kzalloc(&pf->pdev->dev, sizeof(*ctx), GFP_KERNEL);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
@@ -3109,7 +3209,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
if (status) {
- dev_dbg(&pf->pdev->dev,
+ dev_dbg(dev,
"Error %d, failed to update VSI* parameters\n", status);
ret = -EIO;
goto out;
@@ -3119,11 +3219,28 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
vsi->info.sec_flags = ctx->info.sec_flags;
vsi->info.sw_flags2 = ctx->info.sw_flags2;
out:
- devm_kfree(&pf->pdev->dev, ctx);
+ kfree(ctx);
return ret;
}
/**
+ * ice_wait_on_vf_reset
+ * @vf: The VF being resseting
+ *
+ * Poll to make sure a given VF is ready after reset
+ */
+static void ice_wait_on_vf_reset(struct ice_vf *vf)
+{
+ int i;
+
+ for (i = 0; i < ICE_MAX_VF_RESET_WAIT; i++) {
+ if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
+ break;
+ msleep(20);
+ }
+}
+
+/**
* ice_set_vf_mac
* @netdev: network interface device structure
* @vf_id: VF identifier
@@ -3133,23 +3250,25 @@ out:
*/
int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
{
- struct ice_netdev_priv *np = netdev_priv(netdev);
- struct ice_vsi *vsi = np->vsi;
- struct ice_pf *pf = vsi->back;
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
struct ice_vf *vf;
int ret = 0;
- /* validate the request */
- if (vf_id >= pf->num_alloc_vfs) {
- netdev_err(netdev, "invalid VF id: %d\n", vf_id);
+ if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
- }
vf = &pf->vf[vf_id];
- if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
- netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id);
+ /* Don't set MAC on disabled VF */
+ if (ice_is_vf_disabled(vf))
+ return -EINVAL;
+
+ /* In case VF is in reset mode, wait until it is completed. Depending
+ * on factors like queue disabling routine, this could take ~250ms
+ */
+ ice_wait_on_vf_reset(vf);
+
+ if (ice_check_vf_init(pf, vf))
return -EBUSY;
- }
if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) {
netdev_err(netdev, "%pM not a valid unicast address\n", mac);
@@ -3167,7 +3286,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
"MAC on VF %d set to %pM. VF driver will be reinitialized\n",
vf_id, mac);
- ice_vc_dis_vf(vf);
+ ice_vc_reset_vf(vf);
return ret;
}
@@ -3181,30 +3300,34 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
*/
int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
{
- struct ice_netdev_priv *np = netdev_priv(netdev);
- struct ice_vsi *vsi = np->vsi;
- struct ice_pf *pf = vsi->back;
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct device *dev;
struct ice_vf *vf;
- /* validate the request */
- if (vf_id >= pf->num_alloc_vfs) {
- dev_err(&pf->pdev->dev, "invalid VF id: %d\n", vf_id);
+ dev = ice_pf_to_dev(pf);
+ if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
- }
vf = &pf->vf[vf_id];
- if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
- dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id);
+ /* Don't set Trusted Mode on disabled VF */
+ if (ice_is_vf_disabled(vf))
+ return -EINVAL;
+
+ /* In case VF is in reset mode, wait until it is completed. Depending
+ * on factors like queue disabling routine, this could take ~250ms
+ */
+ ice_wait_on_vf_reset(vf);
+
+ if (ice_check_vf_init(pf, vf))
return -EBUSY;
- }
/* Check if already trusted */
if (trusted == vf->trusted)
return 0;
vf->trusted = trusted;
- ice_vc_dis_vf(vf);
- dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
+ ice_vc_reset_vf(vf);
+ dev_info(dev, "VF %u is now %strusted\n",
vf_id, trusted ? "" : "un");
return 0;
@@ -3220,26 +3343,21 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
*/
int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
{
- struct ice_netdev_priv *np = netdev_priv(netdev);
- struct ice_pf *pf = np->vsi->back;
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
struct virtchnl_pf_event pfe = { 0 };
struct ice_link_status *ls;
struct ice_vf *vf;
struct ice_hw *hw;
- if (vf_id >= pf->num_alloc_vfs) {
- dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
+ if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
- }
vf = &pf->vf[vf_id];
hw = &pf->hw;
ls = &pf->hw.port_info->phy.link_info;
- if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
- dev_err(&pf->pdev->dev, "vf %d in reset. Try again.\n", vf_id);
+ if (ice_check_vf_init(pf, vf))
return -EBUSY;
- }
pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
pfe.severity = PF_EVENT_SEVERITY_INFO;
@@ -3273,3 +3391,48 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
return 0;
}
+
+/**
+ * ice_get_vf_stats - populate some stats for the VF
+ * @netdev: the netdev of the PF
+ * @vf_id: the host OS identifier (0-255)
+ * @vf_stats: pointer to the OS memory to be initialized
+ */
+int ice_get_vf_stats(struct net_device *netdev, int vf_id,
+ struct ifla_vf_stats *vf_stats)
+{
+ struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct ice_eth_stats *stats;
+ struct ice_vsi *vsi;
+ struct ice_vf *vf;
+
+ if (ice_validate_vf_id(pf, vf_id))
+ return -EINVAL;
+
+ vf = &pf->vf[vf_id];
+
+ if (ice_check_vf_init(pf, vf))
+ return -EBUSY;
+
+ vsi = pf->vsi[vf->lan_vsi_idx];
+ if (!vsi)
+ return -EINVAL;
+
+ ice_update_eth_stats(vsi);
+ stats = &vsi->eth_stats;
+
+ memset(vf_stats, 0, sizeof(*vf_stats));
+
+ vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast +
+ stats->rx_multicast;
+ vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast +
+ stats->tx_multicast;
+ vf_stats->rx_bytes = stats->rx_bytes;
+ vf_stats->tx_bytes = stats->tx_bytes;
+ vf_stats->broadcast = stats->rx_broadcast;
+ vf_stats->multicast = stats->rx_multicast;
+ vf_stats->rx_dropped = stats->rx_discards;
+ vf_stats->tx_dropped = stats->tx_discards;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 0d9880c8bba3..88aa65d5cb31 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -38,6 +38,7 @@
#define ICE_MAX_POLICY_INTR_PER_VF 33
#define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1)
#define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1)
+#define ICE_MAX_VF_RESET_WAIT 15
/* Specific VF states */
enum ice_vf_states {
@@ -121,6 +122,9 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena);
int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector);
void ice_set_vf_state_qs_dis(struct ice_vf *vf);
+int
+ice_get_vf_stats(struct net_device *netdev, int vf_id,
+ struct ifla_vf_stats *vf_stats);
#else /* CONFIG_PCI_IOV */
#define ice_process_vflr_event(pf) do {} while (0)
#define ice_free_vfs(pf) do {} while (0)
@@ -193,5 +197,13 @@ ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf,
{
return 0;
}
+
+static inline int
+ice_get_vf_stats(struct net_device __always_unused *netdev,
+ int __always_unused vf_id,
+ struct ifla_vf_stats __always_unused *vf_stats)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_PCI_IOV */
#endif /* _ICE_VIRTCHNL_PF_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
new file mode 100644
index 000000000000..cf9b8b22d24f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -0,0 +1,1181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_type.h"
+#include "ice_xsk.h"
+#include "ice_txrx.h"
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+
+/**
+ * ice_qp_reset_stats - Resets all stats for rings of given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
+{
+ memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
+ sizeof(vsi->rx_rings[q_idx]->rx_stats));
+ memset(&vsi->tx_rings[q_idx]->stats, 0,
+ sizeof(vsi->tx_rings[q_idx]->stats));
+ if (ice_is_xdp_ena_vsi(vsi))
+ memset(&vsi->xdp_rings[q_idx]->stats, 0,
+ sizeof(vsi->xdp_rings[q_idx]->stats));
+}
+
+/**
+ * ice_qp_clean_rings - Cleans all the rings of a given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
+{
+ ice_clean_tx_ring(vsi->tx_rings[q_idx]);
+ if (ice_is_xdp_ena_vsi(vsi))
+ ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
+ ice_clean_rx_ring(vsi->rx_rings[q_idx]);
+}
+
+/**
+ * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
+ * @vsi: VSI that has netdev
+ * @q_vector: q_vector that has NAPI context
+ * @enable: true for enable, false for disable
+ */
+static void
+ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
+ bool enable)
+{
+ if (!vsi->netdev || !q_vector)
+ return;
+
+ if (enable)
+ napi_enable(&q_vector->napi);
+ else
+ napi_disable(&q_vector->napi);
+}
+
+/**
+ * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
+ * @vsi: the VSI that contains queue vector being un-configured
+ * @rx_ring: Rx ring that will have its IRQ disabled
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_ring *rx_ring,
+ struct ice_q_vector *q_vector)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ int base = vsi->base_vector;
+ u16 reg;
+ u32 val;
+
+ /* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle
+ * here only QINT_RQCTL
+ */
+ reg = rx_ring->reg_idx;
+ val = rd32(hw, QINT_RQCTL(reg));
+ val &= ~QINT_RQCTL_CAUSE_ENA_M;
+ wr32(hw, QINT_RQCTL(reg), val);
+
+ if (q_vector) {
+ u16 v_idx = q_vector->v_idx;
+
+ wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
+ ice_flush(hw);
+ synchronize_irq(pf->msix_entries[v_idx + base].vector);
+ }
+}
+
+/**
+ * ice_qvec_cfg_msix - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+ u16 reg_idx = q_vector->reg_idx;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_ring *ring;
+
+ ice_cfg_itr(hw, q_vector);
+
+ wr32(hw, GLINT_RATE(reg_idx),
+ ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
+
+ ice_for_each_ring(ring, q_vector->tx)
+ ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx,
+ q_vector->tx.itr_idx);
+
+ ice_for_each_ring(ring, q_vector->rx)
+ ice_cfg_rxq_interrupt(vsi, ring->reg_idx, reg_idx,
+ q_vector->rx.itr_idx);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_qvec_ena_irq - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+
+ ice_irq_dynamic_ena(hw, vsi, q_vector);
+
+ ice_flush(hw);
+}
+
+/**
+ * ice_qp_dis - Disables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+{
+ struct ice_txq_meta txq_meta = { };
+ struct ice_ring *tx_ring, *rx_ring;
+ struct ice_q_vector *q_vector;
+ int timeout = 50;
+ int err;
+
+ if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+ return -EINVAL;
+
+ tx_ring = vsi->tx_rings[q_idx];
+ rx_ring = vsi->rx_rings[q_idx];
+ q_vector = rx_ring->q_vector;
+
+ while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) {
+ timeout--;
+ if (!timeout)
+ return -EBUSY;
+ usleep_range(1000, 2000);
+ }
+ netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+
+ ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+
+ ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
+ if (err)
+ return err;
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+ memset(&txq_meta, 0, sizeof(txq_meta));
+ ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
+ &txq_meta);
+ if (err)
+ return err;
+ }
+ err = ice_vsi_ctrl_rx_ring(vsi, false, q_idx);
+ if (err)
+ return err;
+
+ ice_qvec_toggle_napi(vsi, q_vector, false);
+ ice_qp_clean_rings(vsi, q_idx);
+ ice_qp_reset_stats(vsi, q_idx);
+
+ return 0;
+}
+
+/**
+ * ice_qp_ena - Enables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
+{
+ struct ice_aqc_add_tx_qgrp *qg_buf;
+ struct ice_ring *tx_ring, *rx_ring;
+ struct ice_q_vector *q_vector;
+ int err;
+
+ if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+ return -EINVAL;
+
+ qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL);
+ if (!qg_buf)
+ return -ENOMEM;
+
+ qg_buf->num_txqs = 1;
+
+ tx_ring = vsi->tx_rings[q_idx];
+ rx_ring = vsi->rx_rings[q_idx];
+ q_vector = rx_ring->q_vector;
+
+ err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
+ if (err)
+ goto free_buf;
+
+ if (ice_is_xdp_ena_vsi(vsi)) {
+ struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+ memset(qg_buf, 0, sizeof(*qg_buf));
+ qg_buf->num_txqs = 1;
+ err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
+ if (err)
+ goto free_buf;
+ ice_set_ring_xdp(xdp_ring);
+ xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+ }
+
+ err = ice_setup_rx_ctx(rx_ring);
+ if (err)
+ goto free_buf;
+
+ ice_qvec_cfg_msix(vsi, q_vector);
+
+ err = ice_vsi_ctrl_rx_ring(vsi, true, q_idx);
+ if (err)
+ goto free_buf;
+
+ clear_bit(__ICE_CFG_BUSY, vsi->state);
+ ice_qvec_toggle_napi(vsi, q_vector, true);
+ ice_qvec_ena_irq(vsi, q_vector);
+
+ netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+free_buf:
+ kfree(qg_buf);
+ return err;
+}
+
+/**
+ * ice_xsk_alloc_umems - allocate a UMEM region for an XDP socket
+ * @vsi: VSI to allocate the UMEM on
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_alloc_umems(struct ice_vsi *vsi)
+{
+ if (vsi->xsk_umems)
+ return 0;
+
+ vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems),
+ GFP_KERNEL);
+
+ if (!vsi->xsk_umems) {
+ vsi->num_xsk_umems = 0;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_xsk_add_umem - add a UMEM region for XDP sockets
+ * @vsi: VSI to which the UMEM will be added
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_add_umem(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+ int err;
+
+ err = ice_xsk_alloc_umems(vsi);
+ if (err)
+ return err;
+
+ vsi->xsk_umems[qid] = umem;
+ vsi->num_xsk_umems_used++;
+
+ return 0;
+}
+
+/**
+ * ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid
+ * @vsi: VSI from which the VSI will be removed
+ * @qid: Ring/qid associated with the UMEM
+ */
+static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid)
+{
+ vsi->xsk_umems[qid] = NULL;
+ vsi->num_xsk_umems_used--;
+
+ if (vsi->num_xsk_umems_used == 0) {
+ kfree(vsi->xsk_umems);
+ vsi->xsk_umems = NULL;
+ vsi->num_xsk_umems = 0;
+ }
+}
+
+/**
+ * ice_xsk_umem_dma_map - DMA map UMEM region for XDP sockets
+ * @vsi: VSI to map the UMEM region
+ * @umem: UMEM to map
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ unsigned int i;
+
+ dev = ice_pf_to_dev(pf);
+ for (i = 0; i < umem->npgs; i++) {
+ dma_addr_t dma = dma_map_page_attrs(dev, umem->pgs[i], 0,
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL,
+ ICE_RX_DMA_ATTR);
+ if (dma_mapping_error(dev, dma)) {
+ dev_dbg(dev,
+ "XSK UMEM DMA mapping error on page num %d", i);
+ goto out_unmap;
+ }
+
+ umem->pages[i].dma = dma;
+ }
+
+ return 0;
+
+out_unmap:
+ for (; i > 0; i--) {
+ dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+ umem->pages[i].dma = 0;
+ }
+
+ return -EFAULT;
+}
+
+/**
+ * ice_xsk_umem_dma_unmap - DMA unmap UMEM region for XDP sockets
+ * @vsi: VSI from which the UMEM will be unmapped
+ * @umem: UMEM to unmap
+ */
+static void ice_xsk_umem_dma_unmap(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+ unsigned int i;
+
+ dev = ice_pf_to_dev(pf);
+ for (i = 0; i < umem->npgs; i++) {
+ dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+
+ umem->pages[i].dma = 0;
+ }
+}
+
+/**
+ * ice_xsk_umem_disable - disable a UMEM region
+ * @vsi: Current VSI
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid)
+{
+ if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems ||
+ !vsi->xsk_umems[qid])
+ return -EINVAL;
+
+ ice_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]);
+ ice_xsk_remove_umem(vsi, qid);
+
+ return 0;
+}
+
+/**
+ * ice_xsk_umem_enable - enable a UMEM region
+ * @vsi: Current VSI
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+ struct xdp_umem_fq_reuse *reuseq;
+ int err;
+
+ if (vsi->type != ICE_VSI_PF)
+ return -EINVAL;
+
+ vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq);
+ if (qid >= vsi->num_xsk_umems)
+ return -EINVAL;
+
+ if (vsi->xsk_umems && vsi->xsk_umems[qid])
+ return -EBUSY;
+
+ reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
+ if (!reuseq)
+ return -ENOMEM;
+
+ xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+ err = ice_xsk_umem_dma_map(vsi, umem);
+ if (err)
+ return err;
+
+ err = ice_xsk_add_umem(vsi, umem, qid);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_xsk_umem_setup - enable/disable a UMEM region depending on its state
+ * @vsi: Current VSI
+ * @umem: UMEM to enable/associate to a ring, NULL to disable
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+ bool if_running, umem_present = !!umem;
+ int ret = 0, umem_failure = 0;
+
+ if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+
+ if (if_running) {
+ ret = ice_qp_dis(vsi, qid);
+ if (ret) {
+ netdev_err(vsi->netdev, "ice_qp_dis error = %d", ret);
+ goto xsk_umem_if_up;
+ }
+ }
+
+ umem_failure = umem_present ? ice_xsk_umem_enable(vsi, umem, qid) :
+ ice_xsk_umem_disable(vsi, qid);
+
+xsk_umem_if_up:
+ if (if_running) {
+ ret = ice_qp_ena(vsi, qid);
+ if (!ret && umem_present)
+ napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
+ else if (ret)
+ netdev_err(vsi->netdev, "ice_qp_ena error = %d", ret);
+ }
+
+ if (umem_failure) {
+ netdev_err(vsi->netdev, "Could not %sable UMEM, error = %d",
+ umem_present ? "en" : "dis", umem_failure);
+ return umem_failure;
+ }
+
+ return ret;
+}
+
+/**
+ * ice_zca_free - Callback for MEM_TYPE_ZERO_COPY allocations
+ * @zca: zero-cpoy allocator
+ * @handle: Buffer handle
+ */
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+ struct ice_rx_buf *rx_buf;
+ struct ice_ring *rx_ring;
+ struct xdp_umem *umem;
+ u64 hr, mask;
+ u16 nta;
+
+ rx_ring = container_of(zca, struct ice_ring, zca);
+ umem = rx_ring->xsk_umem;
+ hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+ mask = umem->chunk_mask;
+
+ nta = rx_ring->next_to_alloc;
+ rx_buf = &rx_ring->rx_buf[nta];
+
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ handle &= mask;
+
+ rx_buf->dma = xdp_umem_get_dma(umem, handle);
+ rx_buf->dma += hr;
+
+ rx_buf->addr = xdp_umem_get_data(umem, handle);
+ rx_buf->addr += hr;
+
+ rx_buf->handle = (u64)handle + umem->headroom;
+}
+
+/**
+ * ice_alloc_buf_fast_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the hot path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+ struct xdp_umem *umem = rx_ring->xsk_umem;
+ void *addr = rx_buf->addr;
+ u64 handle, hr;
+
+ if (addr) {
+ rx_ring->rx_stats.page_reuse_count++;
+ return true;
+ }
+
+ if (!xsk_umem_peek_addr(umem, &handle)) {
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
+
+ hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+ rx_buf->dma = xdp_umem_get_dma(umem, handle);
+ rx_buf->dma += hr;
+
+ rx_buf->addr = xdp_umem_get_data(umem, handle);
+ rx_buf->addr += hr;
+
+ rx_buf->handle = handle + umem->headroom;
+
+ xsk_umem_discard_addr(umem);
+ return true;
+}
+
+/**
+ * ice_alloc_buf_slow_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the slow path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+ struct xdp_umem *umem = rx_ring->xsk_umem;
+ u64 handle, headroom;
+
+ if (!xsk_umem_peek_addr_rq(umem, &handle)) {
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
+
+ handle &= umem->chunk_mask;
+ headroom = umem->headroom + XDP_PACKET_HEADROOM;
+
+ rx_buf->dma = xdp_umem_get_dma(umem, handle);
+ rx_buf->dma += headroom;
+
+ rx_buf->addr = xdp_umem_get_data(umem, handle);
+ rx_buf->addr += headroom;
+
+ rx_buf->handle = handle + umem->headroom;
+
+ xsk_umem_discard_addr_rq(umem);
+ return true;
+}
+
+/**
+ * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ * @alloc: the function pointer to call for allocation
+ *
+ * This function allocates a number of Rx buffers from the fill ring
+ * or the internal recycle mechanism and places them on the Rx ring.
+ *
+ * Returns false if all allocations were successful, true if any fail.
+ */
+static bool
+ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
+ bool alloc(struct ice_ring *, struct ice_rx_buf *))
+{
+ union ice_32b_rx_flex_desc *rx_desc;
+ u16 ntu = rx_ring->next_to_use;
+ struct ice_rx_buf *rx_buf;
+ bool ret = false;
+
+ if (!count)
+ return false;
+
+ rx_desc = ICE_RX_DESC(rx_ring, ntu);
+ rx_buf = &rx_ring->rx_buf[ntu];
+
+ do {
+ if (!alloc(rx_ring, rx_buf)) {
+ ret = true;
+ break;
+ }
+
+ dma_sync_single_range_for_device(rx_ring->dev, rx_buf->dma, 0,
+ rx_ring->rx_buf_len,
+ DMA_BIDIRECTIONAL);
+
+ rx_desc->read.pkt_addr = cpu_to_le64(rx_buf->dma);
+ rx_desc->wb.status_error0 = 0;
+
+ rx_desc++;
+ rx_buf++;
+ ntu++;
+
+ if (unlikely(ntu == rx_ring->count)) {
+ rx_desc = ICE_RX_DESC(rx_ring, 0);
+ rx_buf = rx_ring->rx_buf;
+ ntu = 0;
+ }
+ } while (--count);
+
+ if (rx_ring->next_to_use != ntu)
+ ice_release_rx_desc(rx_ring, ntu);
+
+ return ret;
+}
+
+/**
+ * ice_alloc_rx_bufs_fast_zc - allocate zero copy bufs in the hot path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+static bool ice_alloc_rx_bufs_fast_zc(struct ice_ring *rx_ring, u16 count)
+{
+ return ice_alloc_rx_bufs_zc(rx_ring, count,
+ ice_alloc_buf_fast_zc);
+}
+
+/**
+ * ice_alloc_rx_bufs_slow_zc - allocate zero copy bufs in the slow path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count)
+{
+ return ice_alloc_rx_bufs_zc(rx_ring, count,
+ ice_alloc_buf_slow_zc);
+}
+
+/**
+ * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
+ * @rx_ring: Rx ring
+ */
+static void ice_bump_ntc(struct ice_ring *rx_ring)
+{
+ int ntc = rx_ring->next_to_clean + 1;
+
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+ prefetch(ICE_RX_DESC(rx_ring, ntc));
+}
+
+/**
+ * ice_get_rx_buf_zc - Fetch the current Rx buffer
+ * @rx_ring: Rx ring
+ * @size: size of a buffer
+ *
+ * This function returns the current, received Rx buffer and does
+ * DMA synchronization.
+ *
+ * Returns a pointer to the received Rx buffer.
+ */
+static struct ice_rx_buf *ice_get_rx_buf_zc(struct ice_ring *rx_ring, int size)
+{
+ struct ice_rx_buf *rx_buf;
+
+ rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+
+ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, 0,
+ size, DMA_BIDIRECTIONAL);
+
+ return rx_buf;
+}
+
+/**
+ * ice_reuse_rx_buf_zc - reuse an Rx buffer
+ * @rx_ring: Rx ring
+ * @old_buf: The buffer to recycle
+ *
+ * This function recycles a finished Rx buffer, and places it on the recycle
+ * queue (next_to_alloc).
+ */
+static void
+ice_reuse_rx_buf_zc(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
+{
+ unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
+ u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
+ u16 nta = rx_ring->next_to_alloc;
+ struct ice_rx_buf *new_buf;
+
+ new_buf = &rx_ring->rx_buf[nta++];
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ new_buf->dma = old_buf->dma & mask;
+ new_buf->dma += hr;
+
+ new_buf->addr = (void *)((unsigned long)old_buf->addr & mask);
+ new_buf->addr += hr;
+
+ new_buf->handle = old_buf->handle & mask;
+ new_buf->handle += rx_ring->xsk_umem->headroom;
+
+ old_buf->addr = NULL;
+}
+
+/**
+ * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
+ * @rx_ring: Rx ring
+ * @rx_buf: zero-copy Rx buffer
+ * @xdp: XDP buffer
+ *
+ * This function allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb on success, NULL on failure.
+ */
+static struct sk_buff *
+ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+ struct xdp_buff *xdp)
+{
+ unsigned int metasize = xdp->data - xdp->data_meta;
+ unsigned int datasize = xdp->data_end - xdp->data;
+ unsigned int datasize_hard = xdp->data_end -
+ xdp->data_hard_start;
+ struct sk_buff *skb;
+
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
+
+ ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+
+ return skb;
+}
+
+/**
+ * ice_run_xdp_zc - Executes an XDP program in zero-copy path
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
+{
+ int err, result = ICE_XDP_PASS;
+ struct bpf_prog *xdp_prog;
+ struct ice_ring *xdp_ring;
+ u32 act;
+
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+ if (!xdp_prog) {
+ rcu_read_unlock();
+ return ICE_XDP_PASS;
+ }
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ xdp->handle += xdp->data - xdp->data_hard_start;
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
+ result = ice_xmit_xdp_buff(xdp, xdp_ring);
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fallthrough -- not supported action */
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ /* fallthrough -- handle aborts by dropping frame */
+ case XDP_DROP:
+ result = ICE_XDP_CONSUMED;
+ break;
+ }
+
+ rcu_read_unlock();
+ return result;
+}
+
+/**
+ * ice_clean_rx_irq_zc - consumes packets from the hardware ring
+ * @rx_ring: AF_XDP Rx ring
+ * @budget: NAPI budget
+ *
+ * Returns number of processed packets on success, remaining budget on failure.
+ */
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
+{
+ unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+ u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+ unsigned int xdp_xmit = 0;
+ struct xdp_buff xdp;
+ bool failure = 0;
+
+ xdp.rxq = &rx_ring->xdp_rxq;
+
+ while (likely(total_rx_packets < (unsigned int)budget)) {
+ union ice_32b_rx_flex_desc *rx_desc;
+ unsigned int size, xdp_res = 0;
+ struct ice_rx_buf *rx_buf;
+ struct sk_buff *skb;
+ u16 stat_err_bits;
+ u16 vlan_tag = 0;
+ u8 rx_ptype;
+
+ if (cleaned_count >= ICE_RX_BUF_WRITE) {
+ failure |= ice_alloc_rx_bufs_fast_zc(rx_ring,
+ cleaned_count);
+ cleaned_count = 0;
+ }
+
+ rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+ if (!ice_test_staterr(rx_desc, stat_err_bits))
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we have
+ * verified the descriptor has been written back.
+ */
+ dma_rmb();
+
+ size = le16_to_cpu(rx_desc->wb.pkt_len) &
+ ICE_RX_FLX_DESC_PKT_LEN_M;
+ if (!size)
+ break;
+
+ rx_buf = ice_get_rx_buf_zc(rx_ring, size);
+ if (!rx_buf->addr)
+ break;
+
+ xdp.data = rx_buf->addr;
+ xdp.data_meta = xdp.data;
+ xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+ xdp.data_end = xdp.data + size;
+ xdp.handle = rx_buf->handle;
+
+ xdp_res = ice_run_xdp_zc(rx_ring, &xdp);
+ if (xdp_res) {
+ if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+ xdp_xmit |= xdp_res;
+ rx_buf->addr = NULL;
+ } else {
+ ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+ }
+
+ total_rx_bytes += size;
+ total_rx_packets++;
+ cleaned_count++;
+
+ ice_bump_ntc(rx_ring);
+ continue;
+ }
+
+ /* XDP_PASS path */
+ skb = ice_construct_skb_zc(rx_ring, rx_buf, &xdp);
+ if (!skb) {
+ rx_ring->rx_stats.alloc_buf_failed++;
+ break;
+ }
+
+ cleaned_count++;
+ ice_bump_ntc(rx_ring);
+
+ if (eth_skb_pad(skb)) {
+ skb = NULL;
+ continue;
+ }
+
+ total_rx_bytes += skb->len;
+ total_rx_packets++;
+
+ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+ if (ice_test_staterr(rx_desc, stat_err_bits))
+ vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+
+ rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+ ICE_RX_FLEX_DESC_PTYPE_M;
+
+ ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+ ice_receive_skb(rx_ring, skb, vlan_tag);
+ }
+
+ ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+ ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
+
+ return failure ? budget : (int)total_rx_packets;
+}
+
+/**
+ * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: max number of frames to xmit
+ *
+ * Returns true if cleanup/transmission is done.
+ */
+static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
+{
+ struct ice_tx_desc *tx_desc = NULL;
+ bool work_done = true;
+ struct xdp_desc desc;
+ dma_addr_t dma;
+
+ while (likely(budget-- > 0)) {
+ struct ice_tx_buf *tx_buf;
+
+ if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
+ xdp_ring->tx_stats.tx_busy++;
+ work_done = false;
+ break;
+ }
+
+ tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
+
+ if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
+ break;
+
+ dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+
+ dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
+ DMA_BIDIRECTIONAL);
+
+ tx_buf->bytecount = desc.len;
+
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD,
+ 0, desc.len, 0);
+
+ xdp_ring->next_to_use++;
+ if (xdp_ring->next_to_use == xdp_ring->count)
+ xdp_ring->next_to_use = 0;
+ }
+
+ if (tx_desc) {
+ ice_xdp_ring_update_tail(xdp_ring);
+ xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+ }
+
+ return budget > 0 && work_done;
+}
+
+/**
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
+ * @xdp_ring: XDP Tx ring
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+ xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+ dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: NAPI budget
+ *
+ * Returns true if cleanup/tranmission is done.
+ */
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget)
+{
+ int total_packets = 0, total_bytes = 0;
+ s16 ntc = xdp_ring->next_to_clean;
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ bool xmit_done = true;
+ u32 xsk_frames = 0;
+
+ tx_desc = ICE_TX_DESC(xdp_ring, ntc);
+ tx_buf = &xdp_ring->tx_buf[ntc];
+ ntc -= xdp_ring->count;
+
+ do {
+ if (!(tx_desc->cmd_type_offset_bsz &
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+ break;
+
+ total_bytes += tx_buf->bytecount;
+ total_packets++;
+
+ if (tx_buf->raw_buf) {
+ ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+ tx_buf->raw_buf = NULL;
+ } else {
+ xsk_frames++;
+ }
+
+ tx_desc->cmd_type_offset_bsz = 0;
+ tx_buf++;
+ tx_desc++;
+ ntc++;
+
+ if (unlikely(!ntc)) {
+ ntc -= xdp_ring->count;
+ tx_buf = xdp_ring->tx_buf;
+ tx_desc = ICE_TX_DESC(xdp_ring, 0);
+ }
+
+ prefetch(tx_desc);
+
+ } while (likely(--budget));
+
+ ntc += xdp_ring->count;
+ xdp_ring->next_to_clean = ntc;
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+
+ ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
+ xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
+
+ return budget > 0 && xmit_done;
+}
+
+/**
+ * ice_xsk_wakeup - Implements ndo_xsk_wakeup
+ * @netdev: net_device
+ * @queue_id: queue to wake up
+ * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
+ *
+ * Returns negative on error, zero otherwise.
+ */
+int
+ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
+ u32 __always_unused flags)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_q_vector *q_vector;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_ring *ring;
+
+ if (test_bit(__ICE_DOWN, vsi->state))
+ return -ENETDOWN;
+
+ if (!ice_is_xdp_ena_vsi(vsi))
+ return -ENXIO;
+
+ if (queue_id >= vsi->num_txq)
+ return -ENXIO;
+
+ if (!vsi->xdp_rings[queue_id]->xsk_umem)
+ return -ENXIO;
+
+ ring = vsi->xdp_rings[queue_id];
+
+ /* The idea here is that if NAPI is running, mark a miss, so
+ * it will run again. If not, trigger an interrupt and
+ * schedule the NAPI from interrupt context. If NAPI would be
+ * scheduled here, the interrupt affinity would not be
+ * honored.
+ */
+ q_vector = ring->q_vector;
+ if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+ ice_trigger_sw_intr(&vsi->back->hw, q_vector);
+
+ return 0;
+}
+
+/**
+ * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP UMEM attached
+ * @vsi: VSI to be checked
+ *
+ * Returns true if any of the Rx rings has an AF_XDP UMEM attached
+ */
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
+{
+ int i;
+
+ if (!vsi->xsk_umems)
+ return false;
+
+ for (i = 0; i < vsi->num_xsk_umems; i++) {
+ if (vsi->xsk_umems[i])
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_xsk_clean_rx_ring - clean UMEM queues connected to a given Rx ring
+ * @rx_ring: ring to be cleaned
+ */
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
+{
+ u16 i;
+
+ for (i = 0; i < rx_ring->count; i++) {
+ struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+ if (!rx_buf->addr)
+ continue;
+
+ xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_buf->handle);
+ rx_buf->addr = NULL;
+ }
+}
+
+/**
+ * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its UMEM queues
+ * @xdp_ring: XDP_Tx ring
+ */
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring)
+{
+ u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
+ u32 xsk_frames = 0;
+
+ while (ntc != ntu) {
+ struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
+
+ if (tx_buf->raw_buf)
+ ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+ else
+ xsk_frames++;
+
+ tx_buf->raw_buf = NULL;
+
+ ntc++;
+ if (ntc >= xdp_ring->count)
+ ntc = 0;
+ }
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
new file mode 100644
index 000000000000..3479e1de98fe
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_XSK_H_
+#define _ICE_XSK_H_
+#include "ice_txrx.h"
+#include "ice.h"
+
+struct ice_vsi;
+
+#ifdef CONFIG_XDP_SOCKETS
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid);
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget);
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget);
+int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count);
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring);
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring);
+#else
+static inline int
+ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi,
+ struct xdp_umem __always_unused *umem,
+ u16 __always_unused qid)
+{
+ return -ENOTSUPP;
+}
+
+static inline void
+ice_zca_free(struct zero_copy_allocator __always_unused *zca,
+ unsigned long __always_unused handle)
+{
+}
+
+static inline int
+ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring,
+ int __always_unused budget)
+{
+ return 0;
+}
+
+static inline bool
+ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring,
+ int __always_unused budget)
+{
+ return false;
+}
+
+static inline bool
+ice_alloc_rx_bufs_slow_zc(struct ice_ring __always_unused *rx_ring,
+ u16 __always_unused count)
+{
+ return false;
+}
+
+static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi)
+{
+ return false;
+}
+
+static inline int
+ice_xsk_wakeup(struct net_device __always_unused *netdev,
+ u32 __always_unused queue_id, u32 __always_unused flags)
+{
+ return -ENOTSUPP;
+}
+
+#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0)
+#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0)
+#endif /* CONFIG_XDP_SOCKETS */
+#endif /* !_ICE_XSK_H_ */