aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/devlink/mlx5.rst1
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c101
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c226
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c308
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h39
-rw-r--r--include/linux/mlx5/eq.h4
19 files changed, 675 insertions, 236 deletions
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 38e94ed65936..29ad304e6fba 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -17,6 +17,7 @@ Parameters
- Validation
* - ``enable_roce``
- driverinit
+ - Type: Boolean
* - ``io_eq_size``
- driverinit
- The range is between 64 and 4096.
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 91eb615b89ee..86842cd580ba 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1541,16 +1541,10 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
.nent = MLX5_IB_NUM_PF_EQE,
};
param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
- if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
- err = -ENOMEM;
- goto err_wq;
- }
eq->core = mlx5_eq_create_generic(dev->mdev, &param);
- free_cpumask_var(param.affinity);
if (IS_ERR(eq->core)) {
err = PTR_ERR(eq->core);
goto err_wq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 33904bc87efa..fcfd38fa9e6c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -109,7 +109,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o
#
# SF device
#
-mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o
+mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o
#
# SF manager
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c5f959a9e14b..812e6810cb3b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -984,7 +984,7 @@ struct mlx5e_profile {
};
#define mlx5e_profile_feature_cap(profile, feature) \
- ((profile)->features & (MLX5E_PROFILE_FEATURE_## feature))
+ ((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature))
void mlx5e_build_ptys2ethtool_map(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
index 074ffa4fa5af..b4f3bd7d346e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -120,14 +120,14 @@ static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
cancel_delayed_work_sync(&priv->stats_agent.work);
}
-int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
{
int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
struct mlx5_hv_vhca_agent *agent;
priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
if (!priv->stats_agent.buf)
- return -ENOMEM;
+ return;
agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
MLX5_HV_VHCA_AGENT_STATS,
@@ -142,13 +142,11 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
PTR_ERR(agent));
kvfree(priv->stats_agent.buf);
- return IS_ERR_OR_NULL(agent);
+ return;
}
priv->stats_agent.agent = agent;
INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
-
- return 0;
}
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
index 664463faf77b..29c8c6d3260f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -7,19 +7,12 @@
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
-int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
#else
-
-static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
-{
- return 0;
-}
-
-static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
-{
-}
+static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {}
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {}
#endif
#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
index a0832b86016c..c614fc7fdc9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -45,14 +45,10 @@ verify_uplink_forwarding(struct mlx5e_priv *priv,
termination_table_raw_traffic)) {
NL_SET_ERR_MSG_MOD(extack,
"devices are both uplink, can't offload forwarding");
- pr_err("devices %s %s are both uplink, can't offload forwarding\n",
- priv->netdev->name, out_dev->name);
return -EOPNOTSUPP;
} else if (out_dev != rep_priv->netdev) {
NL_SET_ERR_MSG_MOD(extack,
"devices are not the same uplink, can't offload forwarding");
- pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
- priv->netdev->name, out_dev->name);
return -EOPNOTSUPP;
}
return 0;
@@ -160,10 +156,6 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
}
NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding");
- netdev_warn(priv->netdev,
- "devices %s %s not on same switch HW, can't offload forwarding\n",
- netdev_name(priv->netdev),
- out_dev->name);
return false;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 536fcb2c5e90..57d755db1cf5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1883,24 +1883,19 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
bool is_rx_cq)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_params new_params;
- bool mode_changed;
u8 cq_period_mode, current_cq_period_mode;
+ struct mlx5e_params new_params;
+
+ if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
+ return -EOPNOTSUPP;
+
+ cq_period_mode = cqe_mode_to_period_mode(enable);
- cq_period_mode = enable ?
- MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
- MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
current_cq_period_mode = is_rx_cq ?
priv->channels.params.rx_cq_moderation.cq_period_mode :
priv->channels.params.tx_cq_moderation.cq_period_mode;
- mode_changed = cq_period_mode != current_cq_period_mode;
-
- if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
- !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
- return -EOPNOTSUPP;
- if (!mode_changed)
+ if (cq_period_mode == current_cq_period_mode)
return 0;
new_params = priv->channels.params;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 31c911182498..d36489363a26 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3605,11 +3605,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable)
new_params = priv->channels.params;
if (enable) {
- if (MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
- netdev_warn(netdev, "Can't set HW-GRO when CQE compress is active\n");
- err = -EINVAL;
- goto out;
- }
new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
new_params.packet_merge.shampo.match_criteria_type =
MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
@@ -3871,6 +3866,11 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
features &= ~NETIF_F_RXHASH;
if (netdev->features & NETIF_F_RXHASH)
netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
}
if (mlx5e_is_uplink_rep(priv))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index f09b57c31ed7..96e260fd7987 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1603,6 +1603,12 @@ static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
}
}
+static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ trigger_report(rq, cqe);
+ rq->stats->wqe_err++;
+}
+
static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
@@ -1616,8 +1622,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto free_wqe;
}
@@ -1670,7 +1675,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto free_wqe;
}
@@ -1719,8 +1724,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}
@@ -1988,8 +1992,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}
@@ -2058,8 +2061,7 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- trigger_report(rq, cqe);
- rq->stats->wqe_err++;
+ mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 73fcd9fb17dd..7e7c0c1019f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -35,6 +35,7 @@
#include "en_accel/tls.h"
#include "en_accel/en_accel.h"
#include "en/ptp.h"
+#include "en/port.h"
static unsigned int stats_grps_num(struct mlx5e_priv *priv)
{
@@ -1158,12 +1159,99 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy)
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
}
-void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
- struct ethtool_fec_stats *fec_stats)
+static int fec_num_lanes(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(pmlp_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {};
+ int err;
+
+ MLX5_SET(pmlp_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_PMLP, 0, 0);
+ if (err)
+ return 0;
+
+ return MLX5_GET(pmlp_reg, out, width);
+}
+
+static int fec_active_mode(struct mlx5_core_dev *mdev)
+{
+ unsigned long fec_active_long;
+ u32 fec_active;
+
+ if (mlx5e_get_fec_mode(mdev, &fec_active, NULL))
+ return MLX5E_FEC_NOFEC;
+
+ fec_active_long = fec_active;
+ return find_first_bit(&fec_active_long, sizeof(unsigned long) * BITS_PER_BYTE);
+}
+
+#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \
+ fec_stats->corrected_blocks.lanes[(idx)] = \
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
+ fc_fec_corrected_blocks_lane##idx); \
+ fec_stats->uncorrectable_blocks.lanes[(idx)] = \
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
+ fc_fec_uncorrectable_blocks_lane##idx); \
+})
+
+static void fec_set_fc_stats(struct ethtool_fec_stats *fec_stats,
+ u32 *ppcnt, u8 lanes)
+{
+ if (lanes > 3) { /* 4 lanes */
+ MLX5E_STATS_SET_FEC_BLOCK(3);
+ MLX5E_STATS_SET_FEC_BLOCK(2);
+ }
+ if (lanes > 1) /* 2 lanes */
+ MLX5E_STATS_SET_FEC_BLOCK(1);
+ if (lanes > 0) /* 1 lane */
+ MLX5E_STATS_SET_FEC_BLOCK(0);
+}
+
+static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt)
+{
+ fec_stats->corrected_blocks.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
+ rs_fec_corrected_blocks);
+ fec_stats->uncorrectable_blocks.total =
+ MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
+ rs_fec_uncorrectable_blocks);
+}
+
+static void fec_set_block_stats(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int mode = fec_active_mode(mdev);
+
+ if (mode == MLX5E_FEC_NOFEC)
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
+ if (mlx5_core_access_reg(mdev, in, sz, outl, sz, MLX5_REG_PPCNT, 0, 0))
+ return;
+
+ switch (mode) {
+ case MLX5E_FEC_RS_528_514:
+ case MLX5E_FEC_RS_544_514:
+ case MLX5E_FEC_LLRS_272_257_1:
+ fec_set_rs_stats(fec_stats, out);
+ return;
+ case MLX5E_FEC_FIRECODE:
+ fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev));
+ }
+}
+
+static void fec_set_corrected_bits_total(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
{
u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)];
struct mlx5_core_dev *mdev = priv->mdev;
- u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
@@ -1181,6 +1269,13 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
phy_corrected_bits);
}
+void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
+ struct ethtool_fec_stats *fec_stats)
+{
+ fec_set_corrected_bits_total(priv, fec_stats);
+ fec_set_block_stats(priv, fec_stats);
+}
+
#define PPORT_ETH_EXT_OFF(c) \
MLX5_BYTE_OFF(ppcnt_reg, \
counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index b695aad71ee1..48a45aa54a3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -59,6 +59,8 @@ struct mlx5_eq_table {
struct mutex lock; /* sync async eqs creations */
int num_comp_eqs;
struct mlx5_irq_table *irq_table;
+ struct mlx5_irq **comp_irqs;
+ struct mlx5_irq *ctrl_irq;
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *rmap;
#endif
@@ -266,8 +268,8 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
struct mlx5_priv *priv = &dev->priv;
- u16 vecidx = param->irq_index;
__be64 *pas;
+ u16 vecidx;
void *eqc;
int inlen;
u32 *in;
@@ -289,20 +291,16 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
init_eq_buf(eq);
- eq->irq = mlx5_irq_request(dev, vecidx, param->affinity);
- if (IS_ERR(eq->irq)) {
- err = PTR_ERR(eq->irq);
- goto err_buf;
- }
-
+ eq->irq = param->irq;
vecidx = mlx5_irq_get_index(eq->irq);
+
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
- goto err_irq;
+ goto err_buf;
}
pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
@@ -346,8 +344,6 @@ err_eq:
err_in:
kvfree(in);
-err_irq:
- mlx5_irq_release(eq->irq);
err_buf:
mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
@@ -401,7 +397,6 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
if (err)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
- mlx5_irq_release(eq->irq);
mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
@@ -594,11 +589,8 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
eq->irq_nb.notifier_call = mlx5_eq_async_int;
spin_lock_init(&eq->lock);
- if (!zalloc_cpumask_var(&param->affinity, GFP_KERNEL))
- return -ENOMEM;
err = create_async_eq(dev, &eq->core, param);
- free_cpumask_var(param->affinity);
if (err) {
mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
return err;
@@ -643,11 +635,18 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
struct mlx5_eq_param param = {};
int err;
+ /* All the async_eqs are using single IRQ, request one IRQ and share its
+ * index among all the async_eqs of this device.
+ */
+ table->ctrl_irq = mlx5_ctrl_irq_request(dev);
+ if (IS_ERR(table->ctrl_irq))
+ return PTR_ERR(table->ctrl_irq);
+
MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
mlx5_eq_notifier_register(dev, &table->cq_err_nb);
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
+ .irq = table->ctrl_irq,
.nent = MLX5_NUM_CMD_EQE,
.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
};
@@ -660,7 +659,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
+ .irq = table->ctrl_irq,
.nent = async_eq_depth_devlink_param_get(dev),
};
@@ -670,7 +669,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
goto err2;
param = (struct mlx5_eq_param) {
- .irq_index = MLX5_IRQ_EQ_CTRL,
+ .irq = table->ctrl_irq,
.nent = /* TODO: sriov max_vf + */ 1,
.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
};
@@ -689,6 +688,7 @@ err2:
err1:
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+ mlx5_ctrl_irq_release(table->ctrl_irq);
return err;
}
@@ -703,6 +703,7 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+ mlx5_ctrl_irq_release(table->ctrl_irq);
}
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
@@ -730,12 +731,10 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev,
struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
int err;
- if (!cpumask_available(param->affinity))
- return ERR_PTR(-EINVAL);
-
if (!eq)
return ERR_PTR(-ENOMEM);
+ param->irq = dev->priv.eq_table->ctrl_irq;
err = create_async_eq(dev, eq, param);
if (err) {
kvfree(eq);
@@ -795,6 +794,54 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
+static void comp_irqs_release(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+
+ if (mlx5_core_is_sf(dev))
+ mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
+ else
+ mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
+ kfree(table->comp_irqs);
+}
+
+static int comp_irqs_request(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int ncomp_eqs = table->num_comp_eqs;
+ u16 *cpus;
+ int ret;
+ int i;
+
+ ncomp_eqs = table->num_comp_eqs;
+ table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
+ if (!table->comp_irqs)
+ return -ENOMEM;
+ if (mlx5_core_is_sf(dev)) {
+ ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
+ if (ret < 0)
+ goto free_irqs;
+ return ret;
+ }
+
+ cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
+ if (!cpus) {
+ ret = -ENOMEM;
+ goto free_irqs;
+ }
+ for (i = 0; i < ncomp_eqs; i++)
+ cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
+ ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
+ kfree(cpus);
+ if (ret < 0)
+ goto free_irqs;
+ return ret;
+
+free_irqs:
+ kfree(table->comp_irqs);
+ return ret;
+}
+
static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -809,6 +856,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
tasklet_disable(&eq->tasklet_ctx.task);
kfree(eq);
}
+ comp_irqs_release(dev);
}
static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
@@ -835,12 +883,13 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
int err;
int i;
+ ncomp_eqs = comp_irqs_request(dev);
+ if (ncomp_eqs < 0)
+ return ncomp_eqs;
INIT_LIST_HEAD(&table->comp_eqs_list);
- ncomp_eqs = table->num_comp_eqs;
nent = comp_eq_depth_devlink_param_get(dev);
for (i = 0; i < ncomp_eqs; i++) {
struct mlx5_eq_param param = {};
- int vecidx = i;
eq = kzalloc(sizeof(*eq), GFP_KERNEL);
if (!eq) {
@@ -855,18 +904,11 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
eq->irq_nb.notifier_call = mlx5_eq_comp_int;
param = (struct mlx5_eq_param) {
- .irq_index = vecidx,
+ .irq = table->comp_irqs[i],
.nent = nent,
};
- if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
- err = -ENOMEM;
- goto clean_eq;
- }
- cpumask_set_cpu(cpumask_local_spread(i, dev->priv.numa_node),
- param.affinity);
err = create_map_eq(dev, &eq->core, &param);
- free_cpumask_var(param.affinity);
if (err)
goto clean_eq;
err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
@@ -880,7 +922,9 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
list_add_tail(&eq->list, &table->comp_eqs_list);
}
+ table->num_comp_eqs = ncomp_eqs;
return 0;
+
clean_eq:
kfree(eq);
clean:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index 2b52f7c09152..9d17206d1625 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -431,7 +431,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
int err = 0;
if (!mlx5_esw_allowed(esw))
- return -EPERM;
+ return vlan ? -EPERM : 0;
if (vlan || qos)
set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
new file mode 100644
index 000000000000..380a208ab137
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "mlx5_irq.h"
+#include "pci_irq.h"
+
+static void cpu_put(struct mlx5_irq_pool *pool, int cpu)
+{
+ pool->irqs_per_cpu[cpu]--;
+}
+
+static void cpu_get(struct mlx5_irq_pool *pool, int cpu)
+{
+ pool->irqs_per_cpu[cpu]++;
+}
+
+/* Gets the least loaded CPU. e.g.: the CPU with least IRQs bound to it */
+static int cpu_get_least_loaded(struct mlx5_irq_pool *pool,
+ const struct cpumask *req_mask)
+{
+ int best_cpu = -1;
+ int cpu;
+
+ for_each_cpu_and(cpu, req_mask, cpu_online_mask) {
+ /* CPU has zero IRQs on it. No need to search any more CPUs. */
+ if (!pool->irqs_per_cpu[cpu]) {
+ best_cpu = cpu;
+ break;
+ }
+ if (best_cpu < 0)
+ best_cpu = cpu;
+ if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu])
+ best_cpu = cpu;
+ }
+ if (best_cpu == -1) {
+ /* There isn't online CPUs in req_mask */
+ mlx5_core_err(pool->dev, "NO online CPUs in req_mask (%*pbl)\n",
+ cpumask_pr_args(req_mask));
+ best_cpu = cpumask_first(cpu_online_mask);
+ }
+ pool->irqs_per_cpu[best_cpu]++;
+ return best_cpu;
+}
+
+/* Creating an IRQ from irq_pool */
+static struct mlx5_irq *
+irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ cpumask_var_t auto_mask;
+ struct mlx5_irq *irq;
+ u32 irq_index;
+ int err;
+
+ if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL))
+ return ERR_PTR(-ENOMEM);
+ err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL);
+ if (err)
+ return ERR_PTR(err);
+ if (pool->irqs_per_cpu) {
+ if (cpumask_weight(req_mask) > 1)
+ /* if req_mask contain more then one CPU, set the least loadad CPU
+ * of req_mask
+ */
+ cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask);
+ else
+ cpu_get(pool, cpumask_first(req_mask));
+ }
+ irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask);
+ free_cpumask_var(auto_mask);
+ return irq;
+}
+
+/* Looking for the IRQ with the smallest refcount that fits req_mask.
+ * If pool is sf_comp_pool, then we are looking for an IRQ with any of the
+ * requested CPUs in req_mask.
+ * for example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask
+ * isn't subset of req_mask, so we will skip it. irq1_mask is subset of req_mask,
+ * we don't skip it.
+ * If pool is sf_ctrl_pool, then all IRQs have the same mask, so any IRQ will
+ * fit. And since mask is subset of itself, we will pass the first if bellow.
+ */
+static struct mlx5_irq *
+irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ int start = pool->xa_num_irqs.min;
+ int end = pool->xa_num_irqs.max;
+ struct mlx5_irq *irq = NULL;
+ struct mlx5_irq *iter;
+ int irq_refcount = 0;
+ unsigned long index;
+
+ lockdep_assert_held(&pool->lock);
+ xa_for_each_range(&pool->irqs, index, iter, start, end) {
+ struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter);
+ int iter_refcount = mlx5_irq_read_locked(iter);
+
+ if (!cpumask_subset(iter_mask, req_mask))
+ /* skip IRQs with a mask which is not subset of req_mask */
+ continue;
+ if (iter_refcount < pool->min_threshold)
+ /* If we found an IRQ with less than min_thres, return it */
+ return iter;
+ if (!irq || iter_refcount < irq_refcount) {
+ /* In case we won't find an IRQ with less than min_thres,
+ * keep a pointer to the least used IRQ
+ */
+ irq_refcount = iter_refcount;
+ irq = iter;
+ }
+ }
+ return irq;
+}
+
+/**
+ * mlx5_irq_affinity_request - request an IRQ according to the given mask.
+ * @pool: IRQ pool to request from.
+ * @req_mask: cpumask requested for this IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
+ */
+struct mlx5_irq *
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ struct mlx5_irq *least_loaded_irq, *new_irq;
+
+ mutex_lock(&pool->lock);
+ least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask);
+ if (least_loaded_irq &&
+ mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold)
+ goto out;
+ /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */
+ new_irq = irq_pool_request_irq(pool, req_mask);
+ if (IS_ERR(new_irq)) {
+ if (!least_loaded_irq) {
+ /* We failed to create an IRQ and we didn't find an IRQ */
+ mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n",
+ PTR_ERR(new_irq));
+ mutex_unlock(&pool->lock);
+ return new_irq;
+ }
+ /* We failed to create a new IRQ for the requested affinity,
+ * sharing existing IRQ.
+ */
+ goto out;
+ }
+ least_loaded_irq = new_irq;
+ goto unlock;
+out:
+ mlx5_irq_get_locked(least_loaded_irq);
+ if (mlx5_irq_read_locked(least_loaded_irq) > pool->max_threshold)
+ mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
+ pci_irq_vector(pool->dev->pdev,
+ mlx5_irq_get_index(least_loaded_irq)), pool->name,
+ mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ);
+unlock:
+ mutex_unlock(&pool->lock);
+ return least_loaded_irq;
+}
+
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+ int num_irqs)
+{
+ struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ int i;
+
+ for (i = 0; i < num_irqs; i++) {
+ int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i]));
+
+ synchronize_irq(pci_irq_vector(pool->dev->pdev,
+ mlx5_irq_get_index(irqs[i])));
+ if (mlx5_irq_put(irqs[i]))
+ if (pool->irqs_per_cpu)
+ cpu_put(pool, cpu);
+ }
+}
+
+/**
+ * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQs.
+ * @nirqs: number of IRQs to request.
+ * @irqs: an output array of IRQs pointers.
+ *
+ * Each IRQ is bounded to at most 1 CPU.
+ * This function is requesting IRQs according to the default assignment.
+ * The default assignment policy is:
+ * - in each iteration, request the least loaded IRQ which is not bound to any
+ * CPU of the previous IRQs requested.
+ *
+ * This function returns the number of IRQs requested, (which might be smaller than
+ * @nirqs), if successful, or a negative error code in case of an error.
+ */
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+ int i = 0;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_copy(req_mask, cpu_online_mask);
+ for (i = 0; i < nirqs; i++) {
+ if (mlx5_irq_pool_is_sf_pool(pool))
+ irq = mlx5_irq_affinity_request(pool, req_mask);
+ else
+ /* In case SF pool doesn't exists, fallback to the PF IRQs.
+ * The PF IRQs are already allocated and binded to CPU
+ * at this point. Hence, only an index is needed.
+ */
+ irq = mlx5_irq_request(dev, i, NULL);
+ if (IS_ERR(irq))
+ break;
+ irqs[i] = irq;
+ cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask);
+ mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
+ pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
+ cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
+ mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
+ }
+ free_cpumask_var(req_mask);
+ if (!i)
+ return PTR_ERR(irq);
+ return i;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6b225bb5751a..2c774f367199 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -98,6 +98,8 @@ enum {
MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};
+#define LOG_MAX_SUPPORTED_QPS 0xff
+
static struct mlx5_profile profile[] = {
[0] = {
.mask = 0,
@@ -109,7 +111,7 @@ static struct mlx5_profile profile[] = {
[2] = {
.mask = MLX5_PROF_MASK_QP_SIZE |
MLX5_PROF_MASK_MR_CACHE,
- .log_max_qp = 18,
+ .log_max_qp = LOG_MAX_SUPPORTED_QPS,
.mr_cache[0] = {
.size = 500,
.limit = 250
@@ -523,7 +525,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
to_fw_pkey_sz(dev, 128));
/* Check log_max_qp from HCA caps to set in current profile */
- if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
+ if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) {
+ prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
+ } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
prof->log_max_qp,
MLX5_CAP_GEN_MAX(dev, log_max_qp));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 8116815663a7..23cb63fa4588 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -22,12 +22,40 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
int msix_vec_count);
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
+struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev);
+void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq);
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
struct cpumask *affinity);
-void mlx5_irq_release(struct mlx5_irq *irq);
+int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
+ struct mlx5_irq **irqs);
+void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs);
int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
int mlx5_irq_get_index(struct mlx5_irq *irq);
+struct mlx5_irq_pool;
+#ifdef CONFIG_MLX5_SF
+int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs);
+struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
+ const struct cpumask *req_mask);
+void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
+ int num_irqs);
+#else
+static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline struct mlx5_irq *
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev,
+ struct mlx5_irq **irqs, int num_irqs) {}
+#endif
#endif /* __MLX5_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 163e01fe500e..90fec0649ef5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -7,15 +7,12 @@
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
+#include "pci_irq.h"
#include "lib/sf.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
-#define MLX5_MAX_IRQ_NAME (32)
-/* max irq_index is 2047, so four chars */
-#define MLX5_MAX_IRQ_IDX_CHARS (4)
-
#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
@@ -25,7 +22,6 @@
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
-#define MLX5_EQ_REFS_PER_IRQ (2)
struct mlx5_irq {
struct atomic_notifier_head nh;
@@ -37,16 +33,6 @@ struct mlx5_irq {
int irqn;
};
-struct mlx5_irq_pool {
- char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
- struct xa_limit xa_num_irqs;
- struct mutex lock; /* sync IRQs creations */
- struct xarray irqs;
- u32 max_threshold;
- u32 min_threshold;
- struct mlx5_core_dev *dev;
-};
-
struct mlx5_irq_table {
struct mlx5_irq_pool *pf_pool;
struct mlx5_irq_pool *sf_ctrl_pool;
@@ -153,18 +139,28 @@ static void irq_release(struct mlx5_irq *irq)
kfree(irq);
}
-static void irq_put(struct mlx5_irq *irq)
+int mlx5_irq_put(struct mlx5_irq *irq)
{
struct mlx5_irq_pool *pool = irq->pool;
+ int ret = 0;
mutex_lock(&pool->lock);
irq->refcount--;
- if (!irq->refcount)
+ if (!irq->refcount) {
irq_release(irq);
+ ret = 1;
+ }
mutex_unlock(&pool->lock);
+ return ret;
+}
+
+int mlx5_irq_read_locked(struct mlx5_irq *irq)
+{
+ lockdep_assert_held(&irq->pool->lock);
+ return irq->refcount;
}
-static int irq_get_locked(struct mlx5_irq *irq)
+int mlx5_irq_get_locked(struct mlx5_irq *irq)
{
lockdep_assert_held(&irq->pool->lock);
if (WARN_ON_ONCE(!irq->refcount))
@@ -178,7 +174,7 @@ static int irq_get(struct mlx5_irq *irq)
int err;
mutex_lock(&irq->pool->lock);
- err = irq_get_locked(irq);
+ err = mlx5_irq_get_locked(irq);
mutex_unlock(&irq->pool->lock);
return err;
}
@@ -210,12 +206,8 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}
-static bool irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
-{
- return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
-}
-
-static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
+struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+ const struct cpumask *affinity)
{
struct mlx5_core_dev *dev = pool->dev;
char name[MLX5_MAX_IRQ_NAME];
@@ -226,7 +218,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
if (!irq)
return ERR_PTR(-ENOMEM);
irq->irqn = pci_irq_vector(dev->pdev, i);
- if (!irq_pool_is_sf_pool(pool))
+ if (!mlx5_irq_pool_is_sf_pool(pool))
irq_set_name(pool, name, i);
else
irq_sf_set_name(pool, name, i);
@@ -244,6 +236,10 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
err = -ENOMEM;
goto err_cpumask;
}
+ if (affinity) {
+ cpumask_copy(irq->mask, affinity);
+ irq_set_affinity_hint(irq->irqn, irq->mask);
+ }
irq->pool = pool;
irq->refcount = 1;
irq->index = i;
@@ -255,6 +251,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
}
return irq;
err_xa:
+ irq_set_affinity_hint(irq->irqn, NULL);
free_cpumask_var(irq->mask);
err_cpumask:
free_irq(irq->irqn, &irq->nh);
@@ -275,7 +272,7 @@ int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
return -ENOENT;
ret = atomic_notifier_chain_register(&irq->nh, nb);
if (ret)
- irq_put(irq);
+ mlx5_irq_put(irq);
return ret;
}
@@ -284,7 +281,7 @@ int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
int err = 0;
err = atomic_notifier_chain_unregister(&irq->nh, nb);
- irq_put(irq);
+ mlx5_irq_put(irq);
return err;
}
@@ -300,131 +297,121 @@ int mlx5_irq_get_index(struct mlx5_irq *irq)
/* irq_pool API */
-/* creating an irq from irq_pool */
-static struct mlx5_irq *irq_pool_create_irq(struct mlx5_irq_pool *pool,
- struct cpumask *affinity)
+/* requesting an irq from a given pool according to given index */
+static struct mlx5_irq *
+irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
+ struct cpumask *affinity)
{
struct mlx5_irq *irq;
- u32 irq_index;
- int err;
- err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs,
- GFP_KERNEL);
- if (err)
- return ERR_PTR(err);
- irq = irq_request(pool, irq_index);
- if (IS_ERR(irq))
- return irq;
- cpumask_copy(irq->mask, affinity);
- irq_set_affinity_hint(irq->irqn, irq->mask);
+ mutex_lock(&pool->lock);
+ irq = xa_load(&pool->irqs, vecidx);
+ if (irq) {
+ mlx5_irq_get_locked(irq);
+ goto unlock;
+ }
+ irq = mlx5_irq_alloc(pool, vecidx, affinity);
+unlock:
+ mutex_unlock(&pool->lock);
return irq;
}
-/* looking for the irq with the smallest refcount and the same affinity */
-static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool,
- struct cpumask *affinity)
+static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
- int start = pool->xa_num_irqs.min;
- int end = pool->xa_num_irqs.max;
- struct mlx5_irq *irq = NULL;
- struct mlx5_irq *iter;
- unsigned long index;
+ return irq_table->sf_ctrl_pool;
+}
- lockdep_assert_held(&pool->lock);
- xa_for_each_range(&pool->irqs, index, iter, start, end) {
- if (!cpumask_equal(iter->mask, affinity))
- continue;
- if (iter->refcount < pool->min_threshold)
- return iter;
- if (!irq || iter->refcount < irq->refcount)
- irq = iter;
- }
- return irq;
+static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
+{
+ return irq_table->sf_comp_pool;
}
-/* requesting an irq from a given pool according to given affinity */
-static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
- struct cpumask *affinity)
+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
- struct mlx5_irq *least_loaded_irq, *new_irq;
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = NULL;
- mutex_lock(&pool->lock);
- least_loaded_irq = irq_pool_find_least_loaded(pool, affinity);
- if (least_loaded_irq &&
- least_loaded_irq->refcount < pool->min_threshold)
- goto out;
- new_irq = irq_pool_create_irq(pool, affinity);
- if (IS_ERR(new_irq)) {
- if (!least_loaded_irq) {
- mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n",
- PTR_ERR(new_irq));
- mutex_unlock(&pool->lock);
- return new_irq;
- }
- /* We failed to create a new IRQ for the requested affinity,
- * sharing existing IRQ.
- */
- goto out;
- }
- least_loaded_irq = new_irq;
- goto unlock;
-out:
- irq_get_locked(least_loaded_irq);
- if (least_loaded_irq->refcount > pool->max_threshold)
- mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
- least_loaded_irq->irqn, pool->name,
- least_loaded_irq->refcount / MLX5_EQ_REFS_PER_IRQ);
-unlock:
- mutex_unlock(&pool->lock);
- return least_loaded_irq;
+ if (mlx5_core_is_sf(dev))
+ pool = sf_irq_pool_get(irq_table);
+
+ /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
+ * the PF IRQs pool in case the SF pool doesn't exist.
+ */
+ return pool ? pool : irq_table->pf_pool;
}
-/* requesting an irq from a given pool according to given index */
-static struct mlx5_irq *
-irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
- struct cpumask *affinity)
+static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
{
- struct mlx5_irq *irq;
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = NULL;
- mutex_lock(&pool->lock);
- irq = xa_load(&pool->irqs, vecidx);
- if (irq) {
- irq_get_locked(irq);
- goto unlock;
+ if (mlx5_core_is_sf(dev))
+ pool = sf_ctrl_irq_pool_get(irq_table);
+
+ /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
+ * the PF IRQs pool in case the SF pool doesn't exist.
+ */
+ return pool ? pool : irq_table->pf_pool;
+}
+
+/**
+ * mlx5_irqs_release - release one or more IRQs back to the system.
+ * @irqs: IRQs to be released.
+ * @nirqs: number of IRQs to be released.
+ */
+static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
+{
+ int i;
+
+ for (i = 0; i < nirqs; i++) {
+ synchronize_irq(irqs[i]->irqn);
+ mlx5_irq_put(irqs[i]);
}
- irq = irq_request(pool, vecidx);
- if (IS_ERR(irq) || !affinity)
- goto unlock;
- cpumask_copy(irq->mask, affinity);
- if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max &&
- cpumask_empty(irq->mask))
- cpumask_set_cpu(cpumask_first(cpu_online_mask), irq->mask);
- irq_set_affinity_hint(irq->irqn, irq->mask);
-unlock:
- mutex_unlock(&pool->lock);
- return irq;
}
-static struct mlx5_irq_pool *find_sf_irq_pool(struct mlx5_irq_table *irq_table,
- int i, struct cpumask *affinity)
+/**
+ * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
+ * @ctrl_irq: ctrl IRQ to be released.
+ */
+void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
{
- if (cpumask_empty(affinity) && i == MLX5_IRQ_EQ_CTRL)
- return irq_table->sf_ctrl_pool;
- return irq_table->sf_comp_pool;
+ mlx5_irqs_release(&ctrl_irq, 1);
}
/**
- * mlx5_irq_release - release an IRQ back to the system.
- * @irq: irq to be released.
+ * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
+ * @dev: mlx5 device that requesting the IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
*/
-void mlx5_irq_release(struct mlx5_irq *irq)
+struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
- synchronize_irq(irq->irqn);
- irq_put(irq);
+ struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return ERR_PTR(-ENOMEM);
+ cpumask_copy(req_mask, cpu_online_mask);
+ if (!mlx5_irq_pool_is_sf_pool(pool)) {
+ /* In case we are allocating a control IRQ for PF/VF */
+ if (!pool->xa_num_irqs.max) {
+ cpumask_clear(req_mask);
+ /* In case we only have a single IRQ for PF/VF */
+ cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
+ }
+ /* Allocate the IRQ in the last index of the pool */
+ irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
+ } else {
+ irq = mlx5_irq_affinity_request(pool, req_mask);
+ }
+
+ free_cpumask_var(req_mask);
+ return irq;
}
/**
- * mlx5_irq_request - request an IRQ for mlx5 device.
+ * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
* @dev: mlx5 device that requesting the IRQ.
* @vecidx: vector index of the IRQ. This argument is ignore if affinity is
* provided.
@@ -439,23 +426,8 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
struct mlx5_irq_pool *pool;
struct mlx5_irq *irq;
- if (mlx5_core_is_sf(dev)) {
- pool = find_sf_irq_pool(irq_table, vecidx, affinity);
- if (!pool)
- /* we don't have IRQs for SFs, using the PF IRQs */
- goto pf_irq;
- if (cpumask_empty(affinity) && !strcmp(pool->name, "mlx5_sf_comp"))
- /* In case an SF user request IRQ with vecidx */
- irq = irq_pool_request_vector(pool, vecidx, NULL);
- else
- irq = irq_pool_request_affinity(pool, affinity);
- goto out;
- }
-pf_irq:
pool = irq_table->pf_pool;
- vecidx = (vecidx == MLX5_IRQ_EQ_CTRL) ? pool->xa_num_irqs.max : vecidx;
irq = irq_pool_request_vector(pool, vecidx, affinity);
-out:
if (IS_ERR(irq))
return irq;
mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
@@ -464,6 +436,51 @@ out:
return irq;
}
+/**
+ * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
+ * @irqs: IRQs to be released.
+ * @nirqs: number of IRQs to be released.
+ */
+void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
+{
+ mlx5_irqs_release(irqs, nirqs);
+}
+
+/**
+ * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
+ * @dev: mlx5 device that is requesting the IRQs.
+ * @cpus: CPUs array for binding the IRQs
+ * @nirqs: number of IRQs to request.
+ * @irqs: an output array of IRQs pointers.
+ *
+ * Each IRQ is bound to at most 1 CPU.
+ * This function is requests nirqs IRQs, starting from @vecidx.
+ *
+ * This function returns the number of IRQs requested, (which might be smaller than
+ * @nirqs), if successful, or a negative error code in case of an error.
+ */
+int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
+ struct mlx5_irq **irqs)
+{
+ cpumask_var_t req_mask;
+ struct mlx5_irq *irq;
+ int i;
+
+ if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
+ return -ENOMEM;
+ for (i = 0; i < nirqs; i++) {
+ cpumask_set_cpu(cpus[i], req_mask);
+ irq = mlx5_irq_request(dev, i, req_mask);
+ if (IS_ERR(irq))
+ break;
+ cpumask_clear(req_mask);
+ irqs[i] = irq;
+ }
+
+ free_cpumask_var(req_mask);
+ return i ? i : PTR_ERR(irq);
+}
+
static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
u32 min_threshold, u32 max_threshold)
@@ -500,6 +517,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool)
irq_release(irq);
xa_destroy(&pool->irqs);
mutex_destroy(&pool->lock);
+ kfree(pool->irqs_per_cpu);
kvfree(pool);
}
@@ -547,7 +565,17 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
err = PTR_ERR(table->sf_comp_pool);
goto err_sf_ctrl;
}
+
+ table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
+ if (!table->sf_comp_pool->irqs_per_cpu) {
+ err = -ENOMEM;
+ goto err_irqs_per_cpu;
+ }
+
return 0;
+
+err_irqs_per_cpu:
+ irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
irq_pool_free(table->sf_ctrl_pool);
err_pf:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
new file mode 100644
index 000000000000..5c7e68bee43a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __PCI_IRQ_H__
+#define __PCI_IRQ_H__
+
+#include <linux/mlx5/driver.h>
+
+#define MLX5_MAX_IRQ_NAME (32)
+/* max irq_index is 2047, so four chars */
+#define MLX5_MAX_IRQ_IDX_CHARS (4)
+#define MLX5_EQ_REFS_PER_IRQ (2)
+
+struct mlx5_irq;
+
+struct mlx5_irq_pool {
+ char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
+ struct xa_limit xa_num_irqs;
+ struct mutex lock; /* sync IRQs creations */
+ struct xarray irqs;
+ u32 max_threshold;
+ u32 min_threshold;
+ u16 *irqs_per_cpu;
+ struct mlx5_core_dev *dev;
+};
+
+struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev);
+static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
+{
+ return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
+}
+
+struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+ const struct cpumask *affinity);
+int mlx5_irq_get_locked(struct mlx5_irq *irq);
+int mlx5_irq_read_locked(struct mlx5_irq *irq);
+int mlx5_irq_put(struct mlx5_irq *irq);
+
+#endif /* __PCI_IRQ_H__ */
diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
index ea3ff5a8ced3..3705a382276b 100644
--- a/include/linux/mlx5/eq.h
+++ b/include/linux/mlx5/eq.h
@@ -9,13 +9,13 @@
#define MLX5_NUM_SPARE_EQE (0x80)
struct mlx5_eq;
+struct mlx5_irq;
struct mlx5_core_dev;
struct mlx5_eq_param {
- u8 irq_index;
int nent;
u64 mask[4];
- cpumask_var_t affinity;
+ struct mlx5_irq *irq;
};
struct mlx5_eq *