aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-06-30 18:41:13 -0700
committerDavid S. Miller <davem@davemloft.net>2019-06-30 18:41:13 -0700
commit954a5a029472568845a25cd1c59e02e09db3316c (patch)
tree6e77c71c4d1154ca4bb195ed155942108af9a0bb
parentMerge branch '10GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue (diff)
parentnet/mlx5e: Disallow tc redirect offload cases we don't support (diff)
downloadlinux-dev-954a5a029472568845a25cd1c59e02e09db3316c.tar.xz
linux-dev-954a5a029472568845a25cd1c59e02e09db3316c.zip
Merge tag 'mlx5e-updates-2019-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says: ==================== mlx5e-updates-2019-06-28 This series adds some misc updates for mlx5e driver 1) Allow adding the same mac more than once in MPFS table 2) Move to HW checksumming advertising 3) Report netdevice MPLS features 4) Correct physical port name of the PF representor 5) Reduce stack usage in mlx5_eswitch_termtbl_create 6) Refresh TIR improvement for representors 7) Expose same physical switch_id for all representors ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to '')
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c8
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c18
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c13
-rw-r--r--drivers/infiniband/hw/mlx5/main.c75
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h2
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c10
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c439
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c601
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c56
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c334
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c22
-rw-r--r--include/linux/mlx5/driver.h23
-rw-r--r--include/linux/mlx5/eq.h23
-rw-r--r--include/linux/mlx5/eswitch.h29
-rw-r--r--include/linux/mlx5/fs.h18
-rw-r--r--include/linux/mlx5/mlx5_ifc.h58
-rw-r--r--include/linux/mlx5/qp.h5
-rw-r--r--include/net/devlink.h6
-rw-r--r--net/core/devlink.c6
43 files changed, 1462 insertions, 686 deletions
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 2e2e65f00257..0220736b073e 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -522,9 +522,9 @@ repoll:
case MLX5_CQE_SIG_ERR:
sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
- read_lock(&dev->mdev->priv.mkey_table.lock);
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+ xa_lock(&dev->mdev->priv.mkey_table);
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
mr = to_mibmr(mmkey);
get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
mr->sig->sig_err_exists = true;
@@ -537,7 +537,7 @@ repoll:
mr->sig->err_item.expected,
mr->sig->err_item.actual);
- read_unlock(&dev->mdev->priv.mkey_table.lock);
+ xa_unlock(&dev->mdev->priv.mkey_table);
goto repoll;
}
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 80b42d069328..931f587dfb8f 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1043,13 +1043,10 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
struct mlx5_ib_dev *dev,
void *in, void *out)
{
- struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
- unsigned long flags;
struct mlx5_core_mkey *mkey;
void *mkc;
u8 key;
- int err;
mkey = &devx_mr->mmkey;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
@@ -1062,11 +1059,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
mkey->pd = MLX5_GET(mkc, mkc, pd);
devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
- write_lock_irqsave(&table->lock, flags);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key),
- mkey);
- write_unlock_irqrestore(&table->lock, flags);
- return err;
+ return xa_err(xa_store(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL));
}
static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
@@ -1117,12 +1111,8 @@ static void devx_free_indirect_mkey(struct rcu_head *rcu)
*/
static void devx_cleanup_mkey(struct devx_obj *obj)
{
- struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table;
- unsigned long flags;
-
- write_lock_irqsave(&table->lock, flags);
- radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key));
- write_unlock_irqrestore(&table->lock, flags);
+ xa_erase(&obj->mdev->priv.mkey_table,
+ mlx5_base_mkey(obj->devx_mr.mmkey.key));
}
static int devx_obj_cleanup(struct ib_uobject *uobject,
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index 1fc302d41a53..b8841355fcd5 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -65,11 +65,12 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
struct uverbs_attr_bundle *attrs)
{
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
+ struct mlx5_flow_context flow_context = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_ib_flow_handler *flow_handler;
struct mlx5_ib_flow_matcher *fs_matcher;
struct ib_uobject **arr_flow_actions;
struct ib_uflow_resources *uflow_res;
+ struct mlx5_flow_act flow_act = {};
void *devx_obj;
int dest_id, dest_type;
void *cmd_in;
@@ -172,17 +173,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
arr_flow_actions[i]->object);
}
- ret = uverbs_copy_from(&flow_act.flow_tag, attrs,
+ ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
MLX5_IB_ATTR_CREATE_FLOW_TAG);
if (!ret) {
- if (flow_act.flow_tag >= BIT(24)) {
+ if (flow_context.flow_tag >= BIT(24)) {
ret = -EINVAL;
goto err_out;
}
- flow_act.flags |= FLOW_ACT_HAS_TAG;
+ flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
}
- flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
+ flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher,
+ &flow_context,
+ &flow_act,
counter_id,
cmd_in, inlen,
dest_id, dest_type);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 340290b883fe..b1d5f4382d85 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2666,11 +2666,15 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
}
}
-static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
- u32 *match_v, const union ib_flow_spec *ib_spec,
+static int parse_flow_attr(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_spec *spec,
+ const union ib_flow_spec *ib_spec,
const struct ib_flow_attr *flow_attr,
struct mlx5_flow_act *action, u32 prev_type)
{
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ u32 *match_c = spec->match_criteria;
+ u32 *match_v = spec->match_value;
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
@@ -2989,8 +2993,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
if (ib_spec->flow_tag.tag_id >= BIT(24))
return -EINVAL;
- action->flow_tag = ib_spec->flow_tag.tag_id;
- action->flags |= FLOW_ACT_HAS_TAG;
+ flow_context->flow_tag = ib_spec->flow_tag.tag_id;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
break;
case IB_FLOW_SPEC_ACTION_DROP:
if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
@@ -3084,7 +3088,8 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
return VALID_SPEC_NA;
return is_crypto && is_ipsec &&
- (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ?
+ (!egress || (!is_drop &&
+ !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
VALID_SPEC_VALID : VALID_SPEC_INVALID;
}
@@ -3464,6 +3469,37 @@ free:
return ret;
}
+static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw,
+ rep->vport));
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+}
+
static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
const struct ib_flow_attr *flow_attr,
@@ -3473,7 +3509,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
+ struct mlx5_flow_act flow_act = {};
struct mlx5_flow_spec *spec;
struct mlx5_flow_destination dest_arr[2] = {};
struct mlx5_flow_destination *rule_dst = dest_arr;
@@ -3504,8 +3540,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(dev->mdev, spec->match_criteria,
- spec->match_value,
+ err = parse_flow_attr(dev->mdev, spec,
ib_flow, flow_attr, &flow_act,
prev_type);
if (err < 0)
@@ -3519,19 +3554,15 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
set_underlay_qp(dev, spec, underlay_qpn);
if (dev->is_rep) {
- void *misc;
+ struct mlx5_eswitch_rep *rep;
- if (!dev->port[flow_attr->port - 1].rep) {
+ rep = dev->port[flow_attr->port - 1].rep;
+ if (!rep) {
err = -EINVAL;
goto free;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port,
- dev->port[flow_attr->port - 1].rep->vport);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ mlx5_ib_set_rule_source_port(dev, spec, rep);
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
@@ -3572,11 +3603,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
- if ((flow_act.flags & FLOW_ACT_HAS_TAG) &&
+ if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- flow_act.flow_tag, flow_attr->type);
+ spec->flow_context.flow_tag, flow_attr->type);
err = -EINVAL;
goto free;
}
@@ -3947,6 +3978,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct mlx5_flow_destination *dst,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act,
void *cmd_in, int inlen,
int dst_num)
@@ -3969,6 +4001,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
fs_matcher->mask_len);
spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+ spec->flow_context = *flow_context;
handler->rule = mlx5_add_flow_rules(ft, spec,
flow_act, dst, dst_num);
@@ -4033,6 +4066,7 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_ib_flow_handler *
mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act,
u32 counter_id,
void *cmd_in, int inlen, int dest_id,
@@ -4085,7 +4119,8 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
dst_num++;
}
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
+ handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
+ flow_context, flow_act,
cmd_in, inlen, dst_num);
if (IS_ERR(handler)) {
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 40eb8be482e4..1c205c2bd486 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -920,6 +920,7 @@ struct mlx5_ib_lb_state {
};
struct mlx5_ib_pf_eq {
+ struct notifier_block irq_nb;
struct mlx5_ib_dev *dev;
struct mlx5_eq *core;
struct work_struct work;
@@ -1316,6 +1317,7 @@ extern const struct uapi_definition mlx5_ib_devx_defs[];
extern const struct uapi_definition mlx5_ib_flow_defs[];
struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act, u32 counter_id,
void *cmd_in, int inlen, int dest_id, int dest_type);
bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 5f09699fab98..83b452d977d4 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -130,7 +130,7 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
struct mlx5_cache_ent *ent = &cache->ent[c];
u8 key;
unsigned long flags;
- struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
+ struct xarray *mkeys = &dev->mdev->priv.mkey_table;
int err;
spin_lock_irqsave(&ent->lock, flags);
@@ -158,12 +158,12 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
ent->size++;
spin_unlock_irqrestore(&ent->lock, flags);
- write_lock_irqsave(&table->lock, flags);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey);
+ xa_lock_irqsave(mkeys, flags);
+ err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_ATOMIC));
+ xa_unlock_irqrestore(mkeys, flags);
if (err)
pr_err("Error inserting to mkey tree. 0x%x\n", -err);
- write_unlock_irqrestore(&table->lock, flags);
if (!completion_done(&ent->compl))
complete(&ent->compl);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 91507a2e9290..c594489eb2d7 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -768,7 +768,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
bcnt -= *bytes_committed;
next_mr:
- mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key));
if (!mkey_is_eq(mmkey, key)) {
mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
ret = -EFAULT;
@@ -1488,9 +1488,11 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
mlx5_eq_update_ci(eq->core, cc, 1);
}
-static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr)
+static int mlx5_ib_eq_pf_int(struct notifier_block *nb, unsigned long type,
+ void *data)
{
- struct mlx5_ib_pf_eq *eq = eq_ptr;
+ struct mlx5_ib_pf_eq *eq =
+ container_of(nb, struct mlx5_ib_pf_eq, irq_nb);
unsigned long flags;
if (spin_trylock_irqsave(&eq->lock, flags)) {
@@ -1553,20 +1555,26 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
goto err_mempool;
}
+ eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_PFAULT_IDX,
+ .irq_index = 0,
.mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
.nent = MLX5_IB_NUM_PF_EQE,
- .context = eq,
- .handler = mlx5_ib_eq_pf_int
};
- eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param);
+ eq->core = mlx5_eq_create_generic(dev->mdev, &param);
if (IS_ERR(eq->core)) {
err = PTR_ERR(eq->core);
goto err_wq;
}
+ err = mlx5_eq_enable(dev->mdev, eq->core, &eq->irq_nb);
+ if (err) {
+ mlx5_ib_err(dev, "failed to enable odp EQ %d\n", err);
+ goto err_eq;
+ }
return 0;
+err_eq:
+ mlx5_eq_destroy_generic(dev->mdev, eq->core);
err_wq:
destroy_workqueue(eq->wq);
err_mempool:
@@ -1579,6 +1587,7 @@ mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
int err;
+ mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb);
err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
cancel_work_sync(&eq->work);
destroy_workqueue(eq->wq);
@@ -1677,8 +1686,8 @@ static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev,
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(sg_list[i].lkey));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(sg_list[i].lkey));
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
atomic_dec(&mr->num_pending_prefetch);
}
@@ -1697,8 +1706,8 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd,
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(sg_list[i].lkey));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(sg_list[i].lkey));
if (!mmkey || mmkey->key != sg_list[i].lkey) {
ret = false;
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5fe2bf916c06..6aa0fe6de878 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
#
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
- transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
+ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index a4cf123e3f17..ddf1b87f1bc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -187,6 +187,7 @@ TRACE_EVENT(mlx5_fs_set_fte,
__field(u32, index)
__field(u32, action)
__field(u32, flow_tag)
+ __field(u32, flow_source)
__field(u8, mask_enable)
__field(int, new_fte)
__array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
@@ -204,7 +205,8 @@ TRACE_EVENT(mlx5_fs_set_fte,
__entry->index = fte->index;
__entry->action = fte->action.action;
__entry->mask_enable = __entry->fg->mask.match_criteria_enable;
- __entry->flow_tag = fte->action.flow_tag;
+ __entry->flow_tag = fte->flow_context.flow_tag;
+ __entry->flow_source = fte->flow_context.flow_source;
memcpy(__entry->mask_outer,
MLX5_ADDR_OF(fte_match_param,
&__entry->fg->mask.match_criteria,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 3fe3fe128256..3281ace5e126 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -763,6 +763,7 @@ struct mlx5e_profile {
void (*cleanup_tx)(struct mlx5e_priv *priv);
void (*enable)(struct mlx5e_priv *priv);
void (*disable)(struct mlx5e_priv *priv);
+ int (*update_rx)(struct mlx5e_priv *priv);
void (*update_stats)(struct mlx5e_priv *priv);
void (*update_carrier)(struct mlx5e_priv *priv);
struct {
@@ -1034,6 +1035,7 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
int mlx5e_create_tises(struct mlx5e_priv *priv);
+int mlx5e_update_nic_rx(struct mlx5e_priv *priv);
void mlx5e_update_carrier(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index f5ad531e1749..3739646b653f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -5,6 +5,7 @@
#include <net/gre.h>
#include <net/geneve.h>
#include "en/tc_tun.h"
+#include "en_tc.h"
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
{
@@ -47,7 +48,8 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
*route_dev = dev;
if (is_vlan_dev(*route_dev))
*out_dev = uplink_dev;
- else if (mlx5e_eswitch_rep(dev))
+ else if (mlx5e_eswitch_rep(dev) &&
+ mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
*out_dev = *route_dev;
else
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 05f07331ab41..198a52d1e515 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -46,7 +46,7 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
"%d.%d.%04d (%.16s)",
fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
mdev->board_id);
- strlcpy(drvinfo->bus_info, pci_name(mdev->pdev),
+ strlcpy(drvinfo->bus_info, dev_name(mdev->device),
sizeof(drvinfo->bus_info));
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 4421c10f58ae..839662644ed3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -426,7 +426,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
}
spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
- flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 4789735b8c7f..1085040675ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2845,7 +2845,7 @@ static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
if (hw_modify)
hw_modify(priv);
- mlx5e_refresh_tirs(priv, false);
+ priv->profile->update_rx(priv);
mlx5e_activate_priv_channels(priv);
/* return carrier back if needed */
@@ -2892,7 +2892,7 @@ int mlx5e_open_locked(struct net_device *netdev)
if (err)
goto err_clear_state_opened_flag;
- mlx5e_refresh_tirs(priv, false);
+ priv->profile->update_rx(priv);
mlx5e_activate_priv_channels(priv);
if (priv->profile->update_carrier)
priv->profile->update_carrier(priv);
@@ -4617,14 +4617,18 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->ethtool_ops = &mlx5e_ethtool_ops;
netdev->vlan_features |= NETIF_F_SG;
- netdev->vlan_features |= NETIF_F_IP_CSUM;
- netdev->vlan_features |= NETIF_F_IPV6_CSUM;
+ netdev->vlan_features |= NETIF_F_HW_CSUM;
netdev->vlan_features |= NETIF_F_GRO;
netdev->vlan_features |= NETIF_F_TSO;
netdev->vlan_features |= NETIF_F_TSO6;
netdev->vlan_features |= NETIF_F_RXCSUM;
netdev->vlan_features |= NETIF_F_RXHASH;
+ netdev->mpls_features |= NETIF_F_SG;
+ netdev->mpls_features |= NETIF_F_HW_CSUM;
+ netdev->mpls_features |= NETIF_F_TSO;
+ netdev->mpls_features |= NETIF_F_TSO6;
+
netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX;
netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX;
@@ -4640,8 +4644,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) ||
MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
- netdev->hw_enc_features |= NETIF_F_IP_CSUM;
- netdev->hw_enc_features |= NETIF_F_IPV6_CSUM;
+ netdev->hw_enc_features |= NETIF_F_HW_CSUM;
netdev->hw_enc_features |= NETIF_F_TSO;
netdev->hw_enc_features |= NETIF_F_TSO6;
netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
@@ -4925,6 +4928,11 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
mlx5_lag_remove(mdev);
}
+int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
+{
+ return mlx5e_refresh_tirs(priv, false);
+}
+
static const struct mlx5e_profile mlx5e_nic_profile = {
.init = mlx5e_nic_init,
.cleanup = mlx5e_nic_cleanup,
@@ -4934,6 +4942,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
.cleanup_tx = mlx5e_cleanup_nic_tx,
.enable = mlx5e_nic_enable,
.disable = mlx5e_nic_disable,
+ .update_rx = mlx5e_update_nic_rx,
.update_stats = mlx5e_update_ndo_stats,
.update_carrier = mlx5e_update_carrier,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 3999da3e6314..330034fcdfc5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -391,30 +391,19 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
static int mlx5e_rep_get_port_parent_id(struct net_device *dev,
struct netdev_phys_item_id *ppid)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct net_device *uplink_upper = NULL;
- struct mlx5e_priv *uplink_priv = NULL;
- struct net_device *uplink_dev;
+ struct mlx5_eswitch *esw;
+ struct mlx5e_priv *priv;
+ u64 parent_id;
+
+ priv = netdev_priv(dev);
+ esw = priv->mdev->priv.eswitch;
if (esw->mode == SRIOV_NONE)
return -EOPNOTSUPP;
- uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
- if (uplink_dev) {
- uplink_upper = netdev_master_upper_dev_get(uplink_dev);
- uplink_priv = netdev_priv(uplink_dev);
- }
-
- ppid->id_len = ETH_ALEN;
- if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) {
- ether_addr_copy(ppid->id, uplink_upper->dev_addr);
- } else {
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep = rpriv->rep;
-
- ether_addr_copy(ppid->id, rep->hw_id);
- }
+ parent_id = mlx5_query_nic_system_image_guid(priv->mdev);
+ ppid->id_len = sizeof(parent_id);
+ memcpy(ppid->id, &parent_id, sizeof(parent_id));
return 0;
}
@@ -1145,6 +1134,8 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
if (rep->vport == MLX5_VPORT_UPLINK)
ret = snprintf(buf, len, "p%d", fn);
+ else if (rep->vport == MLX5_VPORT_PF)
+ ret = snprintf(buf, len, "pf%d", fn);
else
ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1);
@@ -1636,6 +1627,11 @@ static void mlx5e_rep_enable(struct mlx5e_priv *priv)
mlx5e_set_netdev_mtu_boundaries(priv);
}
+static int mlx5e_update_rep_rx(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
{
struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
@@ -1711,6 +1707,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
.init_tx = mlx5e_init_rep_tx,
.cleanup_tx = mlx5e_cleanup_rep_tx,
.enable = mlx5e_rep_enable,
+ .update_rx = mlx5e_update_rep_rx,
.update_stats = mlx5e_rep_update_hw_counters,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
@@ -1726,6 +1723,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
.cleanup_tx = mlx5e_cleanup_rep_tx,
.enable = mlx5e_uplink_rep_enable,
.disable = mlx5e_uplink_rep_disable,
+ .update_rx = mlx5e_update_rep_rx,
.update_stats = mlx5e_uplink_rep_update_hw_counters,
.update_carrier = mlx5e_update_carrier,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index eb8433cc49a7..e6b199cd68ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -717,19 +717,22 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
+ struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
struct mlx5_core_dev *dev = priv->mdev;
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
- .flow_tag = attr->flow_tag,
.reformat_id = 0,
- .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND,
+ .flags = FLOW_ACT_NO_APPEND,
};
struct mlx5_fc *counter = NULL;
bool table_created = false;
int err, dest_ix = 0;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ flow_context->flow_tag = attr->flow_tag;
+
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
if (err) {
@@ -2799,6 +2802,16 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv,
return err;
}
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
+ struct net_device *out_dev)
+{
+ if (is_merged_eswitch_dev(priv, out_dev))
+ return true;
+
+ return mlx5e_eswitch_rep(out_dev) &&
+ same_hw_devs(priv, netdev_priv(out_dev));
+}
+
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
struct mlx5e_tc_flow *flow,
@@ -2864,9 +2877,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
- if (netdev_port_same_parent_id(priv->netdev,
- out_dev) ||
- is_merged_eswitch_dev(priv, out_dev)) {
+ if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev);
@@ -2883,6 +2894,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
if (err)
return err;
}
+
if (is_vlan_dev(parse_attr->filter_dev)) {
err = add_vlan_pop_action(priv, attr,
&action);
@@ -2890,8 +2902,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
return err;
}
- if (!mlx5e_eswitch_rep(out_dev))
+ if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
+ priv->netdev->name, out_dev->name);
return -EOPNOTSUPP;
+ }
out_priv = netdev_priv(out_dev);
rpriv = out_priv->ppriv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index f62e81902d27..8f288cc53cee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -74,6 +74,9 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags);
void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
+ struct net_device *out_dev);
+
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 5e9319d3d90c..58fff2f39b38 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -61,17 +61,21 @@ enum {
MLX5_EQ_DOORBEL_OFFSET = 0x40,
};
-struct mlx5_irq_info {
- cpumask_var_t mask;
- char name[MLX5_MAX_IRQ_NAME];
- void *context; /* dev_id provided to request_irq */
+/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update
+ * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is
+ * used to set the EQ size, budget must be smaller than the EQ size.
+ */
+enum {
+ MLX5_EQ_POLLING_BUDGET = 128,
};
+static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);
+
struct mlx5_eq_table {
struct list_head comp_eqs_list;
- struct mlx5_eq pages_eq;
- struct mlx5_eq cmd_eq;
- struct mlx5_eq async_eq;
+ struct mlx5_eq_async pages_eq;
+ struct mlx5_eq_async cmd_eq;
+ struct mlx5_eq_async async_eq;
struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
@@ -79,11 +83,8 @@ struct mlx5_eq_table {
struct mlx5_nb cq_err_nb;
struct mutex lock; /* sync async eqs creations */
- int num_comp_vectors;
- struct mlx5_irq_info *irq_info;
-#ifdef CONFIG_RFS_ACCEL
- struct cpu_rmap *rmap;
-#endif
+ int num_comp_eqs;
+ struct mlx5_irq_table *irq_table;
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -124,16 +125,24 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
return cq;
}
-static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
+static int mlx5_eq_comp_int(struct notifier_block *nb,
+ __always_unused unsigned long action,
+ __always_unused void *data)
{
- struct mlx5_eq_comp *eq_comp = eq_ptr;
- struct mlx5_eq *eq = eq_ptr;
+ struct mlx5_eq_comp *eq_comp =
+ container_of(nb, struct mlx5_eq_comp, irq_nb);
+ struct mlx5_eq *eq = &eq_comp->core;
struct mlx5_eqe *eqe;
- int set_ci = 0;
+ int num_eqes = 0;
u32 cqn = -1;
- while ((eqe = next_eqe_sw(eq))) {
+ eqe = next_eqe_sw(eq);
+ if (!eqe)
+ goto out;
+
+ do {
struct mlx5_core_cq *cq;
+
/* Make sure we read EQ entry contents after we've
* checked the ownership bit.
*/
@@ -151,26 +160,16 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
}
++eq->cons_index;
- ++set_ci;
- /* The HCA will think the queue has overflowed if we
- * don't tell it we've been processing events. We
- * create our EQs with MLX5_NUM_SPARE_EQE extra
- * entries, so we must update our consumer index at
- * least that often.
- */
- if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
- eq_update_ci(eq, 0);
- set_ci = 0;
- }
- }
+ } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+out:
eq_update_ci(eq, 1);
if (cqn != -1)
tasklet_schedule(&eq_comp->tasklet_ctx.task);
- return IRQ_HANDLED;
+ return 0;
}
/* Some architectures don't latch interrupts when they are disabled, so using
@@ -184,25 +183,32 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
disable_irq(eq->core.irqn);
count_eqe = eq->core.cons_index;
- mlx5_eq_comp_int(eq->core.irqn, eq);
+ mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
count_eqe = eq->core.cons_index - count_eqe;
enable_irq(eq->core.irqn);
return count_eqe;
}
-static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
+static int mlx5_eq_async_int(struct notifier_block *nb,
+ unsigned long action, void *data)
{
- struct mlx5_eq *eq = eq_ptr;
+ struct mlx5_eq_async *eq_async =
+ container_of(nb, struct mlx5_eq_async, irq_nb);
+ struct mlx5_eq *eq = &eq_async->core;
struct mlx5_eq_table *eqt;
struct mlx5_core_dev *dev;
struct mlx5_eqe *eqe;
- int set_ci = 0;
+ int num_eqes = 0;
dev = eq->dev;
eqt = dev->priv.eq_table;
- while ((eqe = next_eqe_sw(eq))) {
+ eqe = next_eqe_sw(eq);
+ if (!eqe)
+ goto out;
+
+ do {
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
@@ -217,23 +223,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
++eq->cons_index;
- ++set_ci;
- /* The HCA will think the queue has overflowed if we
- * don't tell it we've been processing events. We
- * create our EQs with MLX5_NUM_SPARE_EQE extra
- * entries, so we must update our consumer index at
- * least that often.
- */
- if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
- eq_update_ci(eq, 0);
- set_ci = 0;
- }
- }
+ } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+out:
eq_update_ci(eq, 1);
- return IRQ_HANDLED;
+ return 0;
}
static void init_eq_buf(struct mlx5_eq *eq)
@@ -248,23 +244,19 @@ static void init_eq_buf(struct mlx5_eq *eq)
}
static int
-create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
struct mlx5_eq_param *param)
{
- struct mlx5_eq_table *eq_table = dev->priv.eq_table;
struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
- u8 vecidx = param->index;
+ u8 vecidx = param->irq_index;
__be64 *pas;
void *eqc;
int inlen;
u32 *in;
int err;
- if (eq_table->irq_info[vecidx].context)
- return -EEXIST;
-
/* Init CQ table */
memset(cq_table, 0, sizeof(*cq_table));
spin_lock_init(&cq_table->lock);
@@ -307,34 +299,19 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
if (err)
goto err_in;
- snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
- name, pci_name(dev->pdev));
- eq_table->irq_info[vecidx].context = param->context;
-
eq->vecidx = vecidx;
eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
eq->irqn = pci_irq_vector(dev->pdev, vecidx);
eq->dev = dev;
eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
- err = request_irq(eq->irqn, param->handler, 0,
- eq_table->irq_info[vecidx].name, param->context);
- if (err)
- goto err_eq;
err = mlx5_debug_eq_add(dev, eq);
if (err)
- goto err_irq;
-
- /* EQs are created in ARMED state
- */
- eq_update_ci(eq, 1);
+ goto err_eq;
kvfree(in);
return 0;
-err_irq:
- free_irq(eq->irqn, eq);
-
err_eq:
mlx5_cmd_destroy_eq(dev, eq->eqn);
@@ -346,18 +323,48 @@ err_buf:
return err;
}
-static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+/**
+ * mlx5_eq_enable - Enable EQ for receiving EQEs
+ * @dev - Device which owns the eq
+ * @eq - EQ to enable
+ * @nb - notifier call block
+ * mlx5_eq_enable - must be called after EQ is created in device.
+ */
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb)
{
struct mlx5_eq_table *eq_table = dev->priv.eq_table;
- struct mlx5_irq_info *irq_info;
int err;
- irq_info = &eq_table->irq_info[eq->vecidx];
+ err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb);
+ if (!err)
+ eq_update_ci(eq, 1);
- mlx5_debug_eq_remove(dev, eq);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_eq_enable);
- free_irq(eq->irqn, irq_info->context);
- irq_info->context = NULL;
+/**
+ * mlx5_eq_disable - Enable EQ for receiving EQEs
+ * @dev - Device which owns the eq
+ * @eq - EQ to disable
+ * @nb - notifier call block
+ * mlx5_eq_disable - must be called before EQ is destroyed.
+ */
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+
+ mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb);
+}
+EXPORT_SYMBOL(mlx5_eq_disable);
+
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ mlx5_debug_eq_remove(dev, eq);
err = mlx5_cmd_destroy_eq(dev, eq->eqn);
if (err)
@@ -423,6 +430,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev)
for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
+ eq_table->irq_table = dev->priv.irq_table;
return 0;
kvfree_eq_table:
@@ -439,19 +447,20 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
/* Async EQs */
-static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
+static int create_async_eq(struct mlx5_core_dev *dev,
struct mlx5_eq *eq, struct mlx5_eq_param *param)
{
struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
mutex_lock(&eq_table->lock);
- if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
- err = -ENOSPC;
+ /* Async EQs must share irq index 0 */
+ if (param->irq_index != 0) {
+ err = -EINVAL;
goto unlock;
}
- err = create_map_eq(dev, eq, name, param);
+ err = create_map_eq(dev, eq, param);
unlock:
mutex_unlock(&eq_table->lock);
return err;
@@ -480,7 +489,7 @@ static int cq_err_event_notifier(struct notifier_block *nb,
/* type == MLX5_EVENT_TYPE_CQ_ERROR */
eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
- eq = &eqt->async_eq;
+ eq = &eqt->async_eq.core;
eqe = data;
cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
@@ -549,55 +558,73 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+ table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_CMD_IDX,
+ .irq_index = 0,
.mask = 1ull << MLX5_EVENT_TYPE_CMD,
.nent = MLX5_NUM_CMD_EQE,
- .context = &table->cmd_eq,
- .handler = mlx5_eq_async_int,
};
- err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
+ err = create_async_eq(dev, &table->cmd_eq.core, &param);
if (err) {
mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
goto err0;
}
-
+ err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err);
+ goto err1;
+ }
mlx5_cmd_use_events(dev);
+ table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_ASYNC_IDX,
+ .irq_index = 0,
.mask = gather_async_events_mask(dev),
.nent = MLX5_NUM_ASYNC_EQE,
- .context = &table->async_eq,
- .handler = mlx5_eq_async_int,
};
- err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
+ err = create_async_eq(dev, &table->async_eq.core, &param);
if (err) {
mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
- goto err1;
+ goto err2;
+ }
+ err = mlx5_eq_enable(dev, &table->async_eq.core,
+ &table->async_eq.irq_nb);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable async EQ %d\n", err);
+ goto err3;
}
+ table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_PAGEREQ_IDX,
+ .irq_index = 0,
.mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
.nent = /* TODO: sriov max_vf + */ 1,
- .context = &table->pages_eq,
- .handler = mlx5_eq_async_int,
};
- err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
+ err = create_async_eq(dev, &table->pages_eq.core, &param);
if (err) {
mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
- goto err2;
+ goto err4;
+ }
+ err = mlx5_eq_enable(dev, &table->pages_eq.core,
+ &table->pages_eq.irq_nb);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err);
+ goto err5;
}
return err;
+err5:
+ destroy_async_eq(dev, &table->pages_eq.core);
+err4:
+ mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+err3:
+ destroy_async_eq(dev, &table->async_eq.core);
err2:
- destroy_async_eq(dev, &table->async_eq);
-
-err1:
mlx5_cmd_use_polling(dev);
- destroy_async_eq(dev, &table->cmd_eq);
+ mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+err1:
+ destroy_async_eq(dev, &table->cmd_eq.core);
err0:
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
return err;
@@ -608,19 +635,22 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
struct mlx5_eq_table *table = dev->priv.eq_table;
int err;
- err = destroy_async_eq(dev, &table->pages_eq);
+ mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb);
+ err = destroy_async_eq(dev, &table->pages_eq.core);
if (err)
mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
err);
- err = destroy_async_eq(dev, &table->async_eq);
+ mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+ err = destroy_async_eq(dev, &table->async_eq.core);
if (err)
mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
err);
mlx5_cmd_use_polling(dev);
- err = destroy_async_eq(dev, &table->cmd_eq);
+ mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+ err = destroy_async_eq(dev, &table->cmd_eq.core);
if (err)
mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
err);
@@ -630,24 +660,24 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
{
- return &dev->priv.eq_table->async_eq;
+ return &dev->priv.eq_table->async_eq.core;
}
void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
{
- synchronize_irq(dev->priv.eq_table->async_eq.irqn);
+ synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
}
void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
{
- synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
+ synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
}
/* Generic EQ API for mlx5_core consumers
* Needed For RDMA ODP EQ for now
*/
struct mlx5_eq *
-mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+mlx5_eq_create_generic(struct mlx5_core_dev *dev,
struct mlx5_eq_param *param)
{
struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
@@ -656,7 +686,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
if (!eq)
return ERR_PTR(-ENOMEM);
- err = create_async_eq(dev, name, eq, param);
+ err = create_async_eq(dev, eq, param);
if (err) {
kvfree(eq);
eq = ERR_PTR(err);
@@ -714,84 +744,14 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
-/* Completion EQs */
-
-static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- struct mlx5_priv *priv = &mdev->priv;
- int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
- int irq = pci_irq_vector(mdev->pdev, vecidx);
- struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
-
- if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
- mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
- return -ENOMEM;
- }
-
- cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
- irq_info->mask);
-
- if (IS_ENABLED(CONFIG_SMP) &&
- irq_set_affinity_hint(irq, irq_info->mask))
- mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
- return 0;
-}
-
-static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
- struct mlx5_priv *priv = &mdev->priv;
- int irq = pci_irq_vector(mdev->pdev, vecidx);
- struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
-
- irq_set_affinity_hint(irq, NULL);
- free_cpumask_var(irq_info->mask);
-}
-
-static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int err;
- int i;
-
- for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
- err = set_comp_irq_affinity_hint(mdev, i);
- if (err)
- goto err_out;
- }
-
- return 0;
-
-err_out:
- for (i--; i >= 0; i--)
- clear_comp_irq_affinity_hint(mdev, i);
-
- return err;
-}
-
-static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int i;
-
- for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
- clear_comp_irq_affinity_hint(mdev, i);
-}
-
static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
struct mlx5_eq_comp *eq, *n;
- clear_comp_irqs_affinity_hints(dev);
-
-#ifdef CONFIG_RFS_ACCEL
- if (table->rmap) {
- free_irq_cpu_rmap(table->rmap);
- table->rmap = NULL;
- }
-#endif
list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
list_del(&eq->list);
+ mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
if (destroy_unmap_eq(dev, &eq->core))
mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
eq->core.eqn);
@@ -803,23 +763,17 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
static int create_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
- char name[MLX5_MAX_IRQ_NAME];
struct mlx5_eq_comp *eq;
- int ncomp_vec;
+ int ncomp_eqs;
int nent;
int err;
int i;
INIT_LIST_HEAD(&table->comp_eqs_list);
- ncomp_vec = table->num_comp_vectors;
+ ncomp_eqs = table->num_comp_eqs;
nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
- table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
- if (!table->rmap)
- return -ENOMEM;
-#endif
- for (i = 0; i < ncomp_vec; i++) {
- int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
+ for (i = 0; i < ncomp_eqs; i++) {
+ int vecidx = i + MLX5_IRQ_VEC_COMP_BASE;
struct mlx5_eq_param param = {};
eq = kzalloc(sizeof(*eq), GFP_KERNEL);
@@ -834,33 +788,29 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
(unsigned long)&eq->tasklet_ctx);
-#ifdef CONFIG_RFS_ACCEL
- irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
-#endif
- snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+ eq->irq_nb.notifier_call = mlx5_eq_comp_int;
param = (struct mlx5_eq_param) {
- .index = vecidx,
+ .irq_index = vecidx,
.mask = 0,
.nent = nent,
- .context = &eq->core,
- .handler = mlx5_eq_comp_int
};
- err = create_map_eq(dev, &eq->core, name, &param);
+ err = create_map_eq(dev, &eq->core, &param);
+ if (err) {
+ kfree(eq);
+ goto clean;
+ }
+ err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
if (err) {
+ destroy_unmap_eq(dev, &eq->core);
kfree(eq);
goto clean;
}
+
mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
/* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
list_add_tail(&eq->list, &table->comp_eqs_list);
}
- err = set_comp_irq_affinity_hints(dev);
- if (err) {
- mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
- goto clean;
- }
-
return 0;
clean:
@@ -891,22 +841,24 @@ EXPORT_SYMBOL(mlx5_vector2eqn);
unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
{
- return dev->priv.eq_table->num_comp_vectors;
+ return dev->priv.eq_table->num_comp_eqs;
}
EXPORT_SYMBOL(mlx5_comp_vectors_count);
struct cpumask *
mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
{
- /* TODO: consider irq_get_affinity_mask(irq) */
- return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
+ int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE;
+
+ return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table,
+ vecidx);
}
EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
{
- return dev->priv.eq_table->rmap;
+ return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table);
}
#endif
@@ -927,82 +879,19 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
- int i, max_eqs;
-
- clear_comp_irqs_affinity_hints(dev);
-
-#ifdef CONFIG_RFS_ACCEL
- if (table->rmap) {
- free_irq_cpu_rmap(table->rmap);
- table->rmap = NULL;
- }
-#endif
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
- max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
- for (i = max_eqs - 1; i >= 0; i--) {
- if (!table->irq_info[i].context)
- continue;
- free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
- table->irq_info[i].context = NULL;
- }
+ mlx5_irq_table_destroy(dev);
mutex_unlock(&table->lock);
- pci_free_irq_vectors(dev->pdev);
-}
-
-static int alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_eq_table *table = priv->eq_table;
- int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
- MLX5_CAP_GEN(dev, max_num_eqs) :
- 1 << MLX5_CAP_GEN(dev, log_max_eq);
- int nvec;
- int err;
-
- nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
- MLX5_EQ_VEC_COMP_BASE;
- nvec = min_t(int, nvec, num_eqs);
- if (nvec <= MLX5_EQ_VEC_COMP_BASE)
- return -ENOMEM;
-
- table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
- if (!table->irq_info)
- return -ENOMEM;
-
- nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
- nvec, PCI_IRQ_MSIX);
- if (nvec < 0) {
- err = nvec;
- goto err_free_irq_info;
- }
-
- table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
- return 0;
-
-err_free_irq_info:
- kfree(table->irq_info);
- return err;
-}
-
-static void free_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
-
- pci_free_irq_vectors(dev->pdev);
- kfree(priv->eq_table->irq_info);
}
int mlx5_eq_table_create(struct mlx5_core_dev *dev)
{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
- err = alloc_irq_vectors(dev);
- if (err) {
- mlx5_core_err(dev, "alloc irq vectors failed\n");
- return err;
- }
+ eq_table->num_comp_eqs =
+ mlx5_irq_get_num_comp(eq_table->irq_table);
err = create_async_eqs(dev);
if (err) {
@@ -1020,7 +909,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
err_comp_eqs:
destroy_async_eqs(dev);
err_async_eqs:
- free_irq_vectors(dev);
return err;
}
@@ -1028,7 +916,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{
destroy_comp_eqs(dev);
destroy_async_eqs(dev);
- free_irq_vectors(dev);
}
int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 5414e8f82d5f..67e76979bb42 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -134,6 +134,30 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *in, int inlen)
+{
+ return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen);
+}
+
+static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
+ void *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+
+ MLX5_SET(query_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+ MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *out, int outlen)
+{
+ return query_esw_vport_context_cmd(esw->dev, vport, out, outlen);
+}
+
static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
u16 vlan, u8 qos, u8 set_flags)
{
@@ -939,7 +963,7 @@ int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
- vport->vport);
+ mlx5_eswitch_vport_num_to_index(esw, vport->vport));
if (!root_ns) {
esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
return -EOPNOTSUPP;
@@ -1057,7 +1081,7 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
- vport->vport);
+ mlx5_eswitch_vport_num_to_index(esw, vport->vport));
if (!root_ns) {
esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport);
return -EOPNOTSUPP;
@@ -1168,6 +1192,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
vport->ingress.drop_rule = NULL;
vport->ingress.allow_rule = NULL;
+
+ esw_vport_del_ingress_acl_modify_metadata(esw, vport);
}
void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
@@ -1686,10 +1712,9 @@ static int eswitch_vport_event(struct notifier_block *nb,
return NOTIFY_OK;
}
-static int query_esw_functions(struct mlx5_core_dev *dev,
- u32 *out, int outlen)
+int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen)
{
- u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {};
MLX5_SET(query_esw_functions_in, in, opcode,
MLX5_CMD_OP_QUERY_ESW_FUNCTIONS);
@@ -1697,22 +1722,6 @@ static int query_esw_functions(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
}
-int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs)
-{
- u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {0};
- int err;
-
- err = query_esw_functions(dev, out, sizeof(out));
- if (err)
- return err;
-
- *num_vfs = MLX5_GET(query_esw_functions_out, out,
- host_params_context.host_num_of_vfs);
- esw_debug(dev, "host_num_of_vfs=%d\n", *num_vfs);
-
- return 0;
-}
-
/* Public E-Switch API */
#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
@@ -1720,7 +1729,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
{
struct mlx5_vport *vport;
int total_nvports = 0;
- u16 vf_nvports = 0;
int err;
int i, enabled_events;
@@ -1739,15 +1747,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
if (mode == SRIOV_OFFLOADS) {
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- err = mlx5_esw_query_functions(esw->dev, &vf_nvports);
- if (err)
- return err;
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev))
total_nvports = esw->total_vports;
- } else {
- vf_nvports = nvfs;
+ else
total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev);
- }
}
esw->mode = mode;
@@ -1761,7 +1764,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
} else {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- err = esw_offloads_init(esw, vf_nvports, total_nvports);
+ err = esw_offloads_init(esw, nvfs, total_nvports);
}
if (err)
@@ -2480,6 +2483,17 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
+enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw;
+
+ esw = dev->priv.eswitch;
+ return ESW_ALLOWED(esw) ? esw->offloads.encap :
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
+
bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
{
if ((dev0->priv.eswitch->mode == SRIOV_NONE &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 849a628f6d17..b6472cf166ec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -68,6 +68,8 @@ struct vport_ingress {
struct mlx5_flow_group *allow_spoofchk_only_grp;
struct mlx5_flow_group *allow_untagged_only_grp;
struct mlx5_flow_group *drop_grp;
+ int modify_metadata_id;
+ struct mlx5_flow_handle *modify_metadata_rule;
struct mlx5_flow_handle *allow_rule;
struct mlx5_flow_handle *drop_rule;
struct mlx5_fc *drop_counter;
@@ -178,7 +180,7 @@ struct mlx5_esw_offload {
const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
u8 inline_mode;
u64 num_flows;
- u8 encap;
+ enum devlink_eswitch_encap_mode encap;
};
/* E-Switch MC FDB table hash node */
@@ -198,6 +200,10 @@ struct mlx5_esw_functions {
u16 num_vfs;
};
+enum {
+ MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
+};
+
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
struct mlx5_nb nb;
@@ -205,6 +211,7 @@ struct mlx5_eswitch {
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
struct workqueue_struct *work_queue;
struct mlx5_vport *vports;
+ u32 flags;
int total_vports;
int enabled_vports;
/* Synchronize between vport change events
@@ -242,6 +249,8 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
@@ -269,6 +278,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
struct ifla_vf_stats *vf_stats);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *in, int inlen);
+int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *out, int outlen);
+
struct mlx5_flow_spec;
struct mlx5_esw_flow_attr;
struct mlx5_termtbl_handle;
@@ -379,9 +393,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
struct netlink_ext_ack *extack);
int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack);
-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode *encap);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
@@ -409,7 +425,7 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1);
-int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs);
+int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen);
#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
@@ -528,6 +544,8 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
struct mlx5_vport *__must_check
mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -536,6 +554,11 @@ static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs,
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
+static inline int
+mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen)
+{
+ return -EOPNOTSUPP;
+}
#define FDB_MAX_CHAIN 1
#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 060de01f09b6..af08e06f73ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -88,6 +88,53 @@ u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw)
return 1;
}
+static void
+mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+{
+ void *misc2;
+ void *misc;
+
+ /* Use metadata matching because vport is not represented by single
+ * VHCA in dual-port RoCE mode, and matching on source vport may fail.
+ */
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch,
+ attr->in_rep->vport));
+
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc)))
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ }
+
+ if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) &&
+ attr->in_rep->vport == MLX5_VPORT_UPLINK)
+ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+}
+
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
@@ -99,7 +146,6 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule;
struct mlx5_flow_table *fdb;
int j, i = 0;
- void *misc;
if (esw->mode != SRIOV_OFFLOADS)
return ERR_PTR(-EOPNOTSUPP);
@@ -159,21 +205,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
i++;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
-
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(attr->in_mdev, vhca_id));
-
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
if (attr->tunnel_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
@@ -223,7 +256,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_table *fast_fdb;
struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_handle *rule;
- void *misc;
int i;
fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0);
@@ -255,25 +287,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
dest[i].ft = fwd_fdb,
i++;
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
-
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
-
- if (attr->match_level == MLX5_MATCH_NONE)
- spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
- else
- spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS |
- MLX5_MATCH_MISC_PARAMETERS;
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ if (attr->match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
@@ -570,23 +588,87 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
-static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
+static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+ u8 fdb_to_vport_reg_c_id;
+ int err;
+
+ err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
+ out, sizeof(out));
+ if (err)
+ return err;
+
+ fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.fdb_to_vport_reg_c_id);
+
+ fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.fdb_to_vport_reg_c_id, 1);
+
+ return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
+ in, sizeof(in));
+}
+
+static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+ u8 fdb_to_vport_reg_c_id;
+ int err;
+
+ err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
+ out, sizeof(out));
+ if (err)
+ return err;
+
+ fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.fdb_to_vport_reg_c_id);
+
+ fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.fdb_to_vport_reg_c_id, 1);
+
+ return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
+ in, sizeof(in));
+}
+
+static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
+ struct mlx5_core_dev *peer_dev,
struct mlx5_flow_spec *spec,
struct mlx5_flow_destination *dest)
{
- void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
+ void *misc;
- MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(peer_dev, vhca_id));
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(peer_dev, vhca_id));
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+ }
dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest->vport.num = peer_dev->priv.eswitch->manager_vport;
@@ -594,6 +676,26 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
}
+static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw,
+ struct mlx5_flow_spec *spec,
+ u16 vport)
+{
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(peer_esw,
+ vport));
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+ }
+}
+
static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
struct mlx5_core_dev *peer_dev)
{
@@ -611,7 +713,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
if (!spec)
return -ENOMEM;
- peer_miss_rules_setup(peer_dev, spec, &dest);
+ peer_miss_rules_setup(esw, peer_dev, spec, &dest);
flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
if (!flows) {
@@ -624,7 +726,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
misc_parameters);
if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF);
+ esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
+ spec, MLX5_VPORT_PF);
+
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow)) {
@@ -646,7 +750,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
}
mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
- MLX5_SET(fte_match_set_misc, misc, source_port, i);
+ esw_set_peer_miss_rule_source_port(esw,
+ peer_dev->priv.eswitch,
+ spec, i);
+
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow)) {
@@ -930,6 +1037,30 @@ static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw)
#define MAX_PF_SQ 256
#define MAX_SQ_NVPORTS 32
+static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
+ u32 *flow_group_in)
+{
+ void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0);
+ } else {
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ }
+}
+
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -1027,19 +1158,21 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
/* create peer esw miss group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
- match_criteria);
+ esw_set_flow_group_source_port(esw, flow_group_in);
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_port);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
- MLX5_SET(create_flow_group_in, flow_group_in,
- source_eswitch_owner_vhca_id_valid, 1);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
ix + esw->total_vports - 1);
@@ -1153,7 +1286,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *g;
u32 *flow_group_in;
- void *match_criteria, *misc;
int err = 0;
nvports = nvports + MLX5_ESW_MISS_FLOWS;
@@ -1163,12 +1295,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
/* create vport rx group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
- misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ esw_set_flow_group_source_port(esw, flow_group_in);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
@@ -1207,13 +1335,24 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
goto out;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ }
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
@@ -1449,34 +1588,13 @@ err_reps:
return err;
}
-static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
- u8 rep_type)
-{
- int err;
-
- /* Special vports must be loaded first. */
- err = __load_reps_special_vport(esw, rep_type);
- if (err)
- return err;
-
- err = __load_reps_vf_vport(esw, nvports, rep_type);
- if (err)
- goto err_vfs;
-
- return 0;
-
-err_vfs:
- __unload_reps_special_vport(esw, rep_type);
- return err;
-}
-
-static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
+static int esw_offloads_load_special_vport(struct mlx5_eswitch *esw)
{
u8 rep_type = 0;
int err;
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
- err = __load_reps_all_vport(esw, nvports, rep_type);
+ err = __load_reps_special_vport(esw, rep_type);
if (err)
goto err_reps;
}
@@ -1485,7 +1603,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
err_reps:
while (rep_type-- > 0)
- __unload_reps_all_vport(esw, nvports, rep_type);
+ __unload_reps_special_vport(esw, rep_type);
return err;
}
@@ -1521,6 +1639,10 @@ static int mlx5_esw_offloads_devcom_event(int event,
switch (event) {
case ESW_OFFLOADS_DEVCOM_PAIR:
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
+ mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
+ break;
+
err = mlx5_esw_offloads_pair(esw, peer_esw);
if (err)
goto err_out;
@@ -1589,32 +1711,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
- struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_spec *spec;
int err = 0;
/* For prio tag mode, there is only 1 FTEs:
- * 1) Untagged packets - push prio tag VLAN, allow
+ * 1) Untagged packets - push prio tag VLAN and modify metadata if
+ * required, allow
* Unmatched traffic is allowed by default
*/
- if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
- return -EOPNOTSUPP;
-
- esw_vport_cleanup_ingress_rules(esw, vport);
-
- err = esw_vport_enable_ingress_acl(esw, vport);
- if (err) {
- mlx5_core_warn(esw->dev,
- "failed to enable prio tag ingress acl (%d) on vport[%d]\n",
- err, vport->vport);
- return err;
- }
-
- esw_debug(esw->dev,
- "vport[%d] configure ingress rules\n", vport->vport);
-
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
err = -ENOMEM;
@@ -1630,6 +1736,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
flow_act.vlan[0].ethtype = ETH_P_8021Q;
flow_act.vlan[0].vid = 0;
flow_act.vlan[0].prio = 0;
+
+ if (vport->ingress.modify_metadata_rule) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.modify_id = vport->ingress.modify_metadata_id;
+ }
+
vport->ingress.allow_rule =
mlx5_add_flow_rules(vport->ingress.acl, spec,
&flow_act, NULL, 0);
@@ -1650,6 +1762,58 @@ out_no_mem:
return err;
}
+static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec spec = {};
+ int err = 0;
+
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+ MLX5_SET(set_action_in, action, data,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
+
+ err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ 1, action, &vport->ingress.modify_metadata_id);
+ if (err) {
+ esw_warn(esw->dev,
+ "failed to alloc modify header for vport %d ingress acl (%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ flow_act.modify_id = vport->ingress.modify_metadata_id;
+ vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl,
+ &spec, &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.modify_metadata_rule)) {
+ err = PTR_ERR(vport->ingress.modify_metadata_rule);
+ esw_warn(esw->dev,
+ "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.modify_metadata_rule = NULL;
+ goto out;
+ }
+
+out:
+ if (err)
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+ return err;
+}
+
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (vport->ingress.modify_metadata_rule) {
+ mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+
+ vport->ingress.modify_metadata_rule = NULL;
+ }
+}
+
static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
@@ -1657,6 +1821,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec;
int err = 0;
+ if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ return 0;
+
/* For prio tag mode, there is only 1 FTEs:
* 1) prio tag packets - pop the prio tag VLAN, allow
* Unmatched traffic is allowed by default
@@ -1710,27 +1877,98 @@ out_no_mem:
return err;
}
-static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
+static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
{
- struct mlx5_vport *vport = NULL;
- int i, j;
int err;
- mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) {
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ !MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ return 0;
+
+ esw_vport_cleanup_ingress_rules(esw, vport);
+
+ err = esw_vport_enable_ingress_acl(esw, vport);
+ if (err) {
+ esw_warn(esw->dev,
+ "failed to enable ingress acl (%d) on vport[%d]\n",
+ err, vport->vport);
+ return err;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules\n", vport->vport);
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
+ if (err)
+ goto out;
+ }
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
err = esw_vport_ingress_prio_tag_config(esw, vport);
if (err)
- goto err_ingress;
- err = esw_vport_egress_prio_tag_config(esw, vport);
+ goto out;
+ }
+
+out:
+ if (err)
+ esw_vport_disable_ingress_acl(esw, vport);
+ return err;
+}
+
+static bool
+esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
+{
+ if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
+ return false;
+
+ if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
+ MLX5_FDB_TO_VPORT_REG_C_0))
+ return false;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+ return false;
+
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+ mlx5_ecpf_vport_exists(esw->dev))
+ return false;
+
+ return true;
+}
+
+static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ int i, j;
+ int err;
+
+ if (esw_check_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
+ mlx5_esw_for_all_vports(esw, i, vport) {
+ err = esw_vport_ingress_common_config(esw, vport);
if (err)
- goto err_egress;
+ goto err_ingress;
+
+ if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
+ err = esw_vport_egress_prio_tag_config(esw, vport);
+ if (err)
+ goto err_egress;
+ }
}
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ esw_info(esw->dev, "Use metadata reg_c as source vport to match\n");
+
return 0;
err_egress:
esw_vport_disable_ingress_acl(esw, vport);
err_ingress:
- mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) {
+ for (j = MLX5_VPORT_PF; j < i; j++) {
+ vport = &esw->vports[j];
esw_vport_disable_egress_acl(esw, vport);
esw_vport_disable_ingress_acl(esw, vport);
}
@@ -1738,15 +1976,17 @@ err_ingress:
return err;
}
-static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
int i;
- mlx5_esw_for_each_vf_vport(esw, i, vport, esw->dev->priv.sriov.num_vfs) {
+ mlx5_esw_for_all_vports(esw, i, vport) {
esw_vport_disable_egress_acl(esw, vport);
esw_vport_disable_ingress_acl(esw, vport);
}
+
+ esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
}
static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports,
@@ -1757,15 +1997,13 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports,
memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
- if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
- err = esw_prio_tag_acls_config(esw, vf_nvports);
- if (err)
- return err;
- }
+ err = esw_create_offloads_acl_tables(esw);
+ if (err)
+ return err;
err = esw_create_offloads_fdb_tables(esw, nvports);
if (err)
- return err;
+ goto create_fdb_err;
err = esw_create_offloads_table(esw, nvports);
if (err)
@@ -1783,6 +2021,9 @@ create_fg_err:
create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
+create_fdb_err:
+ esw_destroy_offloads_acl_tables(esw);
+
return err;
}
@@ -1791,12 +2032,12 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
esw_destroy_vport_rx_group(esw);
esw_destroy_offloads_table(esw);
esw_destroy_offloads_fdb_tables(esw);
- if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
- esw_prio_tag_acls_cleanup(esw);
+ esw_destroy_offloads_acl_tables(esw);
}
static void esw_functions_changed_event_handler(struct work_struct *work)
{
+ u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {};
struct mlx5_host_work *host_work;
struct mlx5_eswitch *esw;
u16 num_vfs = 0;
@@ -1805,7 +2046,9 @@ static void esw_functions_changed_event_handler(struct work_struct *work)
host_work = container_of(work, struct mlx5_host_work, work);
esw = host_work->esw;
- err = mlx5_esw_query_functions(esw->dev, &num_vfs);
+ err = mlx5_esw_query_functions(esw->dev, out, sizeof(out));
+ num_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_num_of_vfs);
if (err || num_vfs == esw->esw_funcs.num_vfs)
goto out;
@@ -1825,6 +2068,21 @@ out:
kfree(host_work);
}
+static void esw_emulate_event_handler(struct work_struct *work)
+{
+ struct mlx5_host_work *host_work =
+ container_of(work, struct mlx5_host_work, work);
+ struct mlx5_eswitch *esw = host_work->esw;
+ int err;
+
+ if (esw->esw_funcs.num_vfs) {
+ err = esw_offloads_load_vf_reps(esw, esw->esw_funcs.num_vfs);
+ if (err)
+ esw_warn(esw->dev, "Load vf reps err=%d\n", err);
+ }
+ kfree(host_work);
+}
+
static int esw_functions_changed_event(struct notifier_block *nb,
unsigned long type, void *data)
{
@@ -1841,7 +2099,11 @@ static int esw_functions_changed_event(struct notifier_block *nb,
host_work->esw = esw;
- INIT_WORK(&host_work->work, esw_functions_changed_event_handler);
+ if (mlx5_eswitch_is_funcs_handler(esw->dev))
+ INIT_WORK(&host_work->work,
+ esw_functions_changed_event_handler);
+ else
+ INIT_WORK(&host_work->work, esw_emulate_event_handler);
queue_work(esw->work_queue, &host_work->work);
return NOTIFY_OK;
@@ -1850,13 +2112,14 @@ static int esw_functions_changed_event(struct notifier_block *nb,
static void esw_functions_changed_event_init(struct mlx5_eswitch *esw,
u16 vf_nvports)
{
- if (!mlx5_eswitch_is_funcs_handler(esw->dev))
- return;
-
- MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event,
- ESW_FUNCTIONS_CHANGED);
- mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb);
- esw->esw_funcs.num_vfs = vf_nvports;
+ if (mlx5_eswitch_is_funcs_handler(esw->dev)) {
+ esw->esw_funcs.num_vfs = 0;
+ MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event,
+ ESW_FUNCTIONS_CHANGED);
+ mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb);
+ } else {
+ esw->esw_funcs.num_vfs = vf_nvports;
+ }
}
static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw)
@@ -1877,7 +2140,17 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
if (err)
return err;
- err = esw_offloads_load_all_reps(esw, vf_nvports);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ err = mlx5_eswitch_enable_passing_vport_metadata(esw);
+ if (err)
+ goto err_vport_metadata;
+ }
+
+ /* Only load special vports reps. VF reps will be loaded in
+ * context of functions_changed event handler through real
+ * or emulated event.
+ */
+ err = esw_offloads_load_special_vport(esw);
if (err)
goto err_reps;
@@ -1888,9 +2161,22 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
mlx5_rdma_enable_roce(esw->dev);
+ /* Call esw_functions_changed event to load VF reps:
+ * 1. HW does not support the event then emulate it
+ * Or
+ * 2. The event was already notified when num_vfs changed
+ * and eswitch was in legacy mode
+ */
+ esw_functions_changed_event(&esw->esw_funcs.nb.nb,
+ MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED,
+ NULL);
+
return 0;
err_reps:
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ mlx5_eswitch_disable_passing_vport_metadata(esw);
+err_vport_metadata:
esw_offloads_steering_cleanup(esw);
return err;
}
@@ -1916,18 +2202,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
void esw_offloads_cleanup(struct mlx5_eswitch *esw)
{
- u16 num_vfs;
-
esw_functions_changed_event_cleanup(esw);
-
- if (mlx5_eswitch_is_funcs_handler(esw->dev))
- num_vfs = esw->esw_funcs.num_vfs;
- else
- num_vfs = esw->dev->priv.sriov.num_vfs;
-
mlx5_rdma_disable_roce(esw->dev);
esw_offloads_devcom_cleanup(esw);
- esw_offloads_unload_all_reps(esw, num_vfs);
+ esw_offloads_unload_all_reps(esw, esw->esw_funcs.num_vfs);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ mlx5_eswitch_disable_passing_vport_metadata(esw);
esw_offloads_steering_cleanup(esw);
}
@@ -2167,7 +2447,8 @@ out:
return 0;
}
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
@@ -2216,7 +2497,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
return err;
}
-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode *encap)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw = dev->priv.eswitch;
@@ -2295,3 +2577,22 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
return mlx5_eswitch_get_rep(esw, vport);
}
EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
+
+bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return vport_num >= MLX5_VPORT_FIRST_VF &&
+ vport_num <= esw->dev->priv.sriov.max_vfs;
+}
+
+bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+{
+ return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
+
+u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index cb7d8ebe2c95..1d55a324a17e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -49,8 +49,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
struct mlx5_termtbl_handle *tt,
struct mlx5_flow_act *flow_act)
{
+ static const struct mlx5_flow_spec spec = {};
struct mlx5_flow_namespace *root_ns;
- struct mlx5_flow_spec spec = {};
int prio, flags;
int err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 52c47d3dd5a5..c76da309506b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -636,7 +636,8 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
u8 match_criteria_enable,
const u32 *match_c,
const u32 *match_v,
- struct mlx5_flow_act *flow_act)
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_context *flow_context)
{
const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
outer_headers);
@@ -655,7 +656,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
(match_criteria_enable &
~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
(flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
- (flow_act->flags & FLOW_ACT_HAS_TAG))
+ (flow_context->flags & FLOW_CONTEXT_HAS_TAG))
return false;
return true;
@@ -767,7 +768,8 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
fg->mask.match_criteria_enable,
fg->mask.match_criteria,
fte->val,
- &fte->action))
+ &fte->action,
+ &fte->flow_context))
return ERR_PTR(-EINVAL);
else if (!mlx5_is_fpga_ipsec_rule(mdev,
fg->mask.match_criteria_enable,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index bb24c3797218..7ac1249eadc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -396,7 +396,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
- MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, flow_tag,
+ fte->flow_context.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, flow_source,
+ fte->flow_context.flow_source);
+
MLX5_SET(flow_context, in_flow_context, extended_destination,
extended_dest);
if (extended_dest) {
@@ -771,6 +775,10 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_TX;
break;
+ case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+ max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions);
+ table_type = FS_FT_ESW_INGRESS_ACL;
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index fe76c6fd6d80..a68a51c5011a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -584,7 +584,7 @@ err_ida_remove:
}
static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
- u32 *match_value,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act)
{
struct mlx5_flow_steering *steering = get_steering(&ft->node);
@@ -594,9 +594,10 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
if (!fte)
return ERR_PTR(-ENOMEM);
- memcpy(fte->val, match_value, sizeof(fte->val));
+ memcpy(fte->val, &spec->match_value, sizeof(fte->val));
fte->node.type = FS_TYPE_FLOW_ENTRY;
fte->action = *flow_act;
+ fte->flow_context = spec->flow_context;
tree_init_node(&fte->node, NULL, del_sw_fte);
@@ -612,7 +613,7 @@ static void dealloc_flow_group(struct mlx5_flow_steering *steering,
static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
u8 match_criteria_enable,
- void *match_criteria,
+ const void *match_criteria,
int start_index,
int end_index)
{
@@ -642,7 +643,7 @@ static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steer
static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
u8 match_criteria_enable,
- void *match_criteria,
+ const void *match_criteria,
int start_index,
int end_index,
struct list_head *prev)
@@ -1285,7 +1286,7 @@ free_handle:
}
static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec)
+ const struct mlx5_flow_spec *spec)
{
struct list_head *prev = &ft->node.children;
struct mlx5_flow_group *fg;
@@ -1430,7 +1431,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
return false;
}
-static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
+static int check_conflicting_ftes(struct fs_fte *fte,
+ const struct mlx5_flow_context *flow_context,
+ const struct mlx5_flow_act *flow_act)
{
if (check_conflicting_actions(flow_act->action, fte->action.action)) {
mlx5_core_warn(get_dev(&fte->node),
@@ -1438,12 +1441,12 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
return -EEXIST;
}
- if ((flow_act->flags & FLOW_ACT_HAS_TAG) &&
- fte->action.flow_tag != flow_act->flow_tag) {
+ if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) &&
+ fte->flow_context.flow_tag != flow_context->flow_tag) {
mlx5_core_warn(get_dev(&fte->node),
"FTE flow tag %u already exists with different flow tag %u\n",
- fte->action.flow_tag,
- flow_act->flow_tag);
+ fte->flow_context.flow_tag,
+ flow_context->flow_tag);
return -EEXIST;
}
@@ -1451,7 +1454,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
}
static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
- u32 *match_value,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num,
@@ -1462,7 +1465,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
int i;
int ret;
- ret = check_conflicting_ftes(fte, flow_act);
+ ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act);
if (ret)
return ERR_PTR(ret);
@@ -1536,7 +1539,7 @@ static void free_match_list(struct match_list_head *head)
static int build_match_list(struct match_list_head *match_head,
struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec)
+ const struct mlx5_flow_spec *spec)
{
struct rhlist_head *tmp, *list;
struct mlx5_flow_group *g;
@@ -1589,7 +1592,7 @@ static u64 matched_fgs_get_version(struct list_head *match_head)
static struct fs_fte *
lookup_fte_locked(struct mlx5_flow_group *g,
- u32 *match_value,
+ const u32 *match_value,
bool take_write)
{
struct fs_fte *fte_tmp;
@@ -1622,7 +1625,7 @@ out:
static struct mlx5_flow_handle *
try_add_to_existing_fg(struct mlx5_flow_table *ft,
struct list_head *match_head,
- struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num,
@@ -1637,7 +1640,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
u64 version;
int err;
- fte = alloc_fte(ft, spec->match_value, flow_act);
+ fte = alloc_fte(ft, spec, flow_act);
if (IS_ERR(fte))
return ERR_PTR(-ENOMEM);
@@ -1653,8 +1656,7 @@ search_again_locked:
fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
if (!fte_tmp)
continue;
- rule = add_rule_fg(g, spec->match_value,
- flow_act, dest, dest_num, fte_tmp);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
up_write_ref_node(&fte_tmp->node, false);
tree_put_node(&fte_tmp->node, false);
kmem_cache_free(steering->ftes_cache, fte);
@@ -1701,8 +1703,7 @@ skip_search:
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
up_write_ref_node(&g->node, false);
- rule = add_rule_fg(g, spec->match_value,
- flow_act, dest, dest_num, fte);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
return rule;
@@ -1715,7 +1716,7 @@ out:
static struct mlx5_flow_handle *
_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num)
@@ -1788,7 +1789,7 @@ search_again_locked:
if (err)
goto err_release_fg;
- fte = alloc_fte(ft, spec->match_value, flow_act);
+ fte = alloc_fte(ft, spec, flow_act);
if (IS_ERR(fte)) {
err = PTR_ERR(fte);
goto err_release_fg;
@@ -1802,8 +1803,7 @@ search_again_locked:
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
up_write_ref_node(&g->node, false);
- rule = add_rule_fg(g, spec->match_value, flow_act, dest,
- dest_num, fte);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
tree_put_node(&g->node, false);
@@ -1823,7 +1823,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
struct mlx5_flow_handle *
mlx5_add_flow_rules(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int num_dest)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index a08c3d09a50f..c48c382f926f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -170,6 +170,7 @@ struct fs_fte {
u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
u32 dests_size;
u32 index;
+ struct mlx5_flow_context flow_context;
struct mlx5_flow_act action;
enum fs_fte_status status;
struct mlx5_fc *counter;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 9ca492b430d8..e68d124eb625 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -418,6 +418,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
.cleanup_rx = mlx5i_cleanup_rx,
.enable = NULL, /* mlx5i_enable */
.disable = NULL, /* mlx5i_disable */
+ .update_rx = mlx5e_update_nic_rx,
.update_stats = NULL, /* mlx5i_update_stats */
.update_carrier = NULL, /* no HW update in IB link */
.rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
@@ -526,7 +527,7 @@ static int mlx5i_open(struct net_device *netdev)
if (err)
goto err_remove_fs_underlay_qp;
- mlx5e_refresh_tirs(epriv, false);
+ epriv->profile->update_rx(epriv);
mlx5e_activate_priv_channels(epriv);
mutex_unlock(&epriv->state_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index b491b8f5fd6b..e05186ada721 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -221,7 +221,7 @@ static int mlx5i_pkey_open(struct net_device *netdev)
mlx5_core_warn(mdev, "opening child channels failed, %d\n", err);
goto err_clear_state_opened_flag;
}
- mlx5e_refresh_tirs(epriv, false);
+ epriv->profile->update_rx(epriv);
mlx5e_activate_priv_channels(epriv);
mutex_unlock(&epriv->state_lock);
@@ -350,6 +350,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
.cleanup_rx = mlx5i_pkey_cleanup_rx,
.enable = NULL,
.disable = NULL,
+ .update_rx = mlx5e_update_nic_rx,
.update_stats = NULL,
.rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index c0fb6d72b695..24bd991a727e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -7,7 +7,6 @@
#include <linux/mlx5/eq.h>
#include <linux/mlx5/cq.h>
-#define MLX5_MAX_IRQ_NAME (32)
#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe))
struct mlx5_eq_tasklet {
@@ -36,8 +35,14 @@ struct mlx5_eq {
struct mlx5_rsc_debug *dbg;
};
+struct mlx5_eq_async {
+ struct mlx5_eq core;
+ struct notifier_block irq_nb;
+};
+
struct mlx5_eq_comp {
- struct mlx5_eq core; /* Must be first */
+ struct mlx5_eq core;
+ struct notifier_block irq_nb;
struct mlx5_eq_tasklet tasklet_ctx;
struct list_head list;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
index a71d5b9c7ab2..3118e8d66407 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -67,6 +67,7 @@ static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
struct l2table_node {
struct l2addr_node node;
u32 index; /* index in HW l2 table */
+ int ref_count;
};
struct mlx5_mpfs {
@@ -134,8 +135,8 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
{
struct mlx5_mpfs *mpfs = dev->priv.mpfs;
struct l2table_node *l2addr;
+ int err = 0;
u32 index;
- int err;
if (!MLX5_ESWITCH_MANAGER(dev))
return 0;
@@ -144,30 +145,35 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
if (l2addr) {
- err = -EEXIST;
- goto abort;
+ l2addr->ref_count++;
+ goto out;
}
err = alloc_l2table_index(mpfs, &index);
if (err)
- goto abort;
+ goto out;
l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL);
if (!l2addr) {
- free_l2table_index(mpfs, index);
err = -ENOMEM;
- goto abort;
+ goto hash_add_err;
}
- l2addr->index = index;
err = set_l2table_entry_cmd(dev, index, mac);
- if (err) {
- l2addr_hash_del(l2addr);
- free_l2table_index(mpfs, index);
- }
+ if (err)
+ goto set_table_entry_err;
+
+ l2addr->index = index;
+ l2addr->ref_count = 1;
mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index);
-abort:
+ goto out;
+
+set_table_entry_err:
+ l2addr_hash_del(l2addr);
+hash_add_err:
+ free_l2table_index(mpfs, index);
+out:
mutex_unlock(&mpfs->lock);
return err;
}
@@ -190,6 +196,9 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
goto unlock;
}
+ if (--l2addr->ref_count > 0)
+ goto unlock;
+
index = l2addr->index;
del_l2table_entry_cmd(dev, index);
l2addr_hash_del(l2addr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 998eec938d3c..072b56fda27e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -172,18 +172,28 @@ static struct mlx5_profile profile[] = {
#define FW_INIT_TIMEOUT_MILI 2000
#define FW_INIT_WAIT_MS 2
-#define FW_PRE_INIT_TIMEOUT_MILI 10000
+#define FW_PRE_INIT_TIMEOUT_MILI 120000
+#define FW_INIT_WARN_MESSAGE_INTERVAL 20000
-static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
+ u32 warn_time_mili)
{
+ unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
int err = 0;
+ BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL);
+
while (fw_initializing(dev)) {
if (time_after(jiffies, end)) {
err = -EBUSY;
break;
}
+ if (warn_time_mili && time_after(jiffies, warn)) {
+ mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n",
+ jiffies_to_msecs(end - warn) / 1000);
+ warn = jiffies + msecs_to_jiffies(warn_time_mili);
+ }
msleep(FW_INIT_WAIT_MS);
}
@@ -799,10 +809,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_devcom;
}
+ err = mlx5_irq_table_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to initialize irq table\n");
+ goto err_devcom;
+ }
+
err = mlx5_eq_table_init(dev);
if (err) {
mlx5_core_err(dev, "failed to initialize eq\n");
- goto err_devcom;
+ goto err_irq_cleanup;
}
err = mlx5_events_init(dev);
@@ -840,32 +856,32 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_rl_cleanup;
}
- err = mlx5_eswitch_init(dev);
+ err = mlx5_sriov_init(dev);
if (err) {
- mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
+ mlx5_core_err(dev, "Failed to init sriov %d\n", err);
goto err_mpfs_cleanup;
}
- err = mlx5_sriov_init(dev);
+ err = mlx5_eswitch_init(dev);
if (err) {
- mlx5_core_err(dev, "Failed to init sriov %d\n", err);
- goto err_eswitch_cleanup;
+ mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
+ goto err_sriov_cleanup;
}
err = mlx5_fpga_init(dev);
if (err) {
mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
- goto err_sriov_cleanup;
+ goto err_eswitch_cleanup;
}
dev->tracer = mlx5_fw_tracer_create(dev);
return 0;
-err_sriov_cleanup:
- mlx5_sriov_cleanup(dev);
err_eswitch_cleanup:
mlx5_eswitch_cleanup(dev->priv.eswitch);
+err_sriov_cleanup:
+ mlx5_sriov_cleanup(dev);
err_mpfs_cleanup:
mlx5_mpfs_cleanup(dev);
err_rl_cleanup:
@@ -880,6 +896,8 @@ err_events_cleanup:
mlx5_events_cleanup(dev);
err_eq_cleanup:
mlx5_eq_table_cleanup(dev);
+err_irq_cleanup:
+ mlx5_irq_table_cleanup(dev);
err_devcom:
mlx5_devcom_unregister_device(dev->priv.devcom);
@@ -890,8 +908,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_fpga_cleanup(dev);
- mlx5_sriov_cleanup(dev);
mlx5_eswitch_cleanup(dev->priv.eswitch);
+ mlx5_sriov_cleanup(dev);
mlx5_mpfs_cleanup(dev);
mlx5_cleanup_rl_table(dev);
mlx5_geneve_destroy(dev->geneve);
@@ -903,6 +921,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_cq_debugfs_cleanup(dev);
mlx5_events_cleanup(dev);
mlx5_eq_table_cleanup(dev);
+ mlx5_irq_table_cleanup(dev);
mlx5_devcom_unregister_device(dev->priv.devcom);
}
@@ -919,7 +938,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
/* wait for firmware to accept initialization segments configurations
*/
- err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+ err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL);
if (err) {
mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n",
FW_PRE_INIT_TIMEOUT_MILI);
@@ -932,7 +951,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
return err;
}
- err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
+ err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
if (err) {
mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n",
FW_INIT_TIMEOUT_MILI);
@@ -1036,6 +1055,12 @@ static int mlx5_load(struct mlx5_core_dev *dev)
mlx5_events_start(dev);
mlx5_pagealloc_start(dev);
+ err = mlx5_irq_table_create(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc IRQs\n");
+ goto err_irq_table;
+ }
+
err = mlx5_eq_table_create(dev);
if (err) {
mlx5_core_err(dev, "Failed to create EQs\n");
@@ -1107,6 +1132,8 @@ err_fpga_start:
err_fw_tracer:
mlx5_eq_table_destroy(dev);
err_eq_table:
+ mlx5_irq_table_destroy(dev);
+err_irq_table:
mlx5_pagealloc_stop(dev);
mlx5_events_stop(dev);
mlx5_put_uars_page(dev, dev->priv.uar);
@@ -1123,6 +1150,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_fpga_device_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
mlx5_eq_table_destroy(dev);
+ mlx5_irq_table_destroy(dev);
mlx5_pagealloc_stop(dev);
mlx5_events_stop(dev);
mlx5_put_uars_page(dev, dev->priv.uar);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 29bb61a10289..958769702823 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -159,6 +159,19 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove(struct mlx5_core_dev *dev);
+int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb);
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb);
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
+
int mlx5_events_init(struct mlx5_core_dev *dev);
void mlx5_events_cleanup(struct mlx5_core_dev *dev);
void mlx5_events_start(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index ea744d8466ea..9231b39d18b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -38,15 +38,12 @@
void mlx5_init_mkey_table(struct mlx5_core_dev *dev)
{
- struct mlx5_mkey_table *table = &dev->priv.mkey_table;
-
- memset(table, 0, sizeof(*table));
- rwlock_init(&table->lock);
- INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+ xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ);
}
void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
{
+ WARN_ON(!xa_empty(&dev->priv.mkey_table));
}
int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
@@ -56,8 +53,8 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
mlx5_async_cbk_t callback,
struct mlx5_async_work *context)
{
- struct mlx5_mkey_table *table = &dev->priv.mkey_table;
u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
+ struct xarray *mkeys = &dev->priv.mkey_table;
u32 mkey_index;
void *mkc;
int err;
@@ -88,12 +85,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
mkey_index, key, mkey->key);
- /* connect to mkey tree */
- write_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey);
- write_unlock_irq(&table->lock);
+ err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey,
+ GFP_KERNEL));
if (err) {
- mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n",
+ mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n",
mlx5_base_mkey(mkey->key), err);
mlx5_core_destroy_mkey(dev, mkey);
}
@@ -114,17 +109,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey);
int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
struct mlx5_core_mkey *mkey)
{
- struct mlx5_mkey_table *table = &dev->priv.mkey_table;
u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0};
+ struct xarray *mkeys = &dev->priv.mkey_table;
struct mlx5_core_mkey *deleted_mkey;
unsigned long flags;
- write_lock_irqsave(&table->lock, flags);
- deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
- write_unlock_irqrestore(&table->lock, flags);
+ xa_lock_irqsave(mkeys, flags);
+ deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
+ xa_unlock_irqrestore(mkeys, flags);
if (!deleted_mkey) {
- mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n",
+ mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n",
mlx5_base_mkey(mkey->key));
return -ENOENT;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
new file mode 100644
index 000000000000..373981a659c7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+
+#define MLX5_MAX_IRQ_NAME (32)
+
+struct mlx5_irq {
+ struct atomic_notifier_head nh;
+ cpumask_var_t mask;
+ char name[MLX5_MAX_IRQ_NAME];
+};
+
+struct mlx5_irq_table {
+ struct mlx5_irq *irq;
+ int nvec;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
+};
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *irq_table;
+
+ irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
+ if (!irq_table)
+ return -ENOMEM;
+
+ dev->priv.irq_table = irq_table;
+ return 0;
+}
+
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
+{
+ kvfree(dev->priv.irq_table);
+}
+
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
+{
+ return table->nvec - MLX5_IRQ_VEC_COMP_BASE;
+}
+
+static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
+{
+ struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+
+ return &irq_table->irq[vecidx];
+}
+
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb)
+{
+ struct mlx5_irq *irq;
+
+ irq = &irq_table->irq[vecidx];
+ return atomic_notifier_chain_register(&irq->nh, nb);
+}
+
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb)
+{
+ struct mlx5_irq *irq;
+
+ irq = &irq_table->irq[vecidx];
+ return atomic_notifier_chain_unregister(&irq->nh, nb);
+}
+
+static irqreturn_t mlx5_irq_int_handler(int irq, void *nh)
+{
+ atomic_notifier_call_chain(nh, 0, NULL);
+ return IRQ_HANDLED;
+}
+
+static void irq_set_name(char *name, int vecidx)
+{
+ if (vecidx == 0) {
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async");
+ return;
+ }
+
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
+ vecidx - MLX5_IRQ_VEC_COMP_BASE);
+ return;
+}
+
+static int request_irqs(struct mlx5_core_dev *dev, int nvec)
+{
+ char name[MLX5_MAX_IRQ_NAME];
+ int err;
+ int i;
+
+ for (i = 0; i < nvec; i++) {
+ struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+ int irqn = pci_irq_vector(dev->pdev, i);
+
+ irq_set_name(name, i);
+ ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+ snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+ "%s@pci:%s", name, pci_name(dev->pdev));
+ err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name,
+ &irq->nh);
+ if (err) {
+ mlx5_core_err(dev, "Failed to request irq\n");
+ goto err_request_irq;
+ }
+ }
+ return 0;
+
+err_request_irq:
+ for (; i >= 0; i--) {
+ struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+ int irqn = pci_irq_vector(dev->pdev, i);
+
+ free_irq(irqn, &irq->nh);
+ }
+ return err;
+}
+
+static void irq_clear_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+
+ free_irq_cpu_rmap(irq_table->rmap);
+#endif
+}
+
+static int irq_set_rmap(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_irq_table *irq_table = mdev->priv.irq_table;
+ int num_affinity_vec;
+ int vecidx;
+
+ num_affinity_vec = mlx5_irq_get_num_comp(irq_table);
+ irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec);
+ if (!irq_table->rmap) {
+ err = -ENOMEM;
+ mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
+ goto err_out;
+ }
+
+ vecidx = MLX5_IRQ_VEC_COMP_BASE;
+ for (; vecidx < irq_table->nvec; vecidx++) {
+ err = irq_cpu_rmap_add(irq_table->rmap,
+ pci_irq_vector(mdev->pdev, vecidx));
+ if (err) {
+ mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
+ err);
+ goto err_irq_cpu_rmap_add;
+ }
+ }
+ return 0;
+
+err_irq_cpu_rmap_add:
+ irq_clear_rmap(mdev);
+err_out:
+#endif
+ return err;
+}
+
+/* Completion IRQ vectors */
+
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+ struct mlx5_irq *irq;
+ int irqn;
+
+ irq = mlx5_irq_get(mdev, vecidx);
+ irqn = pci_irq_vector(mdev->pdev, vecidx);
+ if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+ mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+ return -ENOMEM;
+ }
+
+ cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node),
+ irq->mask);
+ if (IS_ENABLED(CONFIG_SMP) &&
+ irq_set_affinity_hint(irqn, irq->mask))
+ mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x",
+ irqn);
+
+ return 0;
+}
+
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+ struct mlx5_irq *irq;
+ int irqn;
+
+ irq = mlx5_irq_get(mdev, vecidx);
+ irqn = pci_irq_vector(mdev->pdev, vecidx);
+ irq_set_affinity_hint(irqn, NULL);
+ free_cpumask_var(irq->mask);
+}
+
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+ int err;
+ int i;
+
+ for (i = 0; i < nvec; i++) {
+ err = set_comp_irq_affinity_hint(mdev, i);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ for (i--; i >= 0; i--)
+ clear_comp_irq_affinity_hint(mdev, i);
+
+ return err;
+}
+
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+ int i;
+
+ for (i = 0; i < nvec; i++)
+ clear_comp_irq_affinity_hint(mdev, i);
+}
+
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx)
+{
+ return irq_table->irq[vecidx].mask;
+}
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table)
+{
+ return irq_table->rmap;
+}
+#endif
+
+static void unrequest_irqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+ int i;
+
+ for (i = 0; i < table->nvec; i++)
+ free_irq(pci_irq_vector(dev->pdev, i),
+ &mlx5_irq_get(dev, i)->nh);
+}
+
+int mlx5_irq_table_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_irq_table *table = priv->irq_table;
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int nvec;
+ int err;
+
+ nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+ MLX5_IRQ_VEC_COMP_BASE;
+ nvec = min_t(int, nvec, num_eqs);
+ if (nvec <= MLX5_IRQ_VEC_COMP_BASE)
+ return -ENOMEM;
+
+ table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL);
+ if (!table->irq)
+ return -ENOMEM;
+
+ nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
+ nvec, PCI_IRQ_MSIX);
+ if (nvec < 0) {
+ err = nvec;
+ goto err_free_irq;
+ }
+
+ table->nvec = nvec;
+
+ err = irq_set_rmap(dev);
+ if (err)
+ goto err_set_rmap;
+
+ err = request_irqs(dev, nvec);
+ if (err)
+ goto err_request_irqs;
+
+ err = set_comp_irq_affinity_hints(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+ goto err_set_affinity;
+ }
+
+ return 0;
+
+err_set_affinity:
+ unrequest_irqs(dev);
+err_request_irqs:
+ irq_clear_rmap(dev);
+err_set_rmap:
+ pci_free_irq_vectors(dev->pdev);
+err_free_irq:
+ kfree(table->irq);
+ return err;
+}
+
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+ int i;
+
+ /* free_irq requires that affinity and rmap will be cleared
+ * before calling it. This is why there is asymmetry with set_rmap
+ * which should be called after alloc_irq but before request_irq.
+ */
+ irq_clear_rmap(dev);
+ clear_comp_irqs_affinity_hints(dev);
+ for (i = 0; i < table->nvec; i++)
+ free_irq(pci_irq_vector(dev->pdev, i),
+ &mlx5_irq_get(dev, i)->nh);
+ pci_free_irq_vectors(dev->pdev);
+ kfree(table->irq);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index a249b3c3843d..2eecb831c499 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -208,6 +208,27 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev)
mlx5_device_disable_sriov(dev);
}
+static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {};
+ u16 host_total_vfs;
+ int err;
+
+ if (mlx5_core_is_ecpf_esw_manager(dev)) {
+ err = mlx5_esw_query_functions(dev, out, sizeof(out));
+ host_total_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_total_vfs);
+
+ /* Old FW doesn't support getting total_vfs from esw func
+ * but supports getting it from pci_sriov.
+ */
+ if (!err && host_total_vfs)
+ return host_total_vfs;
+ }
+
+ return pci_sriov_get_totalvfs(dev->pdev);
+}
+
int mlx5_sriov_init(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
@@ -218,6 +239,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
return 0;
total_vfs = pci_sriov_get_totalvfs(pdev);
+ sriov->max_vfs = mlx5_get_max_vfs(dev);
sriov->num_vfs = pci_num_vf(pdev);
sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
if (!sriov->vfs_ctx)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 25847beabd3f..3ef663900f4d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -41,7 +41,7 @@
#include <linux/semaphore.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <linux/radix-tree.h>
+#include <linux/xarray.h>
#include <linux/workqueue.h>
#include <linux/mempool.h>
#include <linux/interrupt.h>
@@ -458,13 +458,6 @@ struct mlx5_qp_table {
struct radix_tree_root tree;
};
-struct mlx5_mkey_table {
- /* protect radix tree
- */
- rwlock_t lock;
- struct radix_tree_root tree;
-};
-
struct mlx5_vf_context {
int enabled;
u64 port_guid;
@@ -476,6 +469,7 @@ struct mlx5_core_sriov {
struct mlx5_vf_context *vfs_ctx;
int num_vfs;
int enabled_vfs;
+ u16 max_vfs;
};
struct mlx5_fc_stats {
@@ -497,6 +491,7 @@ struct mlx5_eswitch;
struct mlx5_lag;
struct mlx5_devcom;
struct mlx5_eq_table;
+struct mlx5_irq_table;
struct mlx5_rate_limit {
u32 rate;
@@ -526,6 +521,8 @@ struct mlx5_core_roce {
};
struct mlx5_priv {
+ /* IRQ table valid only for real pci devices PF or VF */
+ struct mlx5_irq_table *irq_table;
struct mlx5_eq_table *eq_table;
/* pages stuff */
@@ -548,9 +545,7 @@ struct mlx5_priv {
struct dentry *cmdif_debugfs;
/* end: qp staff */
- /* start: mkey staff */
- struct mlx5_mkey_table mkey_table;
- /* end: mkey staff */
+ struct xarray mkey_table;
/* start: alloc staff */
/* protect buffer alocation according to numa node */
@@ -1112,13 +1107,9 @@ static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev)
return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists);
}
-#define MLX5_HOST_PF_MAX_VFS (127u)
static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev)
{
- if (mlx5_core_is_ecpf_esw_manager(dev))
- return MLX5_HOST_PF_MAX_VFS;
- else
- return pci_sriov_get_totalvfs(dev->pdev);
+ return dev->priv.sriov.max_vfs;
}
static inline int mlx5_get_gid_table_len(u16 param)
diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
index 00045cc4ea11..70e16dcfb4c4 100644
--- a/include/linux/mlx5/eq.h
+++ b/include/linux/mlx5/eq.h
@@ -4,17 +4,7 @@
#ifndef MLX5_CORE_EQ_H
#define MLX5_CORE_EQ_H
-enum {
- MLX5_EQ_PAGEREQ_IDX = 0,
- MLX5_EQ_CMD_IDX = 1,
- MLX5_EQ_ASYNC_IDX = 2,
- /* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */
- MLX5_EQ_PFAULT_IDX = 3,
- MLX5_EQ_MAX_ASYNC_EQS,
- /* completion eqs vector indices start here */
- MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS,
-};
-
+#define MLX5_IRQ_VEC_COMP_BASE 1
#define MLX5_NUM_CMD_EQE (32)
#define MLX5_NUM_ASYNC_EQE (0x1000)
#define MLX5_NUM_SPARE_EQE (0x80)
@@ -23,18 +13,19 @@ struct mlx5_eq;
struct mlx5_core_dev;
struct mlx5_eq_param {
- u8 index;
+ u8 irq_index;
int nent;
u64 mask;
- void *context;
- irq_handler_t handler;
};
struct mlx5_eq *
-mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
- struct mlx5_eq_param *param);
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param);
int
mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb);
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb);
struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc);
void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm);
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index e9a55c0d50fd..091e25881f52 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -7,6 +7,7 @@
#define _MLX5_ESWITCH_
#include <linux/mlx5/driver.h>
+#include <net/devlink.h>
#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
@@ -62,4 +63,32 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
u16 vport_num, u32 sqn);
+
+#ifdef CONFIG_MLX5_ESWITCH
+enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
+
+bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
+u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+ u16 vport_num);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+{
+ return DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+}
+
+static inline bool
+mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+{
+ return false;
+};
+
+static inline u32
+mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+ int vport_num)
+{
+ return 0;
+};
+#endif /* CONFIG_MLX5_ESWITCH */
#endif
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 2ddaa97f2179..04a569568eac 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -88,10 +88,21 @@ struct mlx5_flow_group;
struct mlx5_flow_namespace;
struct mlx5_flow_handle;
+enum {
+ FLOW_CONTEXT_HAS_TAG = BIT(0),
+};
+
+struct mlx5_flow_context {
+ u32 flags;
+ u32 flow_tag;
+ u32 flow_source;
+};
+
struct mlx5_flow_spec {
u8 match_criteria_enable;
u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)];
u32 match_value[MLX5_ST_SZ_DW(fte_match_param)];
+ struct mlx5_flow_context flow_context;
};
enum {
@@ -173,13 +184,11 @@ struct mlx5_fs_vlan {
#define MLX5_FS_VLAN_DEPTH 2
enum {
- FLOW_ACT_HAS_TAG = BIT(0),
- FLOW_ACT_NO_APPEND = BIT(1),
+ FLOW_ACT_NO_APPEND = BIT(0),
};
struct mlx5_flow_act {
u32 action;
- u32 flow_tag;
u32 reformat_id;
u32 modify_id;
uintptr_t esp_id;
@@ -190,7 +199,6 @@ struct mlx5_flow_act {
#define MLX5_DECLARE_FLOW_ACT(name) \
struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
- .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \
.reformat_id = 0, \
.modify_id = 0, \
.flags = 0, }
@@ -200,7 +208,7 @@ struct mlx5_flow_act {
*/
struct mlx5_flow_handle *
mlx5_add_flow_rules(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int num_dest);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6513b985c5e9..d4409654f760 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -528,7 +528,21 @@ struct mlx5_ifc_fte_match_set_misc2_bits {
struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_udp;
- u8 reserved_at_80[0x100];
+ u8 metadata_reg_c_7[0x20];
+
+ u8 metadata_reg_c_6[0x20];
+
+ u8 metadata_reg_c_5[0x20];
+
+ u8 metadata_reg_c_4[0x20];
+
+ u8 metadata_reg_c_3[0x20];
+
+ u8 metadata_reg_c_2[0x20];
+
+ u8 metadata_reg_c_1[0x20];
+
+ u8 metadata_reg_c_0[0x20];
u8 metadata_reg_a[0x20];
@@ -636,8 +650,22 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
u8 reserved_at_e00[0x7200];
};
+enum {
+ MLX5_FDB_TO_VPORT_REG_C_0 = 0x01,
+ MLX5_FDB_TO_VPORT_REG_C_1 = 0x02,
+ MLX5_FDB_TO_VPORT_REG_C_2 = 0x04,
+ MLX5_FDB_TO_VPORT_REG_C_3 = 0x08,
+ MLX5_FDB_TO_VPORT_REG_C_4 = 0x10,
+ MLX5_FDB_TO_VPORT_REG_C_5 = 0x20,
+ MLX5_FDB_TO_VPORT_REG_C_6 = 0x40,
+ MLX5_FDB_TO_VPORT_REG_C_7 = 0x80,
+};
+
struct mlx5_ifc_flow_table_eswitch_cap_bits {
- u8 reserved_at_0[0x1a];
+ u8 fdb_to_vport_reg_c_id[0x8];
+ u8 reserved_at_8[0xf];
+ u8 flow_source[0x1];
+ u8 reserved_at_18[0x2];
u8 multi_fdb_encap[0x1];
u8 reserved_at_1b[0x1];
u8 fdb_multi_path_to_table[0x1];
@@ -665,7 +693,9 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 vport_svlan_insert[0x1];
u8 vport_cvlan_insert_if_not_exist[0x1];
u8 vport_cvlan_insert_overwrite[0x1];
- u8 reserved_at_5[0x14];
+ u8 reserved_at_5[0x3];
+ u8 esw_uplink_ingress_acl[0x1];
+ u8 reserved_at_9[0x10];
u8 esw_functions_changed[0x1];
u8 reserved_at_1a[0x1];
u8 ecpf_vport_exists[0x1];
@@ -2555,6 +2585,12 @@ enum {
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800,
};
+enum {
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT = 0x0,
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK = 0x1,
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT = 0x2,
+};
+
struct mlx5_ifc_vlan_bits {
u8 ethtype[0x10];
u8 prio[0x3];
@@ -2574,7 +2610,9 @@ struct mlx5_ifc_flow_context_bits {
u8 action[0x10];
u8 extended_destination[0x1];
- u8 reserved_at_80[0x7];
+ u8 reserved_at_81[0x1];
+ u8 flow_source[0x2];
+ u8 reserved_at_84[0x4];
u8 destination_list_size[0x18];
u8 reserved_at_a0[0x8];
@@ -3099,12 +3137,14 @@ struct mlx5_ifc_hca_vport_context_bits {
};
struct mlx5_ifc_esw_vport_context_bits {
- u8 reserved_at_0[0x3];
+ u8 fdb_to_vport_reg_c[0x1];
+ u8 reserved_at_1[0x2];
u8 vport_svlan_strip[0x1];
u8 vport_cvlan_strip[0x1];
u8 vport_svlan_insert[0x1];
u8 vport_cvlan_insert[0x2];
- u8 reserved_at_8[0x18];
+ u8 fdb_to_vport_reg_c_id[0x8];
+ u8 reserved_at_10[0x10];
u8 reserved_at_20[0x20];
@@ -4985,7 +5025,8 @@ struct mlx5_ifc_modify_esw_vport_context_out_bits {
};
struct mlx5_ifc_esw_vport_context_fields_select_bits {
- u8 reserved_at_0[0x1c];
+ u8 reserved_at_0[0x1b];
+ u8 fdb_to_vport_reg_c_id[0x1];
u8 vport_cvlan_insert[0x1];
u8 vport_svlan_insert[0x1];
u8 vport_cvlan_strip[0x1];
@@ -5182,6 +5223,7 @@ enum {
MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16,
MLX5_ACTION_IN_FIELD_OUT_FIRST_VID = 0x17,
MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_0 = 0x51,
};
struct mlx5_ifc_alloc_modify_header_context_out_bits {
@@ -9711,7 +9753,7 @@ struct mlx5_ifc_host_params_context_bits {
u8 reserved_at_8[0x8];
u8 host_num_of_vfs[0x10];
- u8 reserved_at_20[0x10];
+ u8 host_total_vfs[0x10];
u8 host_pci_bus[0x10];
u8 reserved_at_40[0x10];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 3ba4edbd17a6..d1f353c64797 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -551,11 +551,6 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u
return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
}
-static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
-{
- return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
-}
-
int mlx5_core_create_dct(struct mlx5_core_dev *dev,
struct mlx5_core_dct *qp,
u32 *in, int inlen,
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 6c51e864336a..6625ea068d5e 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -528,8 +528,10 @@ struct devlink_ops {
int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode);
int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode,
struct netlink_ext_ack *extack);
- int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode);
- int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode,
+ int (*eswitch_encap_mode_get)(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode *p_encap_mode);
+ int (*eswitch_encap_mode_set)(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode encap_mode,
struct netlink_ext_ack *extack);
int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
struct netlink_ext_ack *extack);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4baf716e535e..89c533778135 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1549,7 +1549,8 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
u32 seq, int flags)
{
const struct devlink_ops *ops = devlink->ops;
- u8 inline_mode, encap_mode;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
void *hdr;
int err = 0;
u16 mode;
@@ -1625,7 +1626,8 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
- u8 inline_mode, encap_mode;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
int err = 0;
u16 mode;