aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_tc.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c1107
1 files changed, 263 insertions, 844 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index dd0bfbacad47..0da69b98f38f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -63,6 +63,8 @@
#include "en/mapping.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
+#include "en/tc_priv.h"
+#include "en/tc_tun_encap.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
@@ -71,90 +73,6 @@
#define nic_chains(priv) ((priv)->fs.tc.chains)
#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
-#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
-
-enum {
- MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
- MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
- MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
- MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
- MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
- MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
- MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
- MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2,
- MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3,
- MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4,
- MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5,
- MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6,
- MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7,
- MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
-};
-
-#define MLX5E_TC_MAX_SPLITS 1
-
-/* Helper struct for accessing a struct containing list_head array.
- * Containing struct
- * |- Helper array
- * [0] Helper item 0
- * |- list_head item 0
- * |- index (0)
- * [1] Helper item 1
- * |- list_head item 1
- * |- index (1)
- * To access the containing struct from one of the list_head items:
- * 1. Get the helper item from the list_head item using
- * helper item =
- * container_of(list_head item, helper struct type, list_head field)
- * 2. Get the contining struct from the helper item and its index in the array:
- * containing struct =
- * container_of(helper item, containing struct type, helper field[index])
- */
-struct encap_flow_item {
- struct mlx5e_encap_entry *e; /* attached encap instance */
- struct list_head list;
- int index;
-};
-
-struct mlx5e_tc_flow {
- struct rhash_head node;
- struct mlx5e_priv *priv;
- u64 cookie;
- unsigned long flags;
- struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
-
- /* flows sharing the same reformat object - currently mpls decap */
- struct list_head l3_to_l2_reformat;
- struct mlx5e_decap_entry *decap_reformat;
-
- /* Flow can be associated with multiple encap IDs.
- * The number of encaps is bounded by the number of supported
- * destinations.
- */
- struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
- struct mlx5e_tc_flow *peer_flow;
- struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
- struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
- struct list_head hairpin; /* flows sharing the same hairpin */
- struct list_head peer; /* flows with peer flow */
- struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */
- struct net_device *orig_dev; /* netdev adding flow first */
- int tmp_efi_index;
- struct list_head tmp_list; /* temporary flow list used by neigh update */
- refcount_t refcnt;
- struct rcu_head rcu_head;
- struct completion init_done;
- int tunnel_id; /* the mapped tunnel id of this flow */
- struct mlx5_flow_attr *attr;
-};
-
-struct mlx5e_tc_flow_parse_attr {
- const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
- struct net_device *filter_dev;
- struct mlx5_flow_spec spec;
- struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
- int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
- struct ethhdr eth;
-};
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
@@ -165,10 +83,15 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
.moffset = 0,
.mlen = 2,
},
+ [VPORT_TO_REG] = {
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
+ .moffset = 2,
+ .mlen = 2,
+ },
[TUNNEL_TO_REG] = {
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
.moffset = 1,
- .mlen = 3,
+ .mlen = ((ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS) / 8),
.soffset = MLX5_BYTE_OFF(fte_match_param,
misc_parameters_2.metadata_reg_c_1),
},
@@ -190,6 +113,14 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
[NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
};
+/* To avoid false lock dependency warning set the tc_ht lock
+ * class different than the lock class of the ht being used when deleting
+ * last flow from a group and then deleting a group, we get into del_sw_flow_group()
+ * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but
+ * it's different than the ht->mutex here.
+ */
+static struct lock_class_key tc_ht_lock_key;
+
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
void
@@ -239,11 +170,11 @@ mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
}
int
-mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
- enum mlx5_flow_namespace_type ns,
- enum mlx5e_tc_attr_to_reg type,
- u32 data)
+mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data)
{
int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
@@ -267,9 +198,10 @@ mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
MLX5_SET(set_action_in, modact, offset, moffset * 8);
MLX5_SET(set_action_in, modact, length, mlen * 8);
MLX5_SET(set_action_in, modact, data, data);
+ err = mod_hdr_acts->num_actions;
mod_hdr_acts->num_actions++;
- return 0;
+ return err;
}
static struct mlx5_tc_ct_priv *
@@ -318,6 +250,41 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv,
mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}
+int
+mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5_flow_namespace_type ns,
+ enum mlx5e_tc_attr_to_reg type,
+ u32 data)
+{
+ int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
+
+ return ret < 0 ? ret : 0;
+}
+
+void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ enum mlx5e_tc_attr_to_reg type,
+ int act_id, u32 data)
+{
+ int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+ int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
+ int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+ char *modact;
+
+ modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ);
+
+ /* Firmware has 5bit length field and 0 means 32bits */
+ if (mlen == 4)
+ mlen = 0;
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, field, mfield);
+ MLX5_SET(set_action_in, modact, offset, moffset * 8);
+ MLX5_SET(set_action_in, modact, length, mlen * 8);
+ MLX5_SET(set_action_in, modact, data, data);
+}
+
struct mlx5e_hairpin {
struct mlx5_hairpin *pair;
@@ -355,15 +322,14 @@ struct mlx5e_hairpin_entry {
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow);
-static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
+struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
if (!flow || !refcount_inc_not_zero(&flow->refcnt))
return ERR_PTR(-EINVAL);
return flow;
}
-static void mlx5e_flow_put(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
+void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
if (refcount_dec_and_test(&flow->refcnt)) {
mlx5e_tc_del_flow(priv, flow);
@@ -371,48 +337,6 @@ static void mlx5e_flow_put(struct mlx5e_priv *priv,
}
}
-static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
- /* Complete all memory stores before setting bit. */
- smp_mb__before_atomic();
- set_bit(flag, &flow->flags);
-}
-
-#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
-
-static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
- unsigned long flag)
-{
- /* test_and_set_bit() provides all necessary barriers */
- return test_and_set_bit(flag, &flow->flags);
-}
-
-#define flow_flag_test_and_set(flow, flag) \
- __flow_flag_test_and_set(flow, \
- MLX5E_TC_FLOW_FLAG_##flag)
-
-static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
- /* Complete all memory stores before clearing bit. */
- smp_mb__before_atomic();
- clear_bit(flag, &flow->flags);
-}
-
-#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
- MLX5E_TC_FLOW_FLAG_##flag)
-
-static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
- bool ret = test_bit(flag, &flow->flags);
-
- /* Read fields of flow structure only after checking flags. */
- smp_mb__after_atomic();
- return ret;
-}
-
-#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
- MLX5E_TC_FLOW_FLAG_##flag)
-
bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
return flow_flag_test(flow, ESWITCH);
@@ -423,7 +347,7 @@ static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
return flow_flag_test(flow, FT);
}
-static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
+bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
return flow_flag_test(flow, OFFLOADED);
}
@@ -1138,23 +1062,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
kfree(flow->attr);
}
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index);
-
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct net_device *mirred_dev,
- int out_index,
- struct netlink_ext_ack *extack,
- struct net_device **encap_dev,
- bool *encap_valid);
-static int mlx5e_attach_decap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack);
-static void mlx5e_detach_decap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow);
-
-static struct mlx5_flow_handle *
+struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
@@ -1189,10 +1097,9 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
return rule;
}
-static void
-mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
- struct mlx5e_tc_flow *flow,
- struct mlx5_flow_attr *attr)
+void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr)
{
flow_flag_clear(flow, OFFLOADED);
@@ -1211,7 +1118,7 @@ offload_rule_0:
mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
}
-static struct mlx5_flow_handle *
+struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec)
@@ -1237,9 +1144,8 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
return rule;
}
-static void
-mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
- struct mlx5e_tc_flow *flow)
+void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow)
{
struct mlx5_flow_attr *slow_attr;
@@ -1307,6 +1213,63 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow)
mutex_unlock(&uplink_priv->unready_flows_lock);
}
+static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
+
+bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
+{
+ struct mlx5_core_dev *out_mdev, *route_mdev;
+ struct mlx5e_priv *out_priv, *route_priv;
+
+ out_priv = netdev_priv(out_dev);
+ out_mdev = out_priv->mdev;
+ route_priv = netdev_priv(route_dev);
+ route_mdev = route_priv->mdev;
+
+ if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
+ route_mdev->coredev_type != MLX5_COREDEV_VF)
+ return false;
+
+ return same_hw_devs(out_priv, route_priv);
+}
+
+int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
+{
+ struct mlx5e_priv *out_priv, *route_priv;
+ struct mlx5_core_dev *route_mdev;
+ struct mlx5_eswitch *esw;
+ u16 vhca_id;
+ int err;
+
+ out_priv = netdev_priv(out_dev);
+ esw = out_priv->mdev->priv.eswitch;
+ route_priv = netdev_priv(route_dev);
+ route_mdev = route_priv->mdev;
+
+ vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
+ err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+ return err;
+}
+
+int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts;
+ struct mlx5_modify_hdr *mod_hdr;
+
+ mod_hdr = mlx5_modify_header_alloc(priv->mdev,
+ get_flow_name_space(flow),
+ mod_hdr_acts->num_actions,
+ mod_hdr_acts->actions);
+ if (IS_ERR(mod_hdr))
+ return PTR_ERR(mod_hdr);
+
+ WARN_ON(flow->attr->modify_hdr);
+ flow->attr->modify_hdr = mod_hdr;
+
+ return 0;
+}
+
static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
@@ -1316,21 +1279,15 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct net_device *out_dev, *encap_dev = NULL;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
+ bool vf_tun = false, encap_valid = true;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5_fc *counter = NULL;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
- bool encap_valid = true;
u32 max_prio, max_chain;
int err = 0;
int out_index;
- if (!mlx5_chains_prios_supported(esw_chains(esw)) && attr->prio != 1) {
- NL_SET_ERR_MSG_MOD(extack,
- "E-switch priorities unsupported, upgrade FW");
- return -EOPNOTSUPP;
- }
-
/* We check chain range only for tc flows.
* For ft flows, we checked attr->chain was originally 0 and set it to
* FDB_FT_CHAIN which is outside tc range.
@@ -1340,20 +1297,28 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
NL_SET_ERR_MSG_MOD(extack,
"Requested chain is out of supported range");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto err_out;
}
max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
if (attr->prio > max_prio) {
NL_SET_ERR_MSG_MOD(extack,
"Requested priority is out of supported range");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ if (flow_flag_test(flow, TUN_RX)) {
+ err = mlx5e_attach_decap_route(priv, flow);
+ if (err)
+ goto err_out;
}
if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
err = mlx5e_attach_decap(priv, flow, extack);
if (err)
- return err;
+ goto err_out;
}
parse_attr = attr->parse_attr;
@@ -1371,8 +1336,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
extack, &encap_dev, &encap_valid);
if (err)
- return err;
+ goto err_out;
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ vf_tun = true;
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
esw_attr->dests[out_index].rep = rpriv->rep;
@@ -1381,20 +1349,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
err = mlx5_eswitch_add_vlan_action(esw, attr);
if (err)
- return err;
+ goto err_out;
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
!(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
- err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
- dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
- if (err)
- return err;
+ if (vf_tun) {
+ err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+ if (err)
+ goto err_out;
+ } else {
+ err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+ if (err)
+ goto err_out;
+ }
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
counter = mlx5_fc_create(esw_attr->counter_dev, true);
- if (IS_ERR(counter))
- return PTR_ERR(counter);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_out;
+ }
attr->counter = counter;
}
@@ -1408,12 +1383,17 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
else
flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
- if (IS_ERR(flow->rule[0]))
- return PTR_ERR(flow->rule[0]);
- else
- flow_flag_set(flow, OFFLOADED);
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
+ goto err_out;
+ }
+ flow_flag_set(flow, OFFLOADED);
return 0;
+
+err_out:
+ flow_flag_set(flow, FAILED);
+ return err;
}
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
@@ -1434,8 +1414,11 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_flow_attr *attr = flow->attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ bool vf_tun = false;
int out_index;
+ esw_attr = attr->esw_attr;
mlx5e_put_flow_tunnel_id(flow);
if (flow_flag_test(flow, NOT_READY))
@@ -1453,20 +1436,33 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5_eswitch_del_vlan_action(esw, attr);
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
- if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
+ if (flow->decap_route)
+ mlx5e_detach_decap_route(priv, flow);
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+ vf_tun = true;
+ if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
mlx5e_detach_encap(priv, flow, out_index);
kfree(attr->parse_attr->tun_info[out_index]);
}
- kvfree(attr->parse_attr);
+ }
mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- mlx5e_detach_mod_hdr(priv, flow);
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ dealloc_mod_hdr_actions(&attr->parse_attr->mod_hdr_acts);
+ if (vf_tun && attr->modify_hdr)
+ mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
+ else
+ mlx5e_detach_mod_hdr(priv, flow);
+ }
+ kvfree(attr->parse_attr);
+ kvfree(attr->esw_attr->rx_tun_attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter);
+ mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
if (flow_flag_test(flow, L3_TO_L2_DECAP))
mlx5e_detach_decap(priv, flow);
@@ -1474,141 +1470,13 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
kfree(flow->attr);
}
-void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- struct list_head *flow_list)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *esw_attr;
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_attr *attr;
- struct mlx5_flow_spec *spec;
- struct mlx5e_tc_flow *flow;
- int err;
-
- e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
- e->reformat_type,
- e->encap_size, e->encap_header,
- MLX5_FLOW_NAMESPACE_FDB);
- if (IS_ERR(e->pkt_reformat)) {
- mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
- PTR_ERR(e->pkt_reformat));
- return;
- }
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(priv);
-
- list_for_each_entry(flow, flow_list, tmp_list) {
- bool all_flow_encaps_valid = true;
- int i;
-
- if (!mlx5e_is_offloaded_flow(flow))
- continue;
- attr = flow->attr;
- esw_attr = attr->esw_attr;
- spec = &attr->parse_attr->spec;
-
- esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
- esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
- /* Flow can be associated with multiple encap entries.
- * Before offloading the flow verify that all of them have
- * a valid neighbour.
- */
- for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
- if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
- continue;
- if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
- all_flow_encaps_valid = false;
- break;
- }
- }
- /* Do not offload flows with unresolved neighbors */
- if (!all_flow_encaps_valid)
- continue;
- /* update from slow path rule to encap rule */
- rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
- err);
- continue;
- }
-
- mlx5e_tc_unoffload_from_slow_path(esw, flow);
- flow->rule[0] = rule;
- /* was unset when slow path rule removed */
- flow_flag_set(flow, OFFLOADED);
- }
-}
-
-void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
- struct mlx5e_encap_entry *e,
- struct list_head *flow_list)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *esw_attr;
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_attr *attr;
- struct mlx5_flow_spec *spec;
- struct mlx5e_tc_flow *flow;
- int err;
-
- list_for_each_entry(flow, flow_list, tmp_list) {
- if (!mlx5e_is_offloaded_flow(flow))
- continue;
- attr = flow->attr;
- esw_attr = attr->esw_attr;
- spec = &attr->parse_attr->spec;
-
- /* update from encap rule to slow path rule */
- rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
- /* mark the flow's encap dest as non-valid */
- esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
-
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
- err);
- continue;
- }
-
- mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
- flow->rule[0] = rule;
- /* was unset when fast path rule removed */
- flow_flag_set(flow, OFFLOADED);
- }
-
- /* we know that the encap is valid */
- e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
- mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
-}
-
-static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
+struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
return flow->attr->counter;
}
-/* Takes reference to all flows attached to encap and adds the flows to
- * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
- */
-void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
-{
- struct encap_flow_item *efi;
- struct mlx5e_tc_flow *flow;
-
- list_for_each_entry(efi, &e->flows, list) {
- flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
- if (IS_ERR(mlx5e_flow_get(flow)))
- continue;
- wait_for_completion(&flow->init_done);
-
- flow->tmp_efi_index = efi->index;
- list_add(&flow->tmp_list, flow_list);
- }
-}
-
/* Iterate over tmp_list of flows attached to flow_list head. */
-void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
+void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
{
struct mlx5e_tc_flow *flow, *tmp;
@@ -1616,222 +1484,6 @@ void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_l
mlx5e_flow_put(priv, flow);
}
-static struct mlx5e_encap_entry *
-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
- struct mlx5e_encap_entry *e)
-{
- struct mlx5e_encap_entry *next = NULL;
-
-retry:
- rcu_read_lock();
-
- /* find encap with non-zero reference counter value */
- for (next = e ?
- list_next_or_null_rcu(&nhe->encap_list,
- &e->encap_list,
- struct mlx5e_encap_entry,
- encap_list) :
- list_first_or_null_rcu(&nhe->encap_list,
- struct mlx5e_encap_entry,
- encap_list);
- next;
- next = list_next_or_null_rcu(&nhe->encap_list,
- &next->encap_list,
- struct mlx5e_encap_entry,
- encap_list))
- if (mlx5e_encap_take(next))
- break;
-
- rcu_read_unlock();
-
- /* release starting encap */
- if (e)
- mlx5e_encap_put(netdev_priv(e->out_dev), e);
- if (!next)
- return next;
-
- /* wait for encap to be fully initialized */
- wait_for_completion(&next->res_ready);
- /* continue searching if encap entry is not in valid state after completion */
- if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
- e = next;
- goto retry;
- }
-
- return next;
-}
-
-void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
-{
- struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
- struct mlx5e_encap_entry *e = NULL;
- struct mlx5e_tc_flow *flow;
- struct mlx5_fc *counter;
- struct neigh_table *tbl;
- bool neigh_used = false;
- struct neighbour *n;
- u64 lastuse;
-
- if (m_neigh->family == AF_INET)
- tbl = &arp_tbl;
-#if IS_ENABLED(CONFIG_IPV6)
- else if (m_neigh->family == AF_INET6)
- tbl = ipv6_stub->nd_tbl;
-#endif
- else
- return;
-
- /* mlx5e_get_next_valid_encap() releases previous encap before returning
- * next one.
- */
- while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
- struct mlx5e_priv *priv = netdev_priv(e->out_dev);
- struct encap_flow_item *efi, *tmp;
- struct mlx5_eswitch *esw;
- LIST_HEAD(flow_list);
-
- esw = priv->mdev->priv.eswitch;
- mutex_lock(&esw->offloads.encap_tbl_lock);
- list_for_each_entry_safe(efi, tmp, &e->flows, list) {
- flow = container_of(efi, struct mlx5e_tc_flow,
- encaps[efi->index]);
- if (IS_ERR(mlx5e_flow_get(flow)))
- continue;
- list_add(&flow->tmp_list, &flow_list);
-
- if (mlx5e_is_offloaded_flow(flow)) {
- counter = mlx5e_tc_get_counter(flow);
- lastuse = mlx5_fc_query_lastuse(counter);
- if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
- neigh_used = true;
- break;
- }
- }
- }
- mutex_unlock(&esw->offloads.encap_tbl_lock);
-
- mlx5e_put_encap_flow_list(priv, &flow_list);
- if (neigh_used) {
- /* release current encap before breaking the loop */
- mlx5e_encap_put(priv, e);
- break;
- }
- }
-
- trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
-
- if (neigh_used) {
- nhe->reported_lastuse = jiffies;
-
- /* find the relevant neigh according to the cached device and
- * dst ip pair
- */
- n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
- if (!n)
- return;
-
- neigh_event_send(n, NULL);
- neigh_release(n);
- }
-}
-
-static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
-{
- WARN_ON(!list_empty(&e->flows));
-
- if (e->compl_result > 0) {
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-
- if (e->flags & MLX5_ENCAP_ENTRY_VALID)
- mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
- }
-
- kfree(e->tun_info);
- kfree(e->encap_header);
- kfree_rcu(e, rcu);
-}
-
-static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
- struct mlx5e_decap_entry *d)
-{
- WARN_ON(!list_empty(&d->flows));
-
- if (!d->compl_result)
- mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
-
- kfree_rcu(d, rcu);
-}
-
-void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
- if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
- return;
- hash_del_rcu(&e->encap_hlist);
- mutex_unlock(&esw->offloads.encap_tbl_lock);
-
- mlx5e_encap_dealloc(priv, e);
-}
-
-static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
- if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
- return;
- hash_del_rcu(&d->hlist);
- mutex_unlock(&esw->offloads.decap_tbl_lock);
-
- mlx5e_decap_dealloc(priv, d);
-}
-
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index)
-{
- struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
- /* flow wasn't fully initialized */
- if (!e)
- return;
-
- mutex_lock(&esw->offloads.encap_tbl_lock);
- list_del(&flow->encaps[out_index].list);
- flow->encaps[out_index].e = NULL;
- if (!refcount_dec_and_test(&e->refcnt)) {
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- return;
- }
- hash_del_rcu(&e->encap_hlist);
- mutex_unlock(&esw->offloads.encap_tbl_lock);
-
- mlx5e_encap_dealloc(priv, e);
-}
-
-static void mlx5e_detach_decap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_decap_entry *d = flow->decap_reformat;
-
- if (!d)
- return;
-
- mutex_lock(&esw->offloads.decap_tbl_lock);
- list_del(&flow->l3_to_l2_reformat);
- flow->decap_reformat = NULL;
-
- if (!refcount_dec_and_test(&d->refcnt)) {
- mutex_unlock(&esw->offloads.decap_tbl_lock);
- return;
- }
- hash_del_rcu(&d->hlist);
- mutex_unlock(&esw->offloads.decap_tbl_lock);
-
- mlx5e_decap_dealloc(priv, d);
-}
-
static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
@@ -2089,6 +1741,29 @@ void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
}
}
+u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
+{
+ void *headers_v;
+ u16 ethertype;
+ u8 ip_version;
+
+ if (outer)
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+ else
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
+
+ ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
+ /* Return ip_version converted from ethertype anyway */
+ if (!ip_version) {
+ ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
+ if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
+ ip_version = 4;
+ else if (ethertype == ETH_P_IPV6)
+ ip_version = 6;
+ }
+ return ip_version;
+}
+
static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
@@ -2097,6 +1772,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
u8 *match_level,
bool *match_inner)
{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct netlink_ext_ack *extack = f->common.extack;
bool needs_mapping, sets_mapping;
@@ -2134,6 +1810,31 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
*/
if (!netif_is_bareudp(filter_dev))
flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ err = mlx5e_tc_set_attr_rx_tun(flow, spec);
+ if (err)
+ return err;
+ } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ struct mlx5_flow_spec *tmp_spec;
+
+ tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
+ if (!tmp_spec) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
+ netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
+ return -ENOMEM;
+ }
+ memcpy(tmp_spec, spec, sizeof(*tmp_spec));
+
+ err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
+ if (err) {
+ kvfree(tmp_spec);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
+ netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
+ return err;
+ }
+ err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
+ kvfree(tmp_spec);
+ if (err)
+ return err;
}
if (!needs_mapping && !sets_mapping)
@@ -3582,35 +3283,6 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
return 0;
}
-struct encap_key {
- const struct ip_tunnel_key *ip_tun_key;
- struct mlx5e_tc_tunnel *tc_tunnel;
-};
-
-static inline int cmp_encap_info(struct encap_key *a,
- struct encap_key *b)
-{
- return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
- a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
-}
-
-static inline int cmp_decap_info(struct mlx5e_decap_key *a,
- struct mlx5e_decap_key *b)
-{
- return memcmp(&a->key, &b->key, sizeof(b->key));
-}
-
-static inline int hash_encap_info(struct encap_key *key)
-{
- return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
- key->tc_tunnel->tunnel_type);
-}
-
-static inline int hash_decap_info(struct mlx5e_decap_key *key)
-{
- return jhash(&key->key, sizeof(key->key), 0);
-}
-
static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
struct net_device *peer_netdev)
{
@@ -3624,277 +3296,6 @@ static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
same_hw_devs(priv, peer_priv));
}
-bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
-{
- return refcount_inc_not_zero(&e->refcnt);
-}
-
-static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
-{
- return refcount_inc_not_zero(&e->refcnt);
-}
-
-static struct mlx5e_encap_entry *
-mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
- uintptr_t hash_key)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_encap_entry *e;
- struct encap_key e_key;
-
- hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
- encap_hlist, hash_key) {
- e_key.ip_tun_key = &e->tun_info->key;
- e_key.tc_tunnel = e->tunnel;
- if (!cmp_encap_info(&e_key, key) &&
- mlx5e_encap_take(e))
- return e;
- }
-
- return NULL;
-}
-
-static struct mlx5e_decap_entry *
-mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
- uintptr_t hash_key)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_decap_key r_key;
- struct mlx5e_decap_entry *e;
-
- hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
- hlist, hash_key) {
- r_key = e->key;
- if (!cmp_decap_info(&r_key, key) &&
- mlx5e_decap_take(e))
- return e;
- }
- return NULL;
-}
-
-static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
-{
- size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
-
- return kmemdup(tun_info, tun_size, GFP_KERNEL);
-}
-
-static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- int out_index,
- struct mlx5e_encap_entry *e,
- struct netlink_ext_ack *extack)
-{
- int i;
-
- for (i = 0; i < out_index; i++) {
- if (flow->encaps[i].e != e)
- continue;
- NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
- netdev_err(priv->netdev, "can't duplicate encap action\n");
- return true;
- }
-
- return false;
-}
-
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct net_device *mirred_dev,
- int out_index,
- struct netlink_ext_ack *extack,
- struct net_device **encap_dev,
- bool *encap_valid)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_flow_attr *attr = flow->attr;
- const struct ip_tunnel_info *tun_info;
- struct encap_key key;
- struct mlx5e_encap_entry *e;
- unsigned short family;
- uintptr_t hash_key;
- int err = 0;
-
- parse_attr = attr->parse_attr;
- tun_info = parse_attr->tun_info[out_index];
- family = ip_tunnel_info_af(tun_info);
- key.ip_tun_key = &tun_info->key;
- key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
- if (!key.tc_tunnel) {
- NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
- return -EOPNOTSUPP;
- }
-
- hash_key = hash_encap_info(&key);
-
- mutex_lock(&esw->offloads.encap_tbl_lock);
- e = mlx5e_encap_get(priv, &key, hash_key);
-
- /* must verify if encap is valid or not */
- if (e) {
- /* Check that entry was not already attached to this flow */
- if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
- err = -EOPNOTSUPP;
- goto out_err;
- }
-
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- wait_for_completion(&e->res_ready);
-
- /* Protect against concurrent neigh update. */
- mutex_lock(&esw->offloads.encap_tbl_lock);
- if (e->compl_result < 0) {
- err = -EREMOTEIO;
- goto out_err;
- }
- goto attach_flow;
- }
-
- e = kzalloc(sizeof(*e), GFP_KERNEL);
- if (!e) {
- err = -ENOMEM;
- goto out_err;
- }
-
- refcount_set(&e->refcnt, 1);
- init_completion(&e->res_ready);
-
- tun_info = dup_tun_info(tun_info);
- if (!tun_info) {
- err = -ENOMEM;
- goto out_err_init;
- }
- e->tun_info = tun_info;
- err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
- if (err)
- goto out_err_init;
-
- INIT_LIST_HEAD(&e->flows);
- hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
- mutex_unlock(&esw->offloads.encap_tbl_lock);
-
- if (family == AF_INET)
- err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
- else if (family == AF_INET6)
- err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
-
- /* Protect against concurrent neigh update. */
- mutex_lock(&esw->offloads.encap_tbl_lock);
- complete_all(&e->res_ready);
- if (err) {
- e->compl_result = err;
- goto out_err;
- }
- e->compl_result = 1;
-
-attach_flow:
- flow->encaps[out_index].e = e;
- list_add(&flow->encaps[out_index].list, &e->flows);
- flow->encaps[out_index].index = out_index;
- *encap_dev = e->out_dev;
- if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
- attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
- attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
- *encap_valid = true;
- } else {
- *encap_valid = false;
- }
- mutex_unlock(&esw->offloads.encap_tbl_lock);
-
- return err;
-
-out_err:
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- if (e)
- mlx5e_encap_put(priv, e);
- return err;
-
-out_err_init:
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- kfree(tun_info);
- kfree(e);
- return err;
-}
-
-static int mlx5e_attach_decap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
- struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5e_decap_entry *d;
- struct mlx5e_decap_key key;
- uintptr_t hash_key;
- int err = 0;
-
- parse_attr = flow->attr->parse_attr;
- if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
- NL_SET_ERR_MSG_MOD(extack,
- "encap header larger than max supported");
- return -EOPNOTSUPP;
- }
-
- key.key = parse_attr->eth;
- hash_key = hash_decap_info(&key);
- mutex_lock(&esw->offloads.decap_tbl_lock);
- d = mlx5e_decap_get(priv, &key, hash_key);
- if (d) {
- mutex_unlock(&esw->offloads.decap_tbl_lock);
- wait_for_completion(&d->res_ready);
- mutex_lock(&esw->offloads.decap_tbl_lock);
- if (d->compl_result) {
- err = -EREMOTEIO;
- goto out_free;
- }
- goto found;
- }
-
- d = kzalloc(sizeof(*d), GFP_KERNEL);
- if (!d) {
- err = -ENOMEM;
- goto out_err;
- }
-
- d->key = key;
- refcount_set(&d->refcnt, 1);
- init_completion(&d->res_ready);
- INIT_LIST_HEAD(&d->flows);
- hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
- mutex_unlock(&esw->offloads.decap_tbl_lock);
-
- d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
- MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
- sizeof(parse_attr->eth),
- &parse_attr->eth,
- MLX5_FLOW_NAMESPACE_FDB);
- if (IS_ERR(d->pkt_reformat)) {
- err = PTR_ERR(d->pkt_reformat);
- d->compl_result = err;
- }
- mutex_lock(&esw->offloads.decap_tbl_lock);
- complete_all(&d->res_ready);
- if (err)
- goto out_free;
-
-found:
- flow->decap_reformat = d;
- attr->decap_pkt_reformat = d->pkt_reformat;
- list_add(&flow->l3_to_l2_reformat, &d->flows);
- mutex_unlock(&esw->offloads.decap_tbl_lock);
- return 0;
-
-out_free:
- mutex_unlock(&esw->offloads.decap_tbl_lock);
- mlx5e_decap_put(priv, d);
- return err;
-
-out_err:
- mutex_unlock(&esw->offloads.decap_tbl_lock);
- return err;
-}
-
static int parse_tc_vlan_action(struct mlx5e_priv *priv,
const struct flow_action_entry *act,
struct mlx5_esw_flow_attr *attr,
@@ -4247,7 +3648,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
if (encap) {
parse_attr->mirred_ifindex[esw_attr->out_count] =
out_dev->ifindex;
- parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info);
+ parse_attr->tun_info[esw_attr->out_count] =
+ mlx5e_dup_tun_info(info);
if (!parse_attr->tun_info[esw_attr->out_count])
return -ENOMEM;
encap = false;
@@ -4384,6 +3786,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
}
}
+ /* always set IP version for indirect table handling */
+ attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
+
if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
/* For prio tag mode, replace vlan pop with rewrite vlan prio
@@ -4664,7 +4069,6 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
return flow;
err_free:
- dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
mlx5e_flow_put(priv, flow);
out:
return ERR_PTR(err);
@@ -4809,6 +4213,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
return 0;
err_free:
+ flow_flag_set(flow, FAILED);
dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
mlx5e_flow_put(priv, flow);
out:
@@ -5040,7 +4445,7 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
*/
if (rate) {
rate = (rate * BITS_PER_BYTE) + 500000;
- rate_mbps = max_t(u32, do_div(rate, 1000000), 1);
+ rate_mbps = max_t(u64, do_div(rate, 1000000), 1);
}
err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
@@ -5221,6 +4626,8 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
if (err)
return err;
+ lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
+
if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
@@ -5328,7 +4735,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
}
uplink_priv->tunnel_mapping = mapping;
- mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
+ /* 0xFFF is reserved for stack devices slow path table mark */
+ mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto err_enc_opts_mapping;
@@ -5339,8 +4747,18 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
if (err)
goto err_ht_init;
- return err;
+ lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
+
+ uplink_priv->encap = mlx5e_tc_tun_init(priv);
+ if (IS_ERR(uplink_priv->encap)) {
+ err = PTR_ERR(uplink_priv->encap);
+ goto err_register_fib_notifier;
+ }
+ return 0;
+
+err_register_fib_notifier:
+ rhashtable_destroy(tc_ht);
err_ht_init:
mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
err_enc_opts_mapping:
@@ -5357,10 +4775,11 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
{
struct mlx5_rep_uplink_priv *uplink_priv;
- rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
-
uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
+ rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
+ mlx5e_tc_tun_cleanup(uplink_priv->encap);
+
mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
mapping_destroy(uplink_priv->tunnel_mapping);
@@ -5460,7 +4879,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
tc_skb_ext->chain = chain;
zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) &
- ZONE_RESTORE_MAX;
+ ESW_ZONE_ID_MASK;
if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
zone_restore_id))