diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_tc.c')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 998 |
1 files changed, 771 insertions, 227 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f285713def77..901f88a886c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -55,10 +55,14 @@ #include "fs_core.h" #include "en/port.h" #include "en/tc_tun.h" +#include "en/mapping.h" +#include "en/tc_ct.h" #include "lib/devcom.h" #include "lib/geneve.h" #include "diag/en_tc_tracepoint.h" +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) + struct mlx5_nic_flow_attr { u32 action; u32 flow_tag; @@ -84,6 +88,7 @@ enum { MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, + MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, }; #define MLX5E_TC_MAX_SPLITS 1 @@ -134,6 +139,8 @@ struct mlx5e_tc_flow { refcount_t refcnt; struct rcu_head rcu_head; struct completion init_done; + int tunnel_id; /* the mapped tunnel id of this flow */ + union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -144,15 +151,118 @@ struct mlx5e_tc_flow_parse_attr { const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; - int num_mod_hdr_actions; - int max_mod_hdr_actions; - void *mod_hdr_actions; + struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) +struct tunnel_match_key { + struct flow_dissector_key_control enc_control; + struct flow_dissector_key_keyid enc_key_id; + struct flow_dissector_key_ports enc_tp; + struct flow_dissector_key_ip enc_ip; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; + + int filter_ifindex; +}; + +/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS. + * Upper TUNNEL_INFO_BITS for general tunnel info. + * Lower ENC_OPTS_BITS bits for enc_opts. + */ +#define TUNNEL_INFO_BITS 6 +#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) +#define ENC_OPTS_BITS 2 +#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) +#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) +#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) + +struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { + [CHAIN_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, + .moffset = 0, + .mlen = 2, + }, + [TUNNEL_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, + .moffset = 3, + .mlen = 1, + .soffset = MLX5_BYTE_OFF(fte_match_param, + misc_parameters_2.metadata_reg_c_1), + }, + [ZONE_TO_REG] = zone_to_reg_ct, + [CTSTATE_TO_REG] = ctstate_to_reg_ct, + [MARK_TO_REG] = mark_to_reg_ct, + [LABELS_TO_REG] = labels_to_reg_ct, + [FTEID_TO_REG] = fteid_to_reg_ct, + [TUPLEID_TO_REG] = tupleid_to_reg_ct, +}; + +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); + +void +mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 data, + u32 mask) +{ + int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; + void *headers_c = spec->match_criteria; + void *headers_v = spec->match_value; + void *fmask, *fval; + + fmask = headers_c + soffset; + fval = headers_v + soffset; + + mask = cpu_to_be32(mask) >> (32 - (match_len * 8)); + data = cpu_to_be32(data) >> (32 - (match_len * 8)); + + memcpy(fmask, &mask, match_len); + memcpy(fval, &data, match_len); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; +} + +int +mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + u32 data) +{ + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; + int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; + char *modact; + int err; + + err = alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB, + mod_hdr_acts); + if (err) + return err; + + modact = mod_hdr_acts->actions + + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); + + /* Firmware has 5bit length field and 0 means 32bits */ + if (mlen == 4) + mlen = 0; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, mfield); + MLX5_SET(set_action_in, modact, offset, moffset * 8); + MLX5_SET(set_action_in, modact, length, mlen * 8); + MLX5_SET(set_action_in, modact, data, data); + mod_hdr_acts->num_actions++; + + return 0; +} + struct mlx5e_hairpin { struct mlx5_hairpin *pair; @@ -210,8 +320,6 @@ struct mlx5e_mod_hdr_entry { int compl_result; }; -#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) - static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); @@ -361,10 +469,10 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mod_hdr_key key; u32 hash_key; - num_actions = parse_attr->num_mod_hdr_actions; + num_actions = parse_attr->mod_hdr_acts.num_actions; actions_size = MLX5_MH_ACT_SZ * num_actions; - key.actions = parse_attr->mod_hdr_actions; + key.actions = parse_attr->mod_hdr_acts.actions; key.num_actions = num_actions; hash_key = hash_mod_hdr_info(&key); @@ -954,7 +1062,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); flow_act.modify_hdr = attr->modify_hdr; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1043,8 +1151,16 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr) { + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct mlx5_flow_handle *rule; + if (flow_flag_test(flow, CT)) { + mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + + return mlx5_tc_ct_flow_offload(flow->priv, flow, spec, attr, + mod_hdr_acts); + } + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); if (IS_ERR(rule)) return rule; @@ -1063,10 +1179,15 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, static void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, - struct mlx5_esw_flow_attr *attr) + struct mlx5_esw_flow_attr *attr) { flow_flag_clear(flow, OFFLOADED); + if (flow_flag_test(flow, CT)) { + mlx5_tc_ct_delete_flow(flow->priv, flow, attr); + return; + } + if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); @@ -1224,7 +1345,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1274,6 +1395,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr = flow->esw_attr; int out_index; + mlx5e_put_flow_tunnel_id(flow); + if (flow_flag_test(flow, NOT_READY)) { remove_unready_flow(flow); kvfree(attr->parse_attr); @@ -1662,150 +1785,272 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, } } +static int flow_has_tc_fwd_action(struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_action *flow_action = &rule->action; + const struct flow_action_entry *act; + int i; -static int parse_tunnel_attr(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct flow_cls_offload *f, - struct net_device *filter_dev, u8 *match_level) + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_GOTO: + return true; + default: + continue; + } + } + + return false; +} + +static int +enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, + struct flow_dissector_key_enc_opts *opts, + struct netlink_ext_ack *extack, + bool *dont_care) +{ + struct geneve_opt *opt; + int off = 0; + + *dont_care = true; + + while (opts->len > off) { + opt = (struct geneve_opt *)&opts->data[off]; + + if (!(*dont_care) || opt->opt_class || opt->type || + memchr_inv(opt->opt_data, 0, opt->length * 4)) { + *dont_care = false; + + if (opt->opt_class != U16_MAX || + opt->type != U8_MAX || + memchr_inv(opt->opt_data, 0xFF, + opt->length * 4)) { + NL_SET_ERR_MSG(extack, + "Partial match of tunnel options in chain > 0 isn't supported"); + netdev_warn(priv->netdev, + "Partial match of tunnel options in chain > 0 isn't supported"); + return -EOPNOTSUPP; + } + } + + off += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +#define COPY_DISSECTOR(rule, diss_key, dst)\ +({ \ + struct flow_rule *__rule = (rule);\ + typeof(dst) __dst = dst;\ +\ + memcpy(__dst,\ + skb_flow_dissector_target(__rule->match.dissector,\ + diss_key,\ + __rule->match.key),\ + sizeof(*__dst));\ +}) + +static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct flow_cls_offload *f, + struct net_device *filter_dev) { - struct netlink_ext_ack *extack = f->common.extack; - void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; + struct flow_match_enc_opts enc_opts_match; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tunnel_match_key tunnel_key; + bool enc_opts_is_dont_care = true; + u32 tun_id, enc_opts_id = 0; + struct mlx5_eswitch *esw; + u32 value, mask; int err; - err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, - headers_c, headers_v, match_level); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "failed to parse tunnel attributes"); + esw = priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + memset(&tunnel_key, 0, sizeof(tunnel_key)); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, + &tunnel_key.enc_control); + if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + &tunnel_key.enc_ipv4); + else + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + &tunnel_key.enc_ipv6); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, + &tunnel_key.enc_tp); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, + &tunnel_key.enc_key_id); + tunnel_key.filter_ifindex = filter_dev->ifindex; + + err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id); + if (err) return err; - } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_match_control match; - u16 addr_type; + flow_rule_match_enc_opts(rule, &enc_opts_match); + err = enc_opts_is_dont_care_or_full_match(priv, + enc_opts_match.mask, + extack, + &enc_opts_is_dont_care); + if (err) + goto err_enc_opts; - flow_rule_match_enc_control(rule, &match); - addr_type = match.key->addr_type; + if (!enc_opts_is_dont_care) { + err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_match.key, &enc_opts_id); + if (err) + goto err_enc_opts; + } - /* For tunnel addr_type used same key id`s as for non-tunnel */ - if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_match_ipv4_addrs match; + value = tun_id << ENC_OPTS_BITS | enc_opts_id; + mask = enc_opts_id ? TUNNEL_ID_MASK : + (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK); - flow_rule_match_enc_ipv4_addrs(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.key->src)); + if (attr->chain) { + mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec, + TUNNEL_TO_REG, value, mask); + } else { + mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + err = mlx5e_tc_match_to_reg_set(priv->mdev, + mod_hdr_acts, + TUNNEL_TO_REG, value); + if (err) + goto err_set; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.key->dst)); - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IP); - } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_match_ipv6_addrs match; - - flow_rule_match_enc_ipv6_addrs(rule, &match); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IPV6); - } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { - struct flow_match_ip match; + flow->tunnel_id = value; + return 0; - flow_rule_match_enc_ip(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, - match.mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, - match.key->tos & 0x3); +err_set: + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +err_enc_opts: + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + return err; +} - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, - match.mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, - match.key->tos >> 2); +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) +{ + u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK; + u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_eswitch *esw; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, - match.mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, - match.key->ttl); + esw = flow->priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (tun_id) + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +} - if (match.mask->ttl && - !MLX5_CAP_ESW_FLOWTABLE_FDB - (priv->mdev, - ft_field_support.outer_ipv4_ttl)) { +u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow) +{ + return flow->tunnel_id; +} + +static int parse_tunnel_attr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct net_device *filter_dev, + u8 *match_level, + bool *match_inner) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct netlink_ext_ack *extack = f->common.extack; + bool needs_mapping, sets_mapping; + int err; + + if (!mlx5e_is_eswitch_flow(flow)) + return -EOPNOTSUPP; + + needs_mapping = !!flow->esw_attr->chain; + sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f); + *match_inner = !needs_mapping; + + if ((needs_mapping || sets_mapping) && + !mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + NL_SET_ERR_MSG(extack, + "Chains on tunnel devices isn't supported without register loopback support"); + netdev_warn(priv->netdev, + "Chains on tunnel devices isn't supported without register loopback support"); + return -EOPNOTSUPP; + } + + if (!flow->esw_attr->chain) { + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, + match_level); + if (err) { NL_SET_ERR_MSG_MOD(extack, - "Matching on TTL is not supported"); - return -EOPNOTSUPP; + "Failed to parse tunnel attributes"); + netdev_warn(priv->netdev, + "Failed to parse tunnel attributes"); + return err; } + flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; } - /* Enforce DMAC when offloading incoming tunneled flows. - * Flow counters require a match on the DMAC. - */ - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), priv->netdev->dev_addr); + if (!needs_mapping && !sets_mapping) + return 0; - /* let software handle IP fragments */ - MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev); +} - return 0; +static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + inner_headers); } -static void *get_match_headers_criteria(u32 flags, - struct mlx5_flow_spec *spec) +static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec) { - return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? - MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - inner_headers) : - MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + inner_headers); +} + +static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); +} + +static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); } static void *get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) { return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? - MLX5_ADDR_OF(fte_match_param, spec->match_value, - inner_headers) : - MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); + get_match_inner_headers_value(spec) : + get_match_outer_headers_value(spec); +} + +static void *get_match_headers_criteria(u32 flags, + struct mlx5_flow_spec *spec) +{ + return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? + get_match_inner_headers_criteria(spec) : + get_match_outer_headers_criteria(spec); } static int mlx5e_flower_parse_meta(struct net_device *filter_dev, @@ -1843,6 +2088,7 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, } static int __parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct net_device *filter_dev, @@ -1883,6 +2129,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_TCP) | BIT(FLOW_DISSECTOR_KEY_IP) | + BIT(FLOW_DISSECTOR_KEY_CT) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); @@ -1892,18 +2139,22 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } if (mlx5e_get_tc_tun(filter_dev)) { - if (parse_tunnel_attr(priv, spec, f, filter_dev, - outer_match_level)) - return -EOPNOTSUPP; + bool match_inner = false; - /* At this point, header pointers should point to the inner - * headers, outer header were already set by parse_tunnel_attr - */ - match_level = inner_match_level; - headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP, - spec); - headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP, - spec); + err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, + outer_match_level, &match_inner); + if (err) + return err; + + if (match_inner) { + /* header pointers should point to the inner headers + * if the packet was decapsulated already. + * outer headers are set by parse_tunnel_attr. + */ + match_level = inner_match_level; + headers_c = get_match_inner_headers_criteria(spec); + headers_v = get_match_inner_headers_value(spec); + } } err = mlx5e_flower_parse_meta(filter_dev, f); @@ -2220,8 +2471,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, inner_match_level = MLX5_MATCH_NONE; outer_match_level = MLX5_MATCH_NONE; - err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level, - &outer_match_level); + err = __parse_cls_flower(priv, flow, spec, f, filter_dev, + &inner_match_level, &outer_match_level); non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? outer_match_level : inner_match_level; @@ -2381,25 +2632,26 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), }; -/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, attr->num_mod_hdr_actions - * says how many HW actions were actually parsed. - */ -static int offload_pedit_fields(struct pedit_headers_action *hdrs, +static int offload_pedit_fields(struct mlx5e_priv *priv, + int namespace, + struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, u32 *action_flags, struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - int i, action_size, nactions, max_actions, first, last, next_z; + int i, action_size, first, last, next_z; void *headers_c, *headers_v, *action, *vals_p; u32 *s_masks_p, *a_masks_p, s_mask, a_mask; + struct mlx5e_tc_mod_hdr_acts *mod_acts; struct mlx5_fields *f; unsigned long mask; __be32 mask_be32; __be16 mask_be16; + int err; u8 cmd; + mod_acts = &parse_attr->mod_hdr_acts; headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec); headers_v = get_match_headers_value(*action_flags, &parse_attr->spec); @@ -2409,11 +2661,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, add_vals = &hdrs[1].vals; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - action = parse_attr->mod_hdr_actions + - parse_attr->num_mod_hdr_actions * action_size; - - max_actions = parse_attr->max_mod_hdr_actions; - nactions = parse_attr->num_mod_hdr_actions; for (i = 0; i < ARRAY_SIZE(fields); i++) { bool skip; @@ -2439,13 +2686,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return -EOPNOTSUPP; } - if (nactions == max_actions) { - NL_SET_ERR_MSG_MOD(extack, - "too many pedit actions, can't offload"); - printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions); - return -EOPNOTSUPP; - } - skip = false; if (s_mask) { void *match_mask = headers_c + f->match_offset; @@ -2492,6 +2732,18 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return -EOPNOTSUPP; } + err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "too many pedit actions, can't offload"); + mlx5_core_warn(priv->mdev, + "mlx5: parsed %d pedit actions, can't do more\n", + mod_acts->num_actions); + return err; + } + + action = mod_acts->actions + + (mod_acts->num_actions * action_size); MLX5_SET(set_action_in, action, action_type, cmd); MLX5_SET(set_action_in, action, field, f->field); @@ -2514,11 +2766,9 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, else if (f->field_bsize == 8) MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); - action += action_size; - nactions++; + ++mod_acts->num_actions; } - parse_attr->num_mod_hdr_actions = nactions; return 0; } @@ -2531,29 +2781,48 @@ static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev, return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); } -static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, - struct pedit_headers_action *hdrs, - int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr) +int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, + int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) { - int nkeys, action_size, max_actions; + int action_size, new_num_actions, max_hw_actions; + size_t new_sz, old_sz; + void *ret; - nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits + - hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits; - action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) + return 0; - max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace); - /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */ - max_actions = min(max_actions, nkeys * 16); + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL); - if (!parse_attr->mod_hdr_actions) + max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev, + namespace); + new_num_actions = min(max_hw_actions, + mod_hdr_acts->actions ? + mod_hdr_acts->max_actions * 2 : 1); + if (mod_hdr_acts->max_actions == new_num_actions) + return -ENOSPC; + + new_sz = action_size * new_num_actions; + old_sz = mod_hdr_acts->max_actions * action_size; + ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); + if (!ret) return -ENOMEM; - parse_attr->max_mod_hdr_actions = max_actions; + memset(ret + old_sz, 0, new_sz - old_sz); + mod_hdr_acts->actions = ret; + mod_hdr_acts->max_actions = new_num_actions; + return 0; } +void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + kfree(mod_hdr_acts->actions); + mod_hdr_acts->actions = NULL; + mod_hdr_acts->num_actions = 0; + mod_hdr_acts->max_actions = 0; +} + static const struct pedit_headers zero_masks = {}; static int parse_tc_pedit_action(struct mlx5e_priv *priv, @@ -2605,13 +2874,8 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, int err; u8 cmd; - if (!parse_attr->mod_hdr_actions) { - err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); - if (err) - goto out_err; - } - - err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack); + err = offload_pedit_fields(priv, namespace, hdrs, parse_attr, + action_flags, extack); if (err < 0) goto out_dealloc_parsed_actions; @@ -2631,8 +2895,7 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, return 0; out_dealloc_parsed_actions: - kfree(parse_attr->mod_hdr_actions); -out_err: + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); return err; } @@ -2677,7 +2940,9 @@ struct ipv6_hoplimit_word { __u8 hop_limit; }; -static bool is_action_keys_supported(const struct flow_action_entry *act) +static int is_action_keys_supported(const struct flow_action_entry *act, + bool ct_flow, bool *modify_ip_header, + struct netlink_ext_ack *extack) { u32 mask, offset; u8 htype; @@ -2696,7 +2961,13 @@ static bool is_action_keys_supported(const struct flow_action_entry *act) if (offset != offsetof(struct iphdr, ttl) || ttl_word->protocol || ttl_word->check) { - return true; + *modify_ip_header = true; + } + + if (ct_flow && offset >= offsetof(struct iphdr, saddr)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of ipv4 address with action ct"); + return -EOPNOTSUPP; } } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { struct ipv6_hoplimit_word *hoplimit_word = @@ -2705,15 +2976,27 @@ static bool is_action_keys_supported(const struct flow_action_entry *act) if (offset != offsetof(struct ipv6hdr, payload_len) || hoplimit_word->payload_len || hoplimit_word->nexthdr) { - return true; + *modify_ip_header = true; } + + if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of ipv6 address with action ct"); + return -EOPNOTSUPP; + } + } else if (ct_flow && (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP || + htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of transport header ports with action ct"); + return -EOPNOTSUPP; } - return false; + + return 0; } static bool modify_header_match_supported(struct mlx5_flow_spec *spec, struct flow_action *flow_action, - u32 actions, + u32 actions, bool ct_flow, struct netlink_ext_ack *extack) { const struct flow_action_entry *act; @@ -2721,7 +3004,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, void *headers_v; u16 ethertype; u8 ip_proto; - int i; + int i, err; headers_v = get_match_headers_value(actions, spec); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); @@ -2736,10 +3019,10 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, act->id != FLOW_ACTION_ADD) continue; - if (is_action_keys_supported(act)) { - modify_ip_header = true; - break; - } + err = is_action_keys_supported(act, ct_flow, + &modify_ip_header, extack); + if (err) + return err; } ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); @@ -2761,23 +3044,29 @@ static bool actions_match_supported(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + bool ct_flow; u32 actions; - if (mlx5e_is_eswitch_flow(flow)) + ct_flow = flow_flag_test(flow, CT); + if (mlx5e_is_eswitch_flow(flow)) { actions = flow->esw_attr->action; - else - actions = flow->nic_attr->action; - if (flow_flag_test(flow, EGRESS) && - !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_DROP))) - return false; + if (flow->esw_attr->split_count && ct_flow) { + /* All registers used by ct are cleared when using + * split rules. + */ + NL_SET_ERR_MSG_MOD(extack, + "Can't offload mirroring with action ct"); + return -EOPNOTSUPP; + } + } else { + actions = flow->nic_attr->action; + } if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) return modify_header_match_supported(&parse_attr->spec, flow_action, actions, - extack); + ct_flow, extack); return true; } @@ -2878,8 +3167,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (!flow_action_has_entries(flow_action)) return -EINVAL; - if (!flow_action_hw_stats_types_check(flow_action, extack, - FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT)) + if (!flow_action_hw_stats_check(flow_action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; @@ -2968,9 +3257,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, /* in case all pedit actions are skipped, remove the MOD_HDR * flag. */ - if (parse_attr->num_mod_hdr_actions == 0) { + if (parse_attr->mod_hdr_acts.num_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); } } @@ -3352,6 +3641,46 @@ static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw, return 0; } +static int verify_uplink_forwarding(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct net_device *out_dev, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_rep_priv *rep_priv; + + /* Forwarding non encapsulated traffic between + * uplink ports is allowed only if + * termination_table_raw_traffic cap is set. + * + * Input vport was stored esw_attr->in_rep. + * In LAG case, *priv* is the private data of + * uplink which may be not the input vport. + */ + rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep); + + if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) && + mlx5e_eswitch_uplink_rep(out_dev))) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, + termination_table_raw_traffic)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are both uplink, can't offload forwarding"); + pr_err("devices %s %s are both uplink, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EOPNOTSUPP; + } else if (out_dev != rep_priv->netdev) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not the same uplink, can't offload forwarding"); + pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EOPNOTSUPP; + } + return 0; +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, @@ -3366,15 +3695,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; + bool encap = false, decap = false; + u32 action = attr->action; int err, i, if_count = 0; - bool encap = false; - u32 action = 0; if (!flow_action_has_entries(flow_action)) return -EINVAL; - if (!flow_action_hw_stats_types_check(flow_action, extack, - FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT)) + if (!flow_action_hw_stats_check(flow_action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; flow_action_for_each(i, act, flow_action) { @@ -3449,7 +3778,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); struct net_device *uplink_upper; - struct mlx5e_rep_priv *rep_priv; if (is_duplicated_output_device(priv->netdev, out_dev, @@ -3485,21 +3813,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return err; } - /* Don't allow forwarding between uplink. - * - * Input vport was stored esw_attr->in_rep. - * In LAG case, *priv* is the private data of - * uplink which may be not the input vport. - */ - rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep); - if (mlx5e_eswitch_uplink_rep(rep_priv->netdev) && - mlx5e_eswitch_uplink_rep(out_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "devices are both uplink, can't offload forwarding"); - pr_err("devices %s %s are both uplink, can't offload forwarding\n", - priv->netdev->name, out_dev->name); - return -EOPNOTSUPP; - } + err = verify_uplink_forwarding(priv, flow, out_dev, extack); + if (err) + return err; if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { NL_SET_ERR_MSG_MOD(extack, @@ -3571,7 +3887,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, attr->split_count = attr->out_count; break; case FLOW_ACTION_TUNNEL_DECAP: - action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + decap = true; break; case FLOW_ACTION_GOTO: err = mlx5_validate_goto_chain(esw, flow, act, action, @@ -3582,6 +3898,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = act->chain_index; break; + case FLOW_ACTION_CT: + err = mlx5_tc_ct_parse_action(priv, attr, act, extack); + if (err) + return err; + + flow_flag_set(flow, CT); + break; default: NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); return -EOPNOTSUPP; @@ -3610,9 +3933,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, * flag. we might have set split_count either by pedit or * pop/push. if there is no pop/push either, reset it too. */ - if (parse_attr->num_mod_hdr_actions == 0) { + if (parse_attr->mod_hdr_acts.num_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) attr->split_count = 0; @@ -3624,6 +3947,22 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; if (attr->dest_chain) { + if (decap) { + /* It can be supported if we'll create a mapping for + * the tunnel device only (without tunnel), and set + * this tunnel id with this decap flow. + * + * On restore (miss), we'll just set this saved tunnel + * device. + */ + + NL_SET_ERR_MSG(extack, + "Decap with goto isn't supported"); + netdev_warn(priv->netdev, + "Decap with goto isn't supported"); + return -EOPNOTSUPP; + } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { NL_SET_ERR_MSG_MOD(extack, "Mirroring goto chain rules isn't supported"); @@ -3806,6 +4145,10 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; + err = mlx5_tc_ct_parse_match(priv, &parse_attr->spec, f, extack); + if (err) + goto err_free; + err = mlx5e_tc_add_fdb_flow(priv, flow, extack); complete_all(&flow->init_done); if (err) { @@ -4090,7 +4433,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, goto errout; } - if (mlx5e_is_offloaded_flow(flow)) { + if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) { counter = mlx5e_tc_get_counter(flow); if (!counter) goto errout; @@ -4181,7 +4524,7 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (!flow_action_basic_hw_stats_types_check(flow_action, extack)) + if (!flow_action_basic_hw_stats_check(flow_action, extack)) return -EOPNOTSUPP; flow_action_for_each(i, act, flow_action) { @@ -4205,8 +4548,14 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *ma) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = ma->common.extack; + if (!mlx5_esw_qos_enabled(esw)) { + NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device"); + return -EOPNOTSUPP; + } + if (ma->common.prio != 1) { NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); return -EINVAL; @@ -4353,12 +4702,63 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) int mlx5e_tc_esw_init(struct rhashtable *tc_ht) { - return rhashtable_init(tc_ht, &tc_ht_params); + const size_t sz_enc_opts = sizeof(struct flow_dissector_key_enc_opts); + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *priv; + struct mapping_ctx *mapping; + int err; + + uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + + err = mlx5_tc_ct_init(uplink_priv); + if (err) + goto err_ct; + + mapping = mapping_create(sizeof(struct tunnel_match_key), + TUNNEL_INFO_BITS_MASK, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_tun_mapping; + } + uplink_priv->tunnel_mapping = mapping; + + mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_enc_opts_mapping; + } + uplink_priv->tunnel_enc_opts_mapping = mapping; + + err = rhashtable_init(tc_ht, &tc_ht_params); + if (err) + goto err_ht_init; + + return err; + +err_ht_init: + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); +err_enc_opts_mapping: + mapping_destroy(uplink_priv->tunnel_mapping); +err_tun_mapping: + mlx5_tc_ct_clean(uplink_priv); +err_ct: + netdev_warn(priv->netdev, + "Failed to initialize tc (eswitch), err: %d", err); + return err; } void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) { + struct mlx5_rep_uplink_priv *uplink_priv; + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); + + uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); + mapping_destroy(uplink_priv->tunnel_mapping); + + mlx5_tc_ct_clean(uplink_priv); } int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) @@ -4390,3 +4790,147 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work) } mutex_unlock(&rpriv->unready_flows_lock); } + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct flow_dissector_key_enc_opts enc_opts = {}; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct metadata_dst *tun_dst; + struct tunnel_match_key key; + u32 tun_id, enc_opts_id; + struct net_device *dev; + int err; + + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; + tun_id = tunnel_id >> ENC_OPTS_BITS; + + if (!tun_id) + return true; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); + if (err) { + WARN_ON_ONCE(true); + netdev_dbg(priv->netdev, + "Couldn't find tunnel for tun_id: %d, err: %d\n", + tun_id, err); + return false; + } + + if (enc_opts_id) { + err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id, &enc_opts); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", + enc_opts_id, err); + return false; + } + } + + tun_dst = tun_rx_dst(enc_opts.len); + if (!tun_dst) { + WARN_ON_ONCE(true); + return false; + } + + ip_tunnel_key_init(&tun_dst->u.tun_info.key, + key.enc_ipv4.src, key.enc_ipv4.dst, + key.enc_ip.tos, key.enc_ip.ttl, + 0, /* label */ + key.enc_tp.src, key.enc_tp.dst, + key32_to_tunnel_id(key.enc_key_id.keyid), + TUNNEL_KEY); + + if (enc_opts.len) + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.data, + enc_opts.len, enc_opts.dst_opt_type); + + skb_dst_set(skb, (struct dst_entry *)tun_dst); + dev = dev_get_by_index(&init_net, key.filter_ifindex); + if (!dev) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel device with ifindex: %d\n", + key.filter_ifindex); + return false; + } + + /* Set tun_dev so we do dev_put() after datapath */ + tc_priv->tun_dev = dev; + + skb->dev = dev; + + return true; +} +#endif /* CONFIG_NET_TC_SKB_EXT */ + +bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, + struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tc_skb_ext *tc_skb_ext; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + int tunnel_moffset; + int err; + + reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); + if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) + reg_c0 = 0; + reg_c1 = be32_to_cpu(cqe->imm_inval_pkey); + + if (!reg_c0) + return true; + + priv = netdev_priv(skb->dev); + esw = priv->mdev->priv.eswitch; + + err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find chain for chain tag: %d, err: %d\n", + reg_c0, err); + return false; + } + + if (chain) { + tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + if (!tc_skb_ext) { + WARN_ON(1); + return false; + } + + tc_skb_ext->chain = chain; + + tuple_id = reg_c1 & TUPLE_ID_MAX; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id)) + return false; + } + + tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset; + tunnel_id = reg_c1 >> (8 * tunnel_moffset); + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); +#endif /* CONFIG_NET_TC_SKB_EXT */ + + return true; +} + +void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) +{ + if (tc_priv->tun_dev) + dev_put(tc_priv->tun_dev); +} |