diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_tc.c')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 635 |
1 files changed, 430 insertions, 205 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 10f705761666..7fc84f58e28a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -31,6 +31,7 @@ */ #include <net/flow_dissector.h> +#include <net/flow_offload.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> @@ -45,10 +46,15 @@ #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> +#include <net/tc_act/tc_mpls.h> #include <net/arp.h> #include <net/ipv6_stubs.h> +#include <net/bareudp.h> +#include <net/bonding.h> #include "en.h" #include "en_rep.h" +#include "en/rep/tc.h" +#include "en/rep/neigh.h" #include "en_tc.h" #include "eswitch.h" #include "esw/chains.h" @@ -61,7 +67,7 @@ #include "lib/geneve.h" #include "diag/en_tc_tracepoint.h" -#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) struct mlx5_nic_flow_attr { u32 action; @@ -89,6 +95,7 @@ enum { MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, + MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8, }; #define MLX5E_TC_MAX_SPLITS 1 @@ -122,6 +129,11 @@ struct mlx5e_tc_flow { u64 cookie; unsigned long flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; + + /* flows sharing the same reformat object - currently mpls decap */ + struct list_head l3_to_l2_reformat; + struct mlx5e_decap_entry *decap_reformat; + /* Flow can be associated with multiple encap IDs. * The number of encaps is bounded by the number of supported * destinations. @@ -134,6 +146,7 @@ struct mlx5e_tc_flow { struct list_head hairpin; /* flows sharing the same hairpin */ struct list_head peer; /* flows with peer flow */ struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */ + struct net_device *orig_dev; /* netdev adding flow first */ int tmp_efi_index; struct list_head tmp_list; /* temporary flow list used by neigh update */ refcount_t refcnt; @@ -153,35 +166,12 @@ struct mlx5e_tc_flow_parse_attr { struct mlx5_flow_spec spec; struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; + struct ethhdr eth; }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) -struct tunnel_match_key { - struct flow_dissector_key_control enc_control; - struct flow_dissector_key_keyid enc_key_id; - struct flow_dissector_key_ports enc_tp; - struct flow_dissector_key_ip enc_ip; - union { - struct flow_dissector_key_ipv4_addrs enc_ipv4; - struct flow_dissector_key_ipv6_addrs enc_ipv6; - }; - - int filter_ifindex; -}; - -/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS. - * Upper TUNNEL_INFO_BITS for general tunnel info. - * Lower ENC_OPTS_BITS bits for enc_opts. - */ -#define TUNNEL_INFO_BITS 6 -#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) -#define ENC_OPTS_BITS 2 -#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) -#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) -#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) - struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { [CHAIN_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, @@ -220,8 +210,8 @@ mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, fmask = headers_c + soffset; fval = headers_v + soffset; - mask = cpu_to_be32(mask) >> (32 - (match_len * 8)); - data = cpu_to_be32(data) >> (32 - (match_len * 8)); + mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8)); + data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8)); memcpy(fmask, &mask, match_len); memcpy(fval, &data, match_len); @@ -568,7 +558,7 @@ struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) { - u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {}; void *tirc; int err; @@ -582,7 +572,7 @@ static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]); MLX5_SET(tirc, tirc, transport_domain, hp->tdn); - err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn); + err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn); if (err) goto create_tir_err; @@ -666,7 +656,7 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false); err = mlx5_core_create_tir(hp->func_mdev, in, - MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]); + &hp->indir_tirn[tt]); if (err) { mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err); goto err_destroy_tirs; @@ -1092,7 +1082,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (IS_ERR(priv->fs.tc.t)) { mutex_unlock(&priv->fs.tc.t_lock); NL_SET_ERR_MSG_MOD(extack, - "Failed to create tc offload table\n"); + "Failed to create tc offload table"); netdev_err(priv->netdev, "Failed to create tc offload table\n"); return PTR_ERR(priv->fs.tc.t); @@ -1144,6 +1134,11 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct netlink_ext_ack *extack, struct net_device **encap_dev, bool *encap_valid); +static int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack); +static void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); static struct mlx5_flow_handle * mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, @@ -1319,6 +1314,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (flow_flag_test(flow, L3_TO_L2_DECAP)) { + err = mlx5e_attach_decap(priv, flow, extack); + if (err) + return err; + } + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { int mirred_ifindex; @@ -1428,6 +1429,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(attr->counter_dev, attr->counter); + + if (flow_flag_test(flow, L3_TO_L2_DECAP)) + mlx5e_detach_decap(priv, flow); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -1704,6 +1708,17 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr kfree_rcu(e, rcu); } +static void mlx5e_decap_dealloc(struct mlx5e_priv *priv, + struct mlx5e_decap_entry *d) +{ + WARN_ON(!list_empty(&d->flows)); + + if (!d->compl_result) + mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat); + + kfree_rcu(d, rcu); +} + void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -1716,6 +1731,18 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) mlx5e_encap_dealloc(priv, e); } +static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock)) + return; + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + static void mlx5e_detach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, int out_index) { @@ -1739,6 +1766,29 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, mlx5e_encap_dealloc(priv, e); } +static void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_entry *d = flow->decap_reformat; + + if (!d) + return; + + mutex_lock(&esw->offloads.decap_tbl_lock); + list_del(&flow->l3_to_l2_reformat); + flow->decap_reformat = NULL; + + if (!refcount_dec_and_test(&d->refcnt)) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + return; + } + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; @@ -1823,10 +1873,8 @@ enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, memchr_inv(opt->opt_data, 0, opt->length * 4)) { *dont_care = false; - if (opt->opt_class != U16_MAX || - opt->type != U8_MAX || - memchr_inv(opt->opt_data, 0xFF, - opt->length * 4)) { + if (opt->opt_class != htons(U16_MAX) || + opt->type != U8_MAX) { NL_SET_ERR_MSG(extack, "Partial match of tunnel options in chain > 0 isn't supported"); netdev_warn(priv->netdev, @@ -1863,6 +1911,7 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct flow_match_enc_opts enc_opts_match; + struct tunnel_match_enc_opts tun_enc_opts; struct mlx5_rep_uplink_priv *uplink_priv; struct mlx5e_rep_priv *uplink_rpriv; struct tunnel_match_key tunnel_key; @@ -1905,8 +1954,14 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, goto err_enc_opts; if (!enc_opts_is_dont_care) { + memset(&tun_enc_opts, 0, sizeof(tun_enc_opts)); + memcpy(&tun_enc_opts.key, enc_opts_match.key, + sizeof(*enc_opts_match.key)); + memcpy(&tun_enc_opts.mask, enc_opts_match.mask, + sizeof(*enc_opts_match.mask)); + err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, - enc_opts_match.key, &enc_opts_id); + &tun_enc_opts, &enc_opts_id); if (err) goto err_enc_opts; } @@ -1965,6 +2020,32 @@ u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow) return flow->tunnel_id; } +void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, + struct flow_match_basic *match, bool outer, + void *headers_c, void *headers_v) +{ + bool ip_version_cap; + + ip_version_cap = outer ? + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version) : + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version); + + if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) && + (match->key->n_proto == htons(ETH_P_IP) || + match->key->n_proto == htons(ETH_P_IPV6))) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, + match->key->n_proto == htons(ETH_P_IP) ? 4 : 6); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(match->mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(match->key->n_proto)); + } +} + static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, @@ -2005,7 +2086,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return err; } - flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + /* With mpls over udp we decapsulate using packet reformat + * object + */ + if (!netif_is_bareudp(filter_dev)) + flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; } if (!needs_mapping && !sets_mapping) @@ -2088,6 +2173,20 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, return 0; } +static bool skip_key_basic(struct net_device *filter_dev, + struct flow_cls_offload *f) +{ + /* When doing mpls over udp decap, the user needs to provide + * MPLS_UC as the protocol in order to be able to match on mpls + * label fields. However, the actual ethertype is IP so we want to + * avoid matching on this, otherwise we'll fail the match. + */ + if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0) + return true; + + return false; +} + static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, @@ -2132,7 +2231,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_IP) | BIT(FLOW_DISSECTOR_KEY_CT) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | - BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) { + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | + BIT(FLOW_DISSECTOR_KEY_MPLS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", dissector->used_keys); @@ -2162,14 +2262,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, if (err) return err; - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) && + !skip_key_basic(filter_dev, f)) { struct flow_match_basic match; flow_rule_match_basic(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, - ntohs(match.mask->n_proto)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ntohs(match.key->n_proto)); + mlx5e_tc_set_ethertype(priv->mdev, &match, + match_level == outer_match_level, + headers_c, headers_v); if (match.mask->n_proto) *match_level = MLX5_MATCH_L2; @@ -2661,7 +2761,7 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, set_vals = &hdrs[0].vals; add_vals = &hdrs[1].vals; - action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); for (i = 0; i < ARRAY_SIZE(fields); i++) { bool skip; @@ -2715,10 +2815,10 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, continue; if (f->field_bsize == 32) { - mask_be32 = (__be32)mask; + mask_be32 = (__force __be32)(mask); mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); } else if (f->field_bsize == 16) { - mask_be32 = (__be32)mask; + mask_be32 = (__force __be32)(mask); mask_be16 = *(__be16 *)&mask_be32; mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); } @@ -2794,7 +2894,7 @@ int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) return 0; - action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev, namespace); @@ -2827,10 +2927,12 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) static const struct pedit_headers zero_masks = {}; -static int parse_tc_pedit_action(struct mlx5e_priv *priv, - const struct flow_action_entry *act, int namespace, - struct pedit_headers_action *hdrs, - struct netlink_ext_ack *extack) +static int +parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) { u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; int err = -EOPNOTSUPP; @@ -2866,6 +2968,46 @@ out_err: return err; } +static int +parse_pedit_to_reformat(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) +{ + u32 mask, val, offset; + u32 *p; + + if (act->id != FLOW_ACTION_MANGLE) + return -EOPNOTSUPP; + + if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) { + NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported"); + return -EOPNOTSUPP; + } + + mask = ~act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset; + p = (u32 *)&parse_attr->eth; + *(p + (offset >> 2)) |= (val & mask); + + return 0; +} + +static int parse_tc_pedit_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + if (flow && flow_flag_test(flow, L3_TO_L2_DECAP)) + return parse_pedit_to_reformat(priv, act, parse_attr, extack); + + return parse_pedit_to_modify_hdr(priv, act, namespace, + parse_attr, hdrs, extack); +} + static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, struct pedit_headers_action *hdrs, @@ -3003,16 +3145,19 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, { const struct flow_action_entry *act; bool modify_ip_header; + void *headers_c; void *headers_v; u16 ethertype; u8 ip_proto; int i, err; + headers_c = get_match_headers_criteria(actions, spec); headers_v = get_match_headers_value(actions, spec); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); /* for non-IP we only re-write MACs, so we're okay */ - if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) + if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 && + ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) goto out_ok; modify_ip_header = false; @@ -3129,7 +3274,7 @@ static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace, return -EOPNOTSUPP; } - err = parse_tc_pedit_action(priv, &pedit_act, namespace, hdrs, NULL); + err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack); *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; return err; @@ -3195,7 +3340,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, - hdrs, extack); + parse_attr, hdrs, NULL, extack); if (err) return err; @@ -3289,12 +3434,22 @@ static inline int cmp_encap_info(struct encap_key *a, a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; } +static inline int cmp_decap_info(struct mlx5e_decap_key *a, + struct mlx5e_decap_key *b) +{ + return memcmp(&a->key, &b->key, sizeof(b->key)); +} + static inline int hash_encap_info(struct encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), key->tc_tunnel->tunnel_type); } +static inline int hash_decap_info(struct mlx5e_decap_key *key) +{ + return jhash(&key->key, sizeof(key->key), 0); +} static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, struct net_device *peer_netdev) @@ -3314,6 +3469,11 @@ bool mlx5e_encap_take(struct mlx5e_encap_entry *e) return refcount_inc_not_zero(&e->refcnt); } +static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + static struct mlx5e_encap_entry * mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, uintptr_t hash_key) @@ -3334,6 +3494,24 @@ mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, return NULL; } +static struct mlx5e_decap_entry * +mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_key r_key; + struct mlx5e_decap_entry *e; + + hash_for_each_possible_rcu(esw->offloads.decap_tbl, e, + hlist, hash_key) { + r_key = e->key; + if (!cmp_decap_info(&r_key, key) && + mlx5e_decap_take(e)) + return e; + } + return NULL; +} + static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info) { size_t tun_size = sizeof(*tun_info) + tun_info->options_len; @@ -3479,6 +3657,84 @@ out_err_init: return err; } +static int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_decap_entry *d; + struct mlx5e_decap_key key; + uintptr_t hash_key; + int err = 0; + + parse_attr = attr->parse_attr; + if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { + NL_SET_ERR_MSG_MOD(extack, + "encap header larger than max supported"); + return -EOPNOTSUPP; + } + + key.key = parse_attr->eth; + hash_key = hash_decap_info(&key); + mutex_lock(&esw->offloads.decap_tbl_lock); + d = mlx5e_decap_get(priv, &key, hash_key); + if (d) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + wait_for_completion(&d->res_ready); + mutex_lock(&esw->offloads.decap_tbl_lock); + if (d->compl_result) { + err = -EREMOTEIO; + goto out_free; + } + goto found; + } + + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + err = -ENOMEM; + goto out_err; + } + + d->key = key; + refcount_set(&d->refcnt, 1); + init_completion(&d->res_ready); + INIT_LIST_HEAD(&d->flows); + hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2, + sizeof(parse_attr->eth), + &parse_attr->eth, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(d->pkt_reformat)) { + err = PTR_ERR(d->pkt_reformat); + d->compl_result = err; + } + mutex_lock(&esw->offloads.decap_tbl_lock); + complete_all(&d->res_ready); + if (err) + goto out_free; + +found: + flow->decap_reformat = d; + attr->decap_pkt_reformat = d->pkt_reformat; + list_add(&flow->l3_to_l2_reformat, &d->flows); + mutex_unlock(&esw->offloads.decap_tbl_lock); + return 0; + +out_free: + mutex_unlock(&esw->offloads.decap_tbl_lock); + mlx5e_decap_put(priv, d); + return err; + +out_err: + mutex_unlock(&esw->offloads.decap_tbl_lock); + return err; +} + static int parse_tc_vlan_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, @@ -3532,6 +3788,28 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, return 0; } +static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev, + struct net_device *out_dev) +{ + struct net_device *fdb_out_dev = out_dev; + struct net_device *uplink_upper; + + rcu_read_lock(); + uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); + if (uplink_upper && netif_is_lag_master(uplink_upper) && + uplink_upper == out_dev) { + fdb_out_dev = uplink_dev; + } else if (netif_is_lag_master(out_dev)) { + fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev)); + if (fdb_out_dev && + (!mlx5e_eswitch_rep(fdb_out_dev) || + !netdev_port_same_parent_id(fdb_out_dev, uplink_dev))) + fdb_out_dev = NULL; + } + rcu_read_unlock(); + return fdb_out_dev; +} + static int add_vlan_push_action(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr, struct net_device **out_dev, @@ -3713,7 +3991,8 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv, static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + struct net_device *filter_dev) { struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -3727,6 +4006,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, bool encap = false, decap = false; u32 action = attr->action; int err, i, if_count = 0; + bool mpls_push = false; if (!flow_action_has_entries(flow_action)) return -EINVAL; @@ -3741,15 +4021,48 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; break; + case FLOW_ACTION_MPLS_PUSH: + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + reformat_l2_to_l3_tunnel) || + act->mpls_push.proto != htons(ETH_P_MPLS_UC)) { + NL_SET_ERR_MSG_MOD(extack, + "mpls push is supported only for mpls_uc protocol"); + return -EOPNOTSUPP; + } + mpls_push = true; + break; + case FLOW_ACTION_MPLS_POP: + /* we only support mpls pop if it is the first action + * and the filter net device is bareudp. Subsequent + * actions can be pedit and the last can be mirred + * egress redirect. + */ + if (i) { + NL_SET_ERR_MSG_MOD(extack, + "mpls pop supported only as first action"); + return -EOPNOTSUPP; + } + if (!netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "mpls pop supported only on bareudp devices"); + return -EOPNOTSUPP; + } + + parse_attr->eth.h_proto = act->mpls_pop.proto; + action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_flag_set(flow, L3_TO_L2_DECAP); + break; case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, - hdrs, extack); + parse_attr, hdrs, flow, extack); if (err) return err; - action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - attr->split_count = attr->out_count; + if (!flow_flag_test(flow, L3_TO_L2_DECAP)) { + action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + attr->split_count = attr->out_count; + } break; case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, @@ -3771,6 +4084,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EINVAL; } + if (mpls_push && !netif_is_bareudp(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "mpls is supported only through a bareudp device"); + return -EOPNOTSUPP; + } + if (ft_flow && out_dev == priv->netdev) { /* Ignore forward to self rules generated * by adding both mlx5 devs to the flow table @@ -3806,7 +4125,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - struct net_device *uplink_upper; if (is_duplicated_output_device(priv->netdev, out_dev, @@ -3818,14 +4136,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, ifindexes[if_count] = out_dev->ifindex; if_count++; - rcu_read_lock(); - uplink_upper = - netdev_master_upper_dev_get_rcu(uplink_dev); - if (uplink_upper && - netif_is_lag_master(uplink_upper) && - uplink_upper == out_dev) - out_dev = uplink_dev; - rcu_read_unlock(); + out_dev = get_fdb_out_dev(uplink_dev, out_dev); + if (!out_dev) + return -ENODEV; if (is_vlan_dev(out_dev)) { err = add_vlan_push_action(priv, attr, @@ -4097,6 +4410,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, INIT_LIST_HEAD(&flow->encaps[out_index].list); INIT_LIST_HEAD(&flow->mod_hdr); INIT_LIST_HEAD(&flow->hairpin); + INIT_LIST_HEAD(&flow->l3_to_l2_reformat); refcount_set(&flow->refcnt, 1); init_completion(&flow->init_done); @@ -4166,7 +4480,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev); if (err) goto err_free; @@ -4352,11 +4666,21 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, return err; } +static bool is_flow_rule_duplicate_allowed(struct net_device *dev, + struct mlx5e_rep_priv *rpriv) +{ + /* Offloaded flow rule is allowed to duplicate on non-uplink representor + * sharing tc block with other slaves of a lag device. + */ + return netif_is_lag_port(dev) && rpriv->rep->vport != MLX5_VPORT_UPLINK; +} + int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags) { struct netlink_ext_ack *extack = f->common.extack; struct rhashtable *tc_ht = get_tc_ht(priv, flags); + struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5e_tc_flow *flow; int err = 0; @@ -4364,6 +4688,12 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); rcu_read_unlock(); if (flow) { + /* Same flow rule offloaded to non-uplink representor sharing tc block, + * just return 0. + */ + if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev) + goto out; + NL_SET_ERR_MSG_MOD(extack, "flow cookie already exists, ignoring"); netdev_warn_once(priv->netdev, @@ -4378,6 +4708,12 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, if (err) goto out; + /* Flow rule offloaded to non-uplink representor sharing tc block, + * set the flow's owner dev. + */ + if (is_flow_rule_duplicate_allowed(dev, rpriv)) + flow->orig_dev = dev; + err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); if (err) goto err_free; @@ -4729,7 +5065,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) int mlx5e_tc_esw_init(struct rhashtable *tc_ht) { - const size_t sz_enc_opts = sizeof(struct flow_dissector_key_enc_opts); + const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); struct mlx5_rep_uplink_priv *uplink_priv; struct mlx5e_rep_priv *priv; struct mapping_ctx *mapping; @@ -4818,146 +5154,35 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work) mutex_unlock(&rpriv->unready_flows_lock); } -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) -static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, - struct mlx5e_tc_update_priv *tc_priv, - u32 tunnel_id) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct flow_dissector_key_enc_opts enc_opts = {}; - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - struct metadata_dst *tun_dst; - struct tunnel_match_key key; - u32 tun_id, enc_opts_id; - struct net_device *dev; - int err; - - enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; - tun_id = tunnel_id >> ENC_OPTS_BITS; - - if (!tun_id) - return true; - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - - err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); - if (err) { - WARN_ON_ONCE(true); - netdev_dbg(priv->netdev, - "Couldn't find tunnel for tun_id: %d, err: %d\n", - tun_id, err); - return false; - } - - if (enc_opts_id) { - err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, - enc_opts_id, &enc_opts); - if (err) { - netdev_dbg(priv->netdev, - "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", - enc_opts_id, err); - return false; - } - } - - tun_dst = tun_rx_dst(enc_opts.len); - if (!tun_dst) { - WARN_ON_ONCE(true); - return false; - } - - ip_tunnel_key_init(&tun_dst->u.tun_info.key, - key.enc_ipv4.src, key.enc_ipv4.dst, - key.enc_ip.tos, key.enc_ip.ttl, - 0, /* label */ - key.enc_tp.src, key.enc_tp.dst, - key32_to_tunnel_id(key.enc_key_id.keyid), - TUNNEL_KEY); - - if (enc_opts.len) - ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.data, - enc_opts.len, enc_opts.dst_opt_type); - - skb_dst_set(skb, (struct dst_entry *)tun_dst); - dev = dev_get_by_index(&init_net, key.filter_ifindex); - if (!dev) { - netdev_dbg(priv->netdev, - "Couldn't find tunnel device with ifindex: %d\n", - key.filter_ifindex); - return false; +static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, + struct flow_cls_offload *cls_flower, + unsigned long flags) +{ + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_DESTROY: + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_STATS: + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); + default: + return -EOPNOTSUPP; } - - /* Set tun_dev so we do dev_put() after datapath */ - tc_priv->tun_dev = dev; - - skb->dev = dev; - - return true; } -#endif /* CONFIG_NET_TC_SKB_EXT */ -bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, - struct sk_buff *skb, - struct mlx5e_tc_update_priv *tc_priv) +int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) { -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id; - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - struct tc_skb_ext *tc_skb_ext; - struct mlx5_eswitch *esw; - struct mlx5e_priv *priv; - int tunnel_moffset; - int err; - - reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); - if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) - reg_c0 = 0; - reg_c1 = be32_to_cpu(cqe->imm_inval_pkey); - - if (!reg_c0) - return true; + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); + struct mlx5e_priv *priv = cb_priv; - priv = netdev_priv(skb->dev); - esw = priv->mdev->priv.eswitch; - - err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain); - if (err) { - netdev_dbg(priv->netdev, - "Couldn't find chain for chain tag: %d, err: %d\n", - reg_c0, err); - return false; - } - - if (chain) { - tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); - if (!tc_skb_ext) { - WARN_ON(1); - return false; - } - - tc_skb_ext->chain = chain; - - tuple_id = reg_c1 & TUPLE_ID_MAX; - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id)) - return false; + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_setup_tc_cls_flower(priv, type_data, flags); + default: + return -EOPNOTSUPP; } - - tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset; - tunnel_id = reg_c1 >> (8 * tunnel_moffset); - return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); -#endif /* CONFIG_NET_TC_SKB_EXT */ - - return true; -} - -void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) -{ - if (tc_priv->tun_dev) - dev_put(tc_priv->tun_dev); } |