Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_tc.c')
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   338
1 file changed, 266 insertions(+), 72 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index f8829b517156..44406a5ec15d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -161,15 +161,21 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 	}
 }
 
+/* we get here also when setting rule to the FW failed, etc. It means that the
+ * flow rule itself might not exist, but some offloading related to the actions
+ * should be cleaned.
+ */
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 			      struct mlx5e_tc_flow *flow)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_fc *counter = NULL;
 
-	counter = mlx5_flow_rule_counter(flow->rule);
-
-	mlx5_del_flow_rules(flow->rule);
+	if (!IS_ERR(flow->rule)) {
+		counter = mlx5_flow_rule_counter(flow->rule);
+		mlx5_del_flow_rules(flow->rule);
+		mlx5_fc_destroy(priv->mdev, counter);
+	}
 
 	if (esw && esw->mode == SRIOV_OFFLOADS) {
 		mlx5_eswitch_del_vlan_action(esw, flow->attr);
@@ -177,8 +183,6 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 			mlx5e_detach_encap(priv, flow);
 	}
 
-	mlx5_fc_destroy(priv->mdev, counter);
-
 	if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
 		mlx5_destroy_flow_table(priv->fs.tc.t);
 		priv->fs.tc.t = NULL;
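The hunks above make mlx5e_tc_del_flow() safe to call even when rule insertion failed and flow->rule holds an encoded error rather than a real rule. A minimal userspace sketch of the ERR_PTR convention this relies on; the helpers mirror include/linux/err.h and the printed strings are illustrative, not driver output:

#include <stdio.h>

#define MAX_ERRNO 4095

/* Kernel-style encoding: small negative errnos live in the pointer itself. */
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

int main(void)
{
	void *rule = ERR_PTR(-95); /* as if rule setup returned -EOPNOTSUPP */

	if (!IS_ERR(rule))
		printf("rule exists: delete it and free its counter\n");
	else
		printf("no rule was installed: clean only the action state\n");
	return 0;
}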
@@ -225,6 +229,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				       outer_headers);
 
+	struct flow_dissector_key_control *enc_control =
+		skb_flow_dissector_target(f->dissector,
+					  FLOW_DISSECTOR_KEY_ENC_CONTROL,
+					  f->key);
+
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
 		struct flow_dissector_key_ports *key =
 			skb_flow_dissector_target(f->dissector,
@@ -237,28 +246,34 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 
 		/* Full udp dst port must be given */
 		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
-			return -EOPNOTSUPP;
-
-		/* udp src port isn't supported */
-		if (memchr_inv(&mask->src, 0, sizeof(mask->src)))
-			return -EOPNOTSUPP;
+			goto vxlan_match_offload_err;
 
 		if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
 		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
 			parse_vxlan_attr(spec, f);
-		else
+		else {
+			netdev_warn(priv->netdev,
+				    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
 			return -EOPNOTSUPP;
+		}
 
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 udp_dport, ntohs(mask->dst));
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 udp_dport, ntohs(key->dst));
 
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+			 udp_sport, ntohs(mask->src));
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+			 udp_sport, ntohs(key->src));
+
 	} else { /* udp dst port must be given */
-		return -EOPNOTSUPP;
+vxlan_match_offload_err:
+		netdev_warn(priv->netdev,
+			    "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
+		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+	if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
 		struct flow_dissector_key_ipv4_addrs *key =
 			skb_flow_dissector_target(f->dissector,
 						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
@@ -280,10 +295,36 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
 			 ntohl(key->dst));
-	}
 
-	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
-	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
+		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
+	} else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		struct flow_dissector_key_ipv6_addrs *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+						  f->key);
+		struct flow_dissector_key_ipv6_addrs *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+						  f->mask);
+
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
+		       &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
+		       &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+		       &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+		       &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+
+		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
+	}
 
 	/* Enforce DMAC when offloading incoming tunneled flows.
 	 * Flow counters require a match on the DMAC.
@@ -343,6 +384,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 					  f->key);
 		switch (key->addr_type) {
 		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
 			if (parse_tunnel_attr(priv, spec, f))
 				return -EOPNOTSUPP;
 			break;
@@ -375,6 +417,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
 				 key->flags & FLOW_DIS_IS_FRAGMENT);
+
+			/* the HW doesn't need L3 inline to match on frag=no */
+			if (key->flags & FLOW_DIS_IS_FRAGMENT)
+				*min_inline = MLX5_INLINE_MODE_IP;
 		}
 	}
 
@@ -438,8 +484,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 					  FLOW_DISSECTOR_KEY_VLAN,
 					  f->mask);
 		if (mask->vlan_id || mask->vlan_priority) {
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
 
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
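Throughout these match hunks every field is programmed twice: once into the match criteria (headers_c) and once into the match value (headers_v); hardware then accepts a packet when (field & mask) == (value & mask). A self-contained sketch of that convention; set_match and struct match_field are hypothetical stand-ins for the MLX5_SET() machinery, not the real API:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for one matcher field: criteria (mask) + value. */
struct match_field {
	uint16_t mask;
	uint16_t value;
};

/* Program mask and value together, as the MLX5_SET() pairs above do. */
static void set_match(struct match_field *f, uint16_t mask, uint16_t value)
{
	f->mask = mask;
	f->value = value;
}

static int packet_matches(const struct match_field *f, uint16_t pkt_field)
{
	return (pkt_field & f->mask) == (f->value & f->mask);
}

int main(void)
{
	struct match_field udp_dport;

	set_match(&udp_dport, 0xffff, 4789);	/* exact match on the VXLAN port */
	printf("4789 matches: %d\n", packet_matches(&udp_dport, 4789));
	printf("53 matches:   %d\n", packet_matches(&udp_dport, 53));
	return 0;
}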
@@ -622,15 +668,15 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	return 0;
 }
 
-static inline int cmp_encap_info(struct mlx5_encap_info *a,
-				 struct mlx5_encap_info *b)
+static inline int cmp_encap_info(struct ip_tunnel_key *a,
+				 struct ip_tunnel_key *b)
 {
 	return memcmp(a, b, sizeof(*a));
 }
 
-static inline int hash_encap_info(struct mlx5_encap_info *info)
+static inline int hash_encap_info(struct ip_tunnel_key *key)
 {
-	return jhash(info, sizeof(*info), 0);
+	return jhash(key, sizeof(*key), 0);
 }
 
 static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
@@ -638,41 +684,76 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 				   struct net_device **out_dev,
 				   struct flowi4 *fl4,
 				   struct neighbour **out_n,
-				   __be32 *saddr,
 				   int *out_ttl)
 {
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct rtable *rt;
 	struct neighbour *n = NULL;
-	int ttl;
 
 #if IS_ENABLED(CONFIG_INET)
+	int ret;
+
 	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
-	if (IS_ERR(rt)) {
-		pr_warn("%s: no route to %pI4\n", __func__, &fl4->daddr);
-		return -EOPNOTSUPP;
-	}
+	ret = PTR_ERR_OR_ZERO(rt);
+	if (ret)
+		return ret;
 #else
 	return -EOPNOTSUPP;
 #endif
+	/* if the egress device isn't on the same HW e-switch, we use the uplink */
+	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
+		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
+	else
+		*out_dev = rt->dst.dev;
 
-	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) {
-		pr_warn("%s: Can't offload the flow, netdevices aren't on the same HW e-switch\n",
-			__func__);
-		ip_rt_put(rt);
-		return -EOPNOTSUPP;
-	}
-
-	ttl = ip4_dst_hoplimit(&rt->dst);
+	*out_ttl = ip4_dst_hoplimit(&rt->dst);
 	n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
 	ip_rt_put(rt);
 	if (!n)
 		return -ENOMEM;
 
 	*out_n = n;
-	*saddr = fl4->saddr;
-	*out_ttl = ttl;
-	*out_dev = rt->dst.dev;
+	return 0;
+}
+
+static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
+				   struct net_device *mirred_dev,
+				   struct net_device **out_dev,
+				   struct flowi6 *fl6,
+				   struct neighbour **out_n,
+				   int *out_ttl)
+{
+	struct neighbour *n = NULL;
+	struct dst_entry *dst;
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	int ret;
+
+	dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6);
+	ret = dst->error;
+	if (ret) {
+		dst_release(dst);
+		return ret;
+	}
+
+	*out_ttl = ip6_dst_hoplimit(dst);
+
+	/* if the egress device isn't on the same HW e-switch, we use the uplink */
+	if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
+		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
+	else
+		*out_dev = dst->dev;
+#else
+	return -EOPNOTSUPP;
+#endif
+
+	n = dst_neigh_lookup(dst, &fl6->daddr);
+	dst_release(dst);
+	if (!n)
+		return -ENOMEM;
+
+	*out_n = n;
 	return 0;
 }
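cmp_encap_info()/hash_encap_info() now operate on struct ip_tunnel_key directly, which works for both address families because the key holds its IPv4 and IPv6 endpoints in one union inside a fixed-size struct; memcmp()/jhash() over the whole struct is therefore family-agnostic, provided the key was fully zero-initialized so that unused union bytes compare equal. A simplified stand-in, not the kernel definition:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for struct ip_tunnel_key: the family is expressed
 * only through which union member was filled in.  Callers must zero the
 * whole struct first so padding and unused union bytes are deterministic
 * under memcmp().
 */
struct tun_key {
	uint64_t tun_id;
	union {
		struct { uint32_t src, dst; } ipv4;
		struct { uint8_t src[16], dst[16]; } ipv6;
	} u;
	uint16_t tp_src, tp_dst;
};

static int cmp_encap_info(const struct tun_key *a, const struct tun_key *b)
{
	return memcmp(a, b, sizeof(*a));
}

int main(void)
{
	struct tun_key a, b;

	memset(&a, 0, sizeof(a));
	memset(&b, 0, sizeof(b));
	a.tun_id = b.tun_id = 42;
	a.u.ipv4.dst = b.u.ipv4.dst = 0x0a000001;	/* 10.0.0.1 */
	printf("same key: %s\n", cmp_encap_info(&a, &b) ? "no" : "yes");
	return 0;
}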
@@ -712,19 +793,52 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev,
 	return encap_size;
 }
 
+static int gen_vxlan_header_ipv6(struct net_device *out_dev,
+				 char buf[],
+				 unsigned char h_dest[ETH_ALEN],
+				 int ttl,
+				 struct in6_addr *daddr,
+				 struct in6_addr *saddr,
+				 __be16 udp_dst_port,
+				 __be32 vx_vni)
+{
+	int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
+	struct ethhdr *eth = (struct ethhdr *)buf;
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
+	struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
+	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+
+	memset(buf, 0, encap_size);
+
+	ether_addr_copy(eth->h_dest, h_dest);
+	ether_addr_copy(eth->h_source, out_dev->dev_addr);
+	eth->h_proto = htons(ETH_P_IPV6);
+
+	ip6_flow_hdr(ip6h, 0, 0);
+	/* the HW fills up ipv6 payload len */
+	ip6h->nexthdr = IPPROTO_UDP;
+	ip6h->hop_limit = ttl;
+	ip6h->daddr = *daddr;
+	ip6h->saddr = *saddr;
+
+	udp->dest = udp_dst_port;
+	vxh->vx_flags = VXLAN_HF_VNI;
+	vxh->vx_vni = vxlan_vni_field(vx_vni);
+
+	return encap_size;
+}
+
 static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 					  struct net_device *mirred_dev,
 					  struct mlx5_encap_entry *e,
 					  struct net_device **out_dev)
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	struct ip_tunnel_key *tun_key = &e->tun_info.key;
+	int encap_size, ttl, err;
+	struct neighbour *n = NULL;
 	struct flowi4 fl4 = {};
-	struct neighbour *n;
 	char *encap_header;
-	int encap_size;
-	__be32 saddr;
-	int ttl;
-	int err;
 
 	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
 	if (!encap_header)
@@ -733,36 +847,108 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 	switch (e->tunnel_type) {
 	case MLX5_HEADER_TYPE_VXLAN:
 		fl4.flowi4_proto = IPPROTO_UDP;
-		fl4.fl4_dport = e->tun_info.tp_dst;
+		fl4.fl4_dport = tun_key->tp_dst;
 		break;
 	default:
 		err = -EOPNOTSUPP;
 		goto out;
 	}
 
-	fl4.daddr = e->tun_info.daddr;
+	fl4.flowi4_tos = tun_key->tos;
+	fl4.daddr = tun_key->u.ipv4.dst;
+	fl4.saddr = tun_key->u.ipv4.src;
 
 	err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev,
-				      &fl4, &n, &saddr, &ttl);
+				      &fl4, &n, &ttl);
 	if (err)
 		goto out;
 
+	if (!(n->nud_state & NUD_VALID)) {
+		pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr);
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
 	e->n = n;
 	e->out_dev = *out_dev;
 
+	neigh_ha_snapshot(e->h_dest, n, *out_dev);
+
+	switch (e->tunnel_type) {
+	case MLX5_HEADER_TYPE_VXLAN:
+		encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
+						   e->h_dest, ttl,
+						   fl4.daddr,
+						   fl4.saddr, tun_key->tp_dst,
+						   tunnel_id_to_key32(tun_key->tun_id));
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
+			       encap_size, encap_header, &e->encap_id);
+out:
+	if (err && n)
+		neigh_release(n);
+	kfree(encap_header);
+	return err;
+}
+
+static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
+					  struct net_device *mirred_dev,
+					  struct mlx5_encap_entry *e,
+					  struct net_device **out_dev)
+
+{
+	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	struct ip_tunnel_key *tun_key = &e->tun_info.key;
+	int encap_size, err, ttl = 0;
+	struct neighbour *n = NULL;
+	struct flowi6 fl6 = {};
+	char *encap_header;
+
+	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+	if (!encap_header)
+		return -ENOMEM;
+
+	switch (e->tunnel_type) {
+	case MLX5_HEADER_TYPE_VXLAN:
+		fl6.flowi6_proto = IPPROTO_UDP;
+		fl6.fl6_dport = tun_key->tp_dst;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+	fl6.daddr = tun_key->u.ipv6.dst;
+	fl6.saddr = tun_key->u.ipv6.src;
+
+	err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev,
+				      &fl6, &n, &ttl);
+	if (err)
+		goto out;
+
 	if (!(n->nud_state & NUD_VALID)) {
-		err = -ENOTSUPP;
+		pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr);
+		err = -EOPNOTSUPP;
 		goto out;
 	}
 
+	e->n = n;
+	e->out_dev = *out_dev;
+
 	neigh_ha_snapshot(e->h_dest, n, *out_dev);
 
 	switch (e->tunnel_type) {
 	case MLX5_HEADER_TYPE_VXLAN:
-		encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
+		encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
 						   e->h_dest, ttl,
-						   e->tun_info.daddr,
-						   saddr, e->tun_info.tp_dst,
-						   e->tun_info.tun_id);
+						   &fl6.daddr,
+						   &fl6.saddr, tun_key->tp_dst,
+						   tunnel_id_to_key32(tun_key->tun_id));
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -772,6 +958,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
 			       encap_size, encap_header, &e->encap_id);
 out:
+	if (err && n)
+		neigh_release(n);
 	kfree(encap_header);
 	return err;
 }
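gen_vxlan_header_ipv6() lays the outer headers into the preallocated buffer front to back; the only size difference from the IPv4 variant is the fixed 40-byte IPv6 header, and the result must still fit within max_encap_header_size from the HCA caps. A userspace sketch of the arithmetic; VXLAN_HLEN in the kernel is udphdr plus vxlanhdr, and the sizes below are hardcoded here only for illustration:

#include <stdio.h>

#define ETH_HLEN	14	/* struct ethhdr */
#define IPV4_HLEN	20	/* struct iphdr, no options */
#define IPV6_HLEN	40	/* struct ipv6hdr, fixed size */
#define UDP_HLEN	8	/* struct udphdr */
#define VXLAN_HDR	8	/* struct vxlanhdr: flags word + vni word */
#define VXLAN_HLEN	(UDP_HLEN + VXLAN_HDR)

int main(void)
{
	/* 50 bytes for the IPv4 encap, 70 for the IPv6 encap */
	printf("ipv4 encap: %d bytes\n", VXLAN_HLEN + IPV4_HLEN + ETH_HLEN);
	printf("ipv6 encap: %d bytes\n", VXLAN_HLEN + IPV6_HLEN + ETH_HLEN);
	return 0;
}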
@@ -784,40 +972,38 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	unsigned short family = ip_tunnel_info_af(tun_info);
 	struct ip_tunnel_key *key = &tun_info->key;
-	struct mlx5_encap_info info;
 	struct mlx5_encap_entry *e;
 	struct net_device *out_dev;
+	int tunnel_type, err = -EOPNOTSUPP;
 	uintptr_t hash_key;
 	bool found = false;
-	int tunnel_type;
-	int err;
 
-	/* udp dst port must be given */
+	/* udp dst port must be set */
 	if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
+		goto vxlan_encap_offload_err;
+
+	/* setting udp src port isn't supported */
+	if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
+vxlan_encap_offload_err:
+		netdev_warn(priv->netdev,
+			    "must set udp dst port and not set udp src port\n");
 		return -EOPNOTSUPP;
+	}
 
 	if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
 	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
-		info.tp_dst = key->tp_dst;
-		info.tun_id = tunnel_id_to_key32(key->tun_id);
 		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
 	} else {
+		netdev_warn(priv->netdev,
+			    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
 		return -EOPNOTSUPP;
 	}
 
-	switch (family) {
-	case AF_INET:
-		info.daddr = key->u.ipv4.dst;
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	hash_key = hash_encap_info(&info);
+	hash_key = hash_encap_info(key);
 
 	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
 				   encap_hlist, hash_key) {
-		if (!cmp_encap_info(&e->tun_info, &info)) {
+		if (!cmp_encap_info(&e->tun_info.key, key)) {
 			found = true;
 			break;
 		}
@@ -832,11 +1018,15 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 	if (!e)
 		return -ENOMEM;
 
-	e->tun_info = info;
+	e->tun_info = *tun_info;
 	e->tunnel_type = tunnel_type;
 	INIT_LIST_HEAD(&e->flows);
 
-	err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
+	if (family == AF_INET)
+		err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
+	else if (family == AF_INET6)
+		err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev);
+
 	if (err)
 		goto out_err;
@@ -986,7 +1176,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 
 	if (IS_ERR(flow->rule)) {
 		err = PTR_ERR(flow->rule);
-		goto err_free;
+		goto err_del_rule;
 	}
 
 	err = rhashtable_insert_fast(&tc->ht, &flow->node,
@@ -997,7 +1187,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 	goto out;
 
 err_del_rule:
-	mlx5_del_flow_rules(flow->rule);
+	mlx5e_tc_del_flow(priv, flow);
 
 err_free:
 	kfree(flow);
@@ -1050,10 +1240,14 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
 
 	mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
 
+	preempt_disable();
+
 	tcf_exts_to_list(f->exts, &actions);
 	list_for_each_entry(a, &actions, list)
 		tcf_action_stats_update(a, bytes, packets, lastuse);
 
+	preempt_enable();
+
 	return 0;
 }
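The preempt_disable()/preempt_enable() pair in the last hunk is needed because the stats_update callbacks reached through tcf_action_stats_update() bump per-CPU counters via this_cpu_ptr(), which is only safe when the task cannot migrate between picking its CPU slot and writing it. A userspace analogue of that invariant, with thread-local storage standing in for per-CPU data (a thread is pinned to its TLS slot the way preemption-disabled kernel code is pinned to its CPU); names are illustrative, not kernel API:

#include <stdio.h>

/* Thread-local slot standing in for a per-CPU counter: no other thread
 * can touch it, so plain additions are race-free, just as a per-CPU
 * counter is race-free while preemption is disabled.
 */
static _Thread_local unsigned long long tl_bytes;
static _Thread_local unsigned long long tl_packets;

static void stats_update(unsigned long long bytes, unsigned long long packets)
{
	tl_bytes += bytes;
	tl_packets += packets;
}

int main(void)
{
	stats_update(1514, 1);
	printf("this 'cpu': %llu bytes, %llu packets\n", tl_bytes, tl_packets);
	return 0;
}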