aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_tc.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c338
1 files changed, 266 insertions, 72 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index f8829b517156..44406a5ec15d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -161,15 +161,21 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
}
}
+/* we get here also when setting rule to the FW failed, etc. It means that the
+ * flow rule itself might not exist, but some offloading related to the actions
+ * should be cleaned.
+ */
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_fc *counter = NULL;
- counter = mlx5_flow_rule_counter(flow->rule);
-
- mlx5_del_flow_rules(flow->rule);
+ if (!IS_ERR(flow->rule)) {
+ counter = mlx5_flow_rule_counter(flow->rule);
+ mlx5_del_flow_rules(flow->rule);
+ mlx5_fc_destroy(priv->mdev, counter);
+ }
if (esw && esw->mode == SRIOV_OFFLOADS) {
mlx5_eswitch_del_vlan_action(esw, flow->attr);
@@ -177,8 +183,6 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
mlx5e_detach_encap(priv, flow);
}
- mlx5_fc_destroy(priv->mdev, counter);
-
if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
mlx5_destroy_flow_table(priv->fs.tc.t);
priv->fs.tc.t = NULL;
@@ -225,6 +229,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers);
+ struct flow_dissector_key_control *enc_control =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ f->key);
+
if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
struct flow_dissector_key_ports *key =
skb_flow_dissector_target(f->dissector,
@@ -237,28 +246,34 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
/* Full udp dst port must be given */
if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
- return -EOPNOTSUPP;
-
- /* udp src port isn't supported */
- if (memchr_inv(&mask->src, 0, sizeof(mask->src)))
- return -EOPNOTSUPP;
+ goto vxlan_match_offload_err;
if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
parse_vxlan_attr(spec, f);
- else
+ else {
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
return -EOPNOTSUPP;
+ }
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
udp_dport, ntohs(mask->dst));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
udp_dport, ntohs(key->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(key->src));
} else { /* udp dst port must be given */
- return -EOPNOTSUPP;
+vxlan_match_offload_err:
+ netdev_warn(priv->netdev,
+ "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
+ return -EOPNOTSUPP;
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+ if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
struct flow_dissector_key_ipv4_addrs *key =
skb_flow_dissector_target(f->dissector,
FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
@@ -280,10 +295,36 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
ntohl(key->dst));
- }
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
+ } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ f->mask);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
+ }
/* Enforce DMAC when offloading incoming tunneled flows.
* Flow counters require a match on the DMAC.
@@ -343,6 +384,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
f->key);
switch (key->addr_type) {
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
if (parse_tunnel_attr(priv, spec, f))
return -EOPNOTSUPP;
break;
@@ -375,6 +417,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
key->flags & FLOW_DIS_IS_FRAGMENT);
+
+ /* the HW doesn't need L3 inline to match on frag=no */
+ if (key->flags & FLOW_DIS_IS_FRAGMENT)
+ *min_inline = MLX5_INLINE_MODE_IP;
}
}
@@ -438,8 +484,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
FLOW_DISSECTOR_KEY_VLAN,
f->mask);
if (mask->vlan_id || mask->vlan_priority) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
@@ -622,15 +668,15 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
return 0;
}
-static inline int cmp_encap_info(struct mlx5_encap_info *a,
- struct mlx5_encap_info *b)
+static inline int cmp_encap_info(struct ip_tunnel_key *a,
+ struct ip_tunnel_key *b)
{
return memcmp(a, b, sizeof(*a));
}
-static inline int hash_encap_info(struct mlx5_encap_info *info)
+static inline int hash_encap_info(struct ip_tunnel_key *key)
{
- return jhash(info, sizeof(*info), 0);
+ return jhash(key, sizeof(*key), 0);
}
static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
@@ -638,41 +684,76 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
struct net_device **out_dev,
struct flowi4 *fl4,
struct neighbour **out_n,
- __be32 *saddr,
int *out_ttl)
{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct rtable *rt;
struct neighbour *n = NULL;
- int ttl;
#if IS_ENABLED(CONFIG_INET)
+ int ret;
+
rt = ip_route_output_key(dev_net(mirred_dev), fl4);
- if (IS_ERR(rt)) {
- pr_warn("%s: no route to %pI4\n", __func__, &fl4->daddr);
- return -EOPNOTSUPP;
- }
+ ret = PTR_ERR_OR_ZERO(rt);
+ if (ret)
+ return ret;
#else
return -EOPNOTSUPP;
#endif
+ /* if the egress device isn't on the same HW e-switch, we use the uplink */
+ if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
+ *out_dev = mlx5_eswitch_get_uplink_netdev(esw);
+ else
+ *out_dev = rt->dst.dev;
- if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) {
- pr_warn("%s: Can't offload the flow, netdevices aren't on the same HW e-switch\n",
- __func__);
- ip_rt_put(rt);
- return -EOPNOTSUPP;
- }
-
- ttl = ip4_dst_hoplimit(&rt->dst);
+ *out_ttl = ip4_dst_hoplimit(&rt->dst);
n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
ip_rt_put(rt);
if (!n)
return -ENOMEM;
*out_n = n;
- *saddr = fl4->saddr;
- *out_ttl = ttl;
- *out_dev = rt->dst.dev;
+ return 0;
+}
+
+static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct flowi6 *fl6,
+ struct neighbour **out_n,
+ int *out_ttl)
+{
+ struct neighbour *n = NULL;
+ struct dst_entry *dst;
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int ret;
+
+ dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6);
+ ret = dst->error;
+ if (ret) {
+ dst_release(dst);
+ return ret;
+ }
+
+ *out_ttl = ip6_dst_hoplimit(dst);
+ /* if the egress device isn't on the same HW e-switch, we use the uplink */
+ if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
+ *out_dev = mlx5_eswitch_get_uplink_netdev(esw);
+ else
+ *out_dev = dst->dev;
+#else
+ return -EOPNOTSUPP;
+#endif
+
+ n = dst_neigh_lookup(dst, &fl6->daddr);
+ dst_release(dst);
+ if (!n)
+ return -ENOMEM;
+
+ *out_n = n;
return 0;
}
@@ -712,19 +793,52 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev,
return encap_size;
}
+static int gen_vxlan_header_ipv6(struct net_device *out_dev,
+ char buf[],
+ unsigned char h_dest[ETH_ALEN],
+ int ttl,
+ struct in6_addr *daddr,
+ struct in6_addr *saddr,
+ __be16 udp_dst_port,
+ __be32 vx_vni)
+{
+ int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
+ struct ethhdr *eth = (struct ethhdr *)buf;
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
+ struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
+ struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+
+ memset(buf, 0, encap_size);
+
+ ether_addr_copy(eth->h_dest, h_dest);
+ ether_addr_copy(eth->h_source, out_dev->dev_addr);
+ eth->h_proto = htons(ETH_P_IPV6);
+
+ ip6_flow_hdr(ip6h, 0, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->nexthdr = IPPROTO_UDP;
+ ip6h->hop_limit = ttl;
+ ip6h->daddr = *daddr;
+ ip6h->saddr = *saddr;
+
+ udp->dest = udp_dst_port;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(vx_vni);
+
+ return encap_size;
+}
+
static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5_encap_entry *e,
struct net_device **out_dev)
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ int encap_size, ttl, err;
+ struct neighbour *n = NULL;
struct flowi4 fl4 = {};
- struct neighbour *n;
char *encap_header;
- int encap_size;
- __be32 saddr;
- int ttl;
- int err;
encap_header = kzalloc(max_encap_size, GFP_KERNEL);
if (!encap_header)
@@ -733,36 +847,108 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
switch (e->tunnel_type) {
case MLX5_HEADER_TYPE_VXLAN:
fl4.flowi4_proto = IPPROTO_UDP;
- fl4.fl4_dport = e->tun_info.tp_dst;
+ fl4.fl4_dport = tun_key->tp_dst;
break;
default:
err = -EOPNOTSUPP;
goto out;
}
- fl4.daddr = e->tun_info.daddr;
+ fl4.flowi4_tos = tun_key->tos;
+ fl4.daddr = tun_key->u.ipv4.dst;
+ fl4.saddr = tun_key->u.ipv4.src;
err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev,
- &fl4, &n, &saddr, &ttl);
+ &fl4, &n, &ttl);
if (err)
goto out;
+ if (!(n->nud_state & NUD_VALID)) {
+ pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr);
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
e->n = n;
e->out_dev = *out_dev;
+ neigh_ha_snapshot(e->h_dest, n, *out_dev);
+
+ switch (e->tunnel_type) {
+ case MLX5_HEADER_TYPE_VXLAN:
+ encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
+ e->h_dest, ttl,
+ fl4.daddr,
+ fl4.saddr, tun_key->tp_dst,
+ tunnel_id_to_key32(tun_key->tun_id));
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
+ encap_size, encap_header, &e->encap_id);
+out:
+ if (err && n)
+ neigh_release(n);
+ kfree(encap_header);
+ return err;
+}
+
+static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5_encap_entry *e,
+ struct net_device **out_dev)
+
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ int encap_size, err, ttl = 0;
+ struct neighbour *n = NULL;
+ struct flowi6 fl6 = {};
+ char *encap_header;
+
+ encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+ if (!encap_header)
+ return -ENOMEM;
+
+ switch (e->tunnel_type) {
+ case MLX5_HEADER_TYPE_VXLAN:
+ fl6.flowi6_proto = IPPROTO_UDP;
+ fl6.fl6_dport = tun_key->tp_dst;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ fl6.daddr = tun_key->u.ipv6.dst;
+ fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev,
+ &fl6, &n, &ttl);
+ if (err)
+ goto out;
+
if (!(n->nud_state & NUD_VALID)) {
- err = -ENOTSUPP;
+ pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr);
+ err = -EOPNOTSUPP;
goto out;
}
+ e->n = n;
+ e->out_dev = *out_dev;
+
neigh_ha_snapshot(e->h_dest, n, *out_dev);
switch (e->tunnel_type) {
case MLX5_HEADER_TYPE_VXLAN:
- encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
+ encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
e->h_dest, ttl,
- e->tun_info.daddr,
- saddr, e->tun_info.tp_dst,
- e->tun_info.tun_id);
+ &fl6.daddr,
+ &fl6.saddr, tun_key->tp_dst,
+ tunnel_id_to_key32(tun_key->tun_id));
break;
default:
err = -EOPNOTSUPP;
@@ -772,6 +958,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
encap_size, encap_header, &e->encap_id);
out:
+ if (err && n)
+ neigh_release(n);
kfree(encap_header);
return err;
}
@@ -784,40 +972,38 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
unsigned short family = ip_tunnel_info_af(tun_info);
struct ip_tunnel_key *key = &tun_info->key;
- struct mlx5_encap_info info;
struct mlx5_encap_entry *e;
struct net_device *out_dev;
+ int tunnel_type, err = -EOPNOTSUPP;
uintptr_t hash_key;
bool found = false;
- int tunnel_type;
- int err;
- /* udp dst port must be given */
+ /* udp dst port must be set */
if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
+ goto vxlan_encap_offload_err;
+
+ /* setting udp src port isn't supported */
+ if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
+vxlan_encap_offload_err:
+ netdev_warn(priv->netdev,
+ "must set udp dst port and not set udp src port\n");
return -EOPNOTSUPP;
+ }
if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
- info.tp_dst = key->tp_dst;
- info.tun_id = tunnel_id_to_key32(key->tun_id);
tunnel_type = MLX5_HEADER_TYPE_VXLAN;
} else {
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
return -EOPNOTSUPP;
}
- switch (family) {
- case AF_INET:
- info.daddr = key->u.ipv4.dst;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- hash_key = hash_encap_info(&info);
+ hash_key = hash_encap_info(key);
hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
encap_hlist, hash_key) {
- if (!cmp_encap_info(&e->tun_info, &info)) {
+ if (!cmp_encap_info(&e->tun_info.key, key)) {
found = true;
break;
}
@@ -832,11 +1018,15 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
if (!e)
return -ENOMEM;
- e->tun_info = info;
+ e->tun_info = *tun_info;
e->tunnel_type = tunnel_type;
INIT_LIST_HEAD(&e->flows);
- err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
+ if (family == AF_INET)
+ err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
+ else if (family == AF_INET6)
+ err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev);
+
if (err)
goto out_err;
@@ -986,7 +1176,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
if (IS_ERR(flow->rule)) {
err = PTR_ERR(flow->rule);
- goto err_free;
+ goto err_del_rule;
}
err = rhashtable_insert_fast(&tc->ht, &flow->node,
@@ -997,7 +1187,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
goto out;
err_del_rule:
- mlx5_del_flow_rules(flow->rule);
+ mlx5e_tc_del_flow(priv, flow);
err_free:
kfree(flow);
@@ -1050,10 +1240,14 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+ preempt_disable();
+
tcf_exts_to_list(f->exts, &actions);
list_for_each_entry(a, &actions, list)
tcf_action_stats_update(a, bytes, packets, lastuse);
+ preempt_enable();
+
return 0;
}