From 768c3667e6f36bc9db0dac854aa198651b27412f Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 12 May 2020 16:41:41 +0300 Subject: net/mlx5e: Extract TC-specific code from en_rep.c to rep/tc.c As a preparation for introducing new kconfig option that controls compilation of all TC offloads code in mlx5, extract TC-specific code from en_rep.c to standalone file. This allows easily compiling out the code by only including new source in make file when corresponding kconfig is enabled instead of adding multiple ifdef blocks to en_rep. Signed-off-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_rep.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 6a2337900420..74d46e9a201a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -203,16 +203,19 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); -int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e); -void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e); - void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); bool mlx5e_eswitch_rep(struct net_device *netdev); bool mlx5e_eswitch_uplink_rep(struct net_device *netdev); +struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh); +int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh_hash_entry **nhe); +void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe); + #else /* CONFIG_MLX5_ESWITCH */ static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } -- cgit v1.2.3-59-g8ed1b From 549c243e4e010067a075e248f4d72e8dda844e12 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 12 May 2020 17:29:22 +0300 Subject: net/mlx5e: Extract neigh-specific code from en_rep.c to rep/neigh.c As a preparation for introducing new kconfig option that controls compilation of all TC offloads code in mlx5, extract neigh-specific code from en_rep.c to standalone file. This allows easily compiling out the code by only including new source in make file when corresponding kconfig is enabled instead of adding multiple ifdef blocks to en_rep. Signed-off-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/en/rep/neigh.c | 368 +++++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en/rep/neigh.h | 23 ++ .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 1 + .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 354 +------------------- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 8 - drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 - 8 files changed, 395 insertions(+), 368 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_rep.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index c21453970dbb..3c9d78e6695c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -33,7 +33,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/rep/tc.o en/tc_tun.o lib/port_tun.o \ +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/rep/tc.o en/rep/neigh.o en/tc_tun.o lib/port_tun.o \ lag_mp.o \ lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ en/tc_tun_geneve.o diag/en_tc_tracepoint.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c new file mode 100644 index 000000000000..baa162432e75 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "neigh.h" +#include "tc.h" +#include "en_rep.h" +#include "fs_core.h" +#include "diag/en_rep_tracepoint.h" + +static unsigned long mlx5e_rep_ipv6_interval(void) +{ + if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) + return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); + + return ~0UL; +} + +static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) +{ + unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + + rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); + mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); +} + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + + mlx5_fc_queue_stats_work(priv->mdev, + &neigh_update->neigh_stats_work, + neigh_update->min_interval); +} + +static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) +{ + return refcount_inc_not_zero(&nhe->refcnt); +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); + +void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) +{ + if (refcount_dec_and_test(&nhe->refcnt)) { + mlx5e_rep_neigh_entry_remove(nhe); + kfree_rcu(nhe, rcu); + } +} + +static struct mlx5e_neigh_hash_entry * +mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh_hash_entry *next = NULL; + + rcu_read_lock(); + + for (next = nhe ? + list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &nhe->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list) : + list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list); + next; + next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &next->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list)) + if (mlx5e_rep_neigh_entry_hold(next)) + break; + + rcu_read_unlock(); + + if (nhe) + mlx5e_rep_neigh_entry_release(nhe); + + return next; +} + +static void mlx5e_rep_neigh_stats_work(struct work_struct *work) +{ + struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, + neigh_update.neigh_stats_work.work); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + + rtnl_lock(); + if (!list_empty(&rpriv->neigh_update.neigh_list)) + mlx5e_rep_queue_neigh_stats_work(priv); + + while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) + mlx5e_tc_update_neigh_used_value(nhe); + + rtnl_unlock(); +} + +static void mlx5e_rep_neigh_update(struct work_struct *work) +{ + struct mlx5e_neigh_hash_entry *nhe = + container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); + struct neighbour *n = nhe->n; + struct mlx5e_encap_entry *e; + unsigned char ha[ETH_ALEN]; + struct mlx5e_priv *priv; + bool neigh_connected; + u8 nud_state, dead; + + rtnl_lock(); + + /* If these parameters are changed after we release the lock, + * we'll receive another event letting us know about it. + * We use this lock to avoid inconsistency between the neigh validity + * and it's hw address. + */ + read_lock_bh(&n->lock); + memcpy(ha, n->ha, ETH_ALEN); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + + neigh_connected = (nud_state & NUD_VALID) && !dead; + + trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + if (!mlx5e_encap_take(e)) + continue; + + priv = netdev_priv(e->out_dev); + mlx5e_rep_update_flows(priv, e, neigh_connected, ha); + mlx5e_encap_put(priv, e); + } + mlx5e_rep_neigh_entry_release(nhe); + rtnl_unlock(); + neigh_release(n); +} + +static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe, + struct neighbour *n) +{ + /* Take a reference to ensure the neighbour and mlx5 encap + * entry won't be destructed until we drop the reference in + * delayed work. + */ + neigh_hold(n); + + /* This assignment is valid as long as the the neigh reference + * is taken + */ + nhe->n = n; + + if (!queue_work(priv->wq, &nhe->neigh_update_work)) { + mlx5e_rep_neigh_entry_release(nhe); + neigh_release(n); + } +} + +static int mlx5e_rep_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + neigh_update.netevent_nb); + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + struct mlx5e_neigh m_neigh = {}; + struct neigh_parms *p; + struct neighbour *n; + bool found = false; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + n = ptr; +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + m_neigh.dev = n->dev; + m_neigh.family = n->ops->family; + memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + + rcu_read_lock(); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); + rcu_read_unlock(); + if (!nhe) + return NOTIFY_DONE; + + mlx5e_rep_queue_neigh_update_work(priv, nhe, n); + break; + + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We check the device is present since we don't care about + * changes in the default table, we only care about changes + * done per device delay prob time parameter. + */ +#if IS_ENABLED(CONFIG_IPV6) + if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) +#else + if (!p->dev || p->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + rcu_read_lock(); + list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, + neigh_list) { + if (p->dev == nhe->m_neigh.dev) { + found = true; + break; + } + } + rcu_read_unlock(); + if (!found) + return NOTIFY_DONE; + + neigh_update->min_interval = min_t(unsigned long, + NEIGH_VAR(p, DELAY_PROBE_TIME), + neigh_update->min_interval); + mlx5_fc_update_sampling_interval(priv->mdev, + neigh_update->min_interval); + break; + } + return NOTIFY_DONE; +} + +static const struct rhashtable_params mlx5e_neigh_ht_params = { + .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), + .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), + .key_len = sizeof(struct mlx5e_neigh), + .automatic_shrinking = true, +}; + +int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + int err; + + err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); + if (err) + return err; + + INIT_LIST_HEAD(&neigh_update->neigh_list); + mutex_init(&neigh_update->encap_lock); + INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, + mlx5e_rep_neigh_stats_work); + mlx5e_rep_neigh_update_init_interval(rpriv); + + rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event; + err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb); + if (err) + goto out_err; + return 0; + +out_err: + rhashtable_destroy(&neigh_update->neigh_ht); + return err; +} + +void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + unregister_netevent_notifier(&neigh_update->netevent_nb); + + flush_workqueue(priv->wq); /* flush neigh update works */ + + cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); + + mutex_destroy(&neigh_update->encap_lock); + rhashtable_destroy(&neigh_update->neigh_ht); +} + +static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + if (err) + return err; + + list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); + + return err; +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; + + mutex_lock(&rpriv->neigh_update.encap_lock); + + list_del_rcu(&nhe->neigh_list); + + rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + mutex_unlock(&rpriv->neigh_update.encap_lock); +} + +/* This function must only be called under the representor's encap_lock or + * inside rcu read lock section. + */ +struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_neigh_hash_entry *nhe; + + nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, + mlx5e_neigh_ht_params); + return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; +} + +int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh_hash_entry **nhe) +{ + int err; + + *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); + if (!*nhe) + return -ENOMEM; + + (*nhe)->priv = priv; + memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); + INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); + spin_lock_init(&(*nhe)->encap_list_lock); + INIT_LIST_HEAD(&(*nhe)->encap_list); + refcount_set(&(*nhe)->refcnt, 1); + + err = mlx5e_rep_neigh_entry_insert(priv, *nhe); + if (err) + goto out_free; + return 0; + +out_free: + kfree(*nhe); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h new file mode 100644 index 000000000000..8eddb3ac0d74 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __MLX5_EN_REP_NEIGH__ +#define __MLX5_EN_REP_NEIGH__ + +#include "en.h" +#include "en_rep.h" + +int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv); + +struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh); +int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh_hash_entry **nhe); +void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe); + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +#endif /* __MLX5_EN_REP_NEIGH__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index edc574582135..c609a5e50ebc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -9,6 +9,7 @@ #include #include #include "tc.h" +#include "neigh.h" #include "en_rep.h" #include "eswitch.h" #include "esw/chains.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 9fdd79afa6e4..9be1fcc269b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -7,6 +7,7 @@ #include "en/tc_tun.h" #include "en_tc.h" #include "rep/tc.h" +#include "rep/neigh.h" struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index c84f0d9b516e..a46405c6d560 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -46,6 +45,7 @@ #include "en_rep.h" #include "en_tc.h" #include "en/rep/tc.h" +#include "en/rep/neigh.h" #include "fs_core.h" #include "lib/mlx5.h" #define CREATE_TRACE_POINTS @@ -474,358 +474,6 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) mlx5e_sqs2vport_stop(esw, rep); } -static unsigned long mlx5e_rep_ipv6_interval(void) -{ - if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) - return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); - - return ~0UL; -} - -static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) -{ - unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); - unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); - struct net_device *netdev = rpriv->netdev; - struct mlx5e_priv *priv = netdev_priv(netdev); - - rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); - mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); -} - -void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - - mlx5_fc_queue_stats_work(priv->mdev, - &neigh_update->neigh_stats_work, - neigh_update->min_interval); -} - -static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) -{ - return refcount_inc_not_zero(&nhe->refcnt); -} - -static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); - -void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) -{ - if (refcount_dec_and_test(&nhe->refcnt)) { - mlx5e_rep_neigh_entry_remove(nhe); - kfree_rcu(nhe, rcu); - } -} - -static struct mlx5e_neigh_hash_entry * -mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, - struct mlx5e_neigh_hash_entry *nhe) -{ - struct mlx5e_neigh_hash_entry *next = NULL; - - rcu_read_lock(); - - for (next = nhe ? - list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, - &nhe->neigh_list, - struct mlx5e_neigh_hash_entry, - neigh_list) : - list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, - struct mlx5e_neigh_hash_entry, - neigh_list); - next; - next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, - &next->neigh_list, - struct mlx5e_neigh_hash_entry, - neigh_list)) - if (mlx5e_rep_neigh_entry_hold(next)) - break; - - rcu_read_unlock(); - - if (nhe) - mlx5e_rep_neigh_entry_release(nhe); - - return next; -} - -static void mlx5e_rep_neigh_stats_work(struct work_struct *work) -{ - struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, - neigh_update.neigh_stats_work.work); - struct net_device *netdev = rpriv->netdev; - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_neigh_hash_entry *nhe = NULL; - - rtnl_lock(); - if (!list_empty(&rpriv->neigh_update.neigh_list)) - mlx5e_rep_queue_neigh_stats_work(priv); - - while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) - mlx5e_tc_update_neigh_used_value(nhe); - - rtnl_unlock(); -} - -static void mlx5e_rep_neigh_update(struct work_struct *work) -{ - struct mlx5e_neigh_hash_entry *nhe = - container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); - struct neighbour *n = nhe->n; - struct mlx5e_encap_entry *e; - unsigned char ha[ETH_ALEN]; - struct mlx5e_priv *priv; - bool neigh_connected; - u8 nud_state, dead; - - rtnl_lock(); - - /* If these parameters are changed after we release the lock, - * we'll receive another event letting us know about it. - * We use this lock to avoid inconsistency between the neigh validity - * and it's hw address. - */ - read_lock_bh(&n->lock); - memcpy(ha, n->ha, ETH_ALEN); - nud_state = n->nud_state; - dead = n->dead; - read_unlock_bh(&n->lock); - - neigh_connected = (nud_state & NUD_VALID) && !dead; - - trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); - - list_for_each_entry(e, &nhe->encap_list, encap_list) { - if (!mlx5e_encap_take(e)) - continue; - - priv = netdev_priv(e->out_dev); - mlx5e_rep_update_flows(priv, e, neigh_connected, ha); - mlx5e_encap_put(priv, e); - } - mlx5e_rep_neigh_entry_release(nhe); - rtnl_unlock(); - neigh_release(n); -} - -static void -mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv, - struct mlx5e_neigh_hash_entry *nhe, - struct neighbour *n) -{ - /* Take a reference to ensure the neighbour and mlx5 encap - * entry won't be destructed until we drop the reference in - * delayed work. - */ - neigh_hold(n); - - /* This assignment is valid as long as the the neigh reference - * is taken - */ - nhe->n = n; - - if (!queue_work(priv->wq, &nhe->neigh_update_work)) { - mlx5e_rep_neigh_entry_release(nhe); - neigh_release(n); - } -} - -static int mlx5e_rep_netevent_event(struct notifier_block *nb, - unsigned long event, void *ptr) -{ - struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, - neigh_update.netevent_nb); - struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - struct net_device *netdev = rpriv->netdev; - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_neigh_hash_entry *nhe = NULL; - struct mlx5e_neigh m_neigh = {}; - struct neigh_parms *p; - struct neighbour *n; - bool found = false; - - switch (event) { - case NETEVENT_NEIGH_UPDATE: - n = ptr; -#if IS_ENABLED(CONFIG_IPV6) - if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) -#else - if (n->tbl != &arp_tbl) -#endif - return NOTIFY_DONE; - - m_neigh.dev = n->dev; - m_neigh.family = n->ops->family; - memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - - rcu_read_lock(); - nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); - rcu_read_unlock(); - if (!nhe) - return NOTIFY_DONE; - - mlx5e_rep_queue_neigh_update_work(priv, nhe, n); - break; - - case NETEVENT_DELAY_PROBE_TIME_UPDATE: - p = ptr; - - /* We check the device is present since we don't care about - * changes in the default table, we only care about changes - * done per device delay prob time parameter. - */ -#if IS_ENABLED(CONFIG_IPV6) - if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) -#else - if (!p->dev || p->tbl != &arp_tbl) -#endif - return NOTIFY_DONE; - - rcu_read_lock(); - list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, - neigh_list) { - if (p->dev == nhe->m_neigh.dev) { - found = true; - break; - } - } - rcu_read_unlock(); - if (!found) - return NOTIFY_DONE; - - neigh_update->min_interval = min_t(unsigned long, - NEIGH_VAR(p, DELAY_PROBE_TIME), - neigh_update->min_interval); - mlx5_fc_update_sampling_interval(priv->mdev, - neigh_update->min_interval); - break; - } - return NOTIFY_DONE; -} - -static const struct rhashtable_params mlx5e_neigh_ht_params = { - .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), - .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), - .key_len = sizeof(struct mlx5e_neigh), - .automatic_shrinking = true, -}; - -static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) -{ - struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - int err; - - err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); - if (err) - return err; - - INIT_LIST_HEAD(&neigh_update->neigh_list); - mutex_init(&neigh_update->encap_lock); - INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, - mlx5e_rep_neigh_stats_work); - mlx5e_rep_neigh_update_init_interval(rpriv); - - rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event; - err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb); - if (err) - goto out_err; - return 0; - -out_err: - rhashtable_destroy(&neigh_update->neigh_ht); - return err; -} - -static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) -{ - struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); - - unregister_netevent_notifier(&neigh_update->netevent_nb); - - flush_workqueue(priv->wq); /* flush neigh update works */ - - cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); - - mutex_destroy(&neigh_update->encap_lock); - rhashtable_destroy(&neigh_update->neigh_ht); -} - -static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, - struct mlx5e_neigh_hash_entry *nhe) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - int err; - - err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, - &nhe->rhash_node, - mlx5e_neigh_ht_params); - if (err) - return err; - - list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); - - return err; -} - -static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) -{ - struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; - - mutex_lock(&rpriv->neigh_update.encap_lock); - - list_del_rcu(&nhe->neigh_list); - - rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, - &nhe->rhash_node, - mlx5e_neigh_ht_params); - mutex_unlock(&rpriv->neigh_update.encap_lock); -} - -/* This function must only be called under the representor's encap_lock or - * inside rcu read lock section. - */ -struct mlx5e_neigh_hash_entry * -mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, - struct mlx5e_neigh *m_neigh) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - struct mlx5e_neigh_hash_entry *nhe; - - nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, - mlx5e_neigh_ht_params); - return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; -} - -int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e, - struct mlx5e_neigh_hash_entry **nhe) -{ - int err; - - *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); - if (!*nhe) - return -ENOMEM; - - (*nhe)->priv = priv; - memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); - INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); - spin_lock_init(&(*nhe)->encap_list_lock); - INIT_LIST_HEAD(&(*nhe)->encap_list); - refcount_set(&(*nhe)->refcnt, 1); - - err = mlx5e_rep_neigh_entry_insert(priv, *nhe); - if (err) - goto out_free; - return 0; - -out_free: - kfree(*nhe); - return err; -} - static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 74d46e9a201a..81ed06e58fea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -208,14 +208,6 @@ void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); bool mlx5e_eswitch_rep(struct net_device *netdev); bool mlx5e_eswitch_uplink_rep(struct net_device *netdev); -struct mlx5e_neigh_hash_entry * -mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, - struct mlx5e_neigh *m_neigh); -int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e, - struct mlx5e_neigh_hash_entry **nhe); -void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe); - #else /* CONFIG_MLX5_ESWITCH */ static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 251975ccbdf7..749390dc7aaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4777,9 +4777,3 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work) } mutex_unlock(&rpriv->unready_flows_lock); } - -void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) -{ - if (tc_priv->tun_dev) - dev_put(tc_priv->tun_dev); -} -- cgit v1.2.3-59-g8ed1b From 14e6b038afa014ac2288a2f3d692697f708ba344 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 3 Feb 2020 13:44:14 +0200 Subject: net/mlx5e: Add support for hw decapsulation of MPLS over UDP MPLS over UDP is supported in hardware by using a packet reformat object with reformat type equal L3_TUNNEL_TO_L2 which both decapsulates the outer L3, L4 and MPLS headers, and allows for setting the L2 headers of the resulting decapsulated packet. For the hardware to operate correctly, the configuration of the firmware must have FLEX_PARSER_PROFILE_ENABLE = 1. Example tc rule: tc filter add dev bareudp0 protocol all prio 1 root flower enc_dst_port \ 6635 enc_src_ip 8.8.8.23 action mpls pop protocol ip pipe \ action pedit ex munge eth dst set 00:11:22:33:44:21 pipe action \ mirred egress redirect dev enp59s0f0_0 We use pedit to set the correct destination MAC. For MPLS over UDP decapsulation to take place, the driver logic requires the following: 1. flower filter added on bareudp device. 2. action mpls pop 3. zero or more pedit munge actions 4. one redirect action Current implementation supports only IPv4 and no VLAN. tc filter show output looks like this: filter protocol all pref 1 flower chain 0 filter protocol all pref 1 flower chain 0 handle 0x1 enc_src_ip 8.8.8.24 enc_dst_port 6635 in_hw in_hw_count 1 action order 1: mpls pop protocol ip pipe index 2 ref 1 bind 1 action order 2: pedit action pipe keys 2 index 1 ref 1 bind 1 key #0 at eth+0: val 00112233 mask 00000000 key #1 at eth+4: val 44210000 mask 0000ffff action order 3: mirred (Egress Redirect to device enp59s0f0_0) stolen index 2 ref 1 bind 1 Signed-off-by: Eli Cohen Reviewed-by: Roi Dayan Reviewed-by: Eli Britstein Reviewed-by: Vlad Buslov Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 16 ++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 217 ++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 3 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 + 5 files changed, 238 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_rep.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 81ed06e58fea..93e911baacad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -158,6 +158,22 @@ struct mlx5e_neigh_hash_entry { enum { /* set when the encap entry is successfully offloaded into HW */ MLX5_ENCAP_ENTRY_VALID = BIT(0), + MLX5_REFORMAT_DECAP = BIT(1), +}; + +struct mlx5e_decap_key { + struct ethhdr key; +}; + +struct mlx5e_decap_entry { + struct mlx5e_decap_key key; + struct list_head flows; + struct hlist_node hlist; + refcount_t refcnt; + struct completion res_ready; + int compl_result; + struct mlx5_pkt_reformat *pkt_reformat; + struct rcu_head rcu; }; struct mlx5e_encap_entry { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 801fcd1b5f85..a6b18f0444e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -93,6 +94,7 @@ enum { MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, + MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8, }; #define MLX5E_TC_MAX_SPLITS 1 @@ -126,6 +128,11 @@ struct mlx5e_tc_flow { u64 cookie; unsigned long flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; + + /* flows sharing the same reformat object - currently mpls decap */ + struct list_head l3_to_l2_reformat; + struct mlx5e_decap_entry *decap_reformat; + /* Flow can be associated with multiple encap IDs. * The number of encaps is bounded by the number of supported * destinations. @@ -157,6 +164,7 @@ struct mlx5e_tc_flow_parse_attr { struct mlx5_flow_spec spec; struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; + struct ethhdr eth; }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 @@ -1124,6 +1132,11 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct netlink_ext_ack *extack, struct net_device **encap_dev, bool *encap_valid); +static int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack); +static void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); static struct mlx5_flow_handle * mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, @@ -1299,6 +1312,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (flow_flag_test(flow, L3_TO_L2_DECAP)) { + err = mlx5e_attach_decap(priv, flow, extack); + if (err) + return err; + } + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { int mirred_ifindex; @@ -1408,6 +1427,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(attr->counter_dev, attr->counter); + + if (flow_flag_test(flow, L3_TO_L2_DECAP)) + mlx5e_detach_decap(priv, flow); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -1684,6 +1706,17 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr kfree_rcu(e, rcu); } +static void mlx5e_decap_dealloc(struct mlx5e_priv *priv, + struct mlx5e_decap_entry *d) +{ + WARN_ON(!list_empty(&d->flows)); + + if (!d->compl_result) + mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat); + + kfree_rcu(d, rcu); +} + void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -1696,6 +1729,18 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) mlx5e_encap_dealloc(priv, e); } +static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock)) + return; + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + static void mlx5e_detach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, int out_index) { @@ -1719,6 +1764,29 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, mlx5e_encap_dealloc(priv, e); } +static void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_entry *d = flow->decap_reformat; + + if (!d) + return; + + mutex_lock(&esw->offloads.decap_tbl_lock); + list_del(&flow->l3_to_l2_reformat); + flow->decap_reformat = NULL; + + if (!refcount_dec_and_test(&d->refcnt)) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + return; + } + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; @@ -1990,7 +2058,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return err; } - flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + /* With mpls over udp we decapsulate using packet reformat + * object + */ + if (!netif_is_bareudp(filter_dev)) + flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; } if (!needs_mapping && !sets_mapping) @@ -3285,12 +3357,22 @@ static inline int cmp_encap_info(struct encap_key *a, a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; } +static inline int cmp_decap_info(struct mlx5e_decap_key *a, + struct mlx5e_decap_key *b) +{ + return memcmp(&a->key, &b->key, sizeof(b->key)); +} + static inline int hash_encap_info(struct encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), key->tc_tunnel->tunnel_type); } +static inline int hash_decap_info(struct mlx5e_decap_key *key) +{ + return jhash(&key->key, sizeof(key->key), 0); +} static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, struct net_device *peer_netdev) @@ -3305,13 +3387,16 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, same_hw_devs(priv, peer_priv)); } - - bool mlx5e_encap_take(struct mlx5e_encap_entry *e) { return refcount_inc_not_zero(&e->refcnt); } +static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + static struct mlx5e_encap_entry * mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, uintptr_t hash_key) @@ -3332,6 +3417,24 @@ mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, return NULL; } +static struct mlx5e_decap_entry * +mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_key r_key; + struct mlx5e_decap_entry *e; + + hash_for_each_possible_rcu(esw->offloads.decap_tbl, e, + hlist, hash_key) { + r_key = e->key; + if (!cmp_decap_info(&r_key, key) && + mlx5e_decap_take(e)) + return e; + } + return NULL; +} + static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info) { size_t tun_size = sizeof(*tun_info) + tun_info->options_len; @@ -3477,6 +3580,84 @@ out_err_init: return err; } +static int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_decap_entry *d; + struct mlx5e_decap_key key; + uintptr_t hash_key; + int err; + + parse_attr = attr->parse_attr; + if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { + NL_SET_ERR_MSG_MOD(extack, + "encap header larger than max supported"); + return -EOPNOTSUPP; + } + + key.key = parse_attr->eth; + hash_key = hash_decap_info(&key); + mutex_lock(&esw->offloads.decap_tbl_lock); + d = mlx5e_decap_get(priv, &key, hash_key); + if (d) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + wait_for_completion(&d->res_ready); + mutex_lock(&esw->offloads.decap_tbl_lock); + if (d->compl_result) { + err = -EREMOTEIO; + goto out_free; + } + goto found; + } + + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + err = -ENOMEM; + goto out_err; + } + + d->key = key; + refcount_set(&d->refcnt, 1); + init_completion(&d->res_ready); + INIT_LIST_HEAD(&d->flows); + hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2, + sizeof(parse_attr->eth), + &parse_attr->eth, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(d->pkt_reformat)) { + err = PTR_ERR(d->pkt_reformat); + d->compl_result = err; + } + mutex_lock(&esw->offloads.decap_tbl_lock); + complete_all(&d->res_ready); + if (err) + goto out_free; + +found: + flow->decap_reformat = d; + attr->decap_pkt_reformat = d->pkt_reformat; + list_add(&flow->l3_to_l2_reformat, &d->flows); + mutex_unlock(&esw->offloads.decap_tbl_lock); + return 0; + +out_free: + mutex_unlock(&esw->offloads.decap_tbl_lock); + mlx5e_decap_put(priv, d); + return err; + +out_err: + mutex_unlock(&esw->offloads.decap_tbl_lock); + return err; +} + static int parse_tc_vlan_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, @@ -3688,7 +3869,8 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv, static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + struct net_device *filter_dev) { struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -3727,8 +3909,32 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } mpls_push = true; break; + case FLOW_ACTION_MPLS_POP: + /* we only support mpls pop if it is the first action + * and the filter net device is bareudp. Subsequent + * actions can be pedit and the last can be mirred + * egress redirect. + */ + if (i) { + NL_SET_ERR_MSG_MOD(extack, + "mpls pop supported only as first action"); + return -EOPNOTSUPP; + } + if (!netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "mpls pop supported only on bareudp devices"); + return -EOPNOTSUPP; + } + + parse_attr->eth.h_proto = act->mpls_pop.proto; + action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_flag_set(flow, L3_TO_L2_DECAP); + break; case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: + if (flow_flag_test(flow, L3_TO_L2_DECAP)) + return -EOPNOTSUPP; + err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, hdrs, extack); if (err) @@ -4093,6 +4299,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, INIT_LIST_HEAD(&flow->encaps[out_index].list); INIT_LIST_HEAD(&flow->mod_hdr); INIT_LIST_HEAD(&flow->hairpin); + INIT_LIST_HEAD(&flow->l3_to_l2_reformat); refcount_set(&flow->refcnt, 1); init_completion(&flow->init_done); @@ -4162,7 +4369,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev); if (err) goto err_free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index c5eb4e7754a9..ac79b7c9aeb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2262,6 +2262,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.encap_tbl); mutex_init(&esw->offloads.mod_hdr.lock); hash_init(esw->offloads.mod_hdr.hlist); + mutex_init(&esw->offloads.decap_tbl_lock); + hash_init(esw->offloads.decap_tbl); atomic64_set(&esw->offloads.num_flows, 0); mutex_init(&esw->state_lock); mutex_init(&esw->mode_lock); @@ -2303,6 +2305,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) mutex_destroy(&esw->state_lock); mutex_destroy(&esw->offloads.mod_hdr.lock); mutex_destroy(&esw->offloads.encap_tbl_lock); + mutex_destroy(&esw->offloads.decap_tbl_lock); kfree(esw->vports); kfree(esw); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4a1c6c78bb14..ccbbea3e0505 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -209,6 +209,8 @@ struct mlx5_esw_offload { struct mutex peer_mutex; struct mutex encap_tbl_lock; /* protects encap_tbl */ DECLARE_HASHTABLE(encap_tbl, 8); + struct mutex decap_tbl_lock; /* protects decap_tbl */ + DECLARE_HASHTABLE(decap_tbl, 8); struct mod_hdr_tbl mod_hdr; DECLARE_HASHTABLE(termtbl_tbl, 8); struct mutex termtbl_mutex; /* protects termtbl hash */ @@ -432,6 +434,7 @@ struct mlx5_esw_flow_attr { struct mlx5_flow_table *fdb; struct mlx5_flow_table *dest_ft; struct mlx5_ct_attr ct_attr; + struct mlx5_pkt_reformat *decap_pkt_reformat; struct mlx5e_tc_flow_parse_attr *parse_attr; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1c9be19ee025..554fc64d8ef6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -366,6 +366,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } } } + + if (attr->decap_pkt_reformat) + flow_act.pkt_reformat = attr->decap_pkt_reformat; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[i].counter_id = mlx5_fc_id(attr->counter); -- cgit v1.2.3-59-g8ed1b From 7e51891a237f9ea319f53f9beb83afb0077d88e6 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 21 Jun 2019 13:23:44 -0700 Subject: net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule Register a notifier block to handle netdev events for bond device of non-uplink representors to support eswitch vports bonding. When a non-uplink representor is a lower dev (slave) of bond and becomes active, adding egress acl forward-to-vport rule of all slave netdevs (active + standby) to forward to this representor's vport. Use change lower netdev event to do this. Use change upper event to detect slave representor unslaved from lag device to delete its vport egress acl forward rule if any. Signed-off-by: Or Gerlitz Signed-off-by: Vu Pham Reviewed-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 3 +- .../net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 161 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 8 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 7 + 4 files changed, 175 insertions(+), 4 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_rep.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 3934dc258041..b61e47bc16e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -34,7 +34,8 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o lib/geneve.o lib/port_tun.o lag_mp.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \ + en_rep.o en/rep/bond.o mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ en/mapping.o esw/chains.o en/tc_tun.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c new file mode 100644 index 000000000000..d0aab36f1947 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include + +#include "mlx5_core.h" +#include "eswitch.h" +#include "esw/acl/ofld.h" +#include "en_rep.h" + +struct mlx5e_rep_bond { + struct notifier_block nb; + struct netdev_net_notifier nn; +}; + +static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + /* A given netdev is not a representor or not a slave of LAG configuration */ + if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev)) + return false; + + /* Egress acl forward to vport is supported only non-uplink representor */ + return rpriv->rep->vport != MLX5_VPORT_UPLINK; +} + +static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr) +{ + struct netdev_notifier_changelowerstate_info *info; + struct netdev_lag_lower_state_info *lag_info; + struct mlx5e_rep_priv *rpriv; + struct net_device *lag_dev; + struct mlx5e_priv *priv; + struct list_head *iter; + struct net_device *dev; + u16 acl_vport_num; + u16 fwd_vport_num; + + if (!mlx5e_rep_is_lag_netdev(netdev)) + return; + + info = ptr; + lag_info = info->lower_state_info; + /* This is not an event of a representor becoming active slave */ + if (!lag_info->tx_enabled) + return; + + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + fwd_vport_num = rpriv->rep->vport; + lag_dev = netdev_master_upper_dev_get(netdev); + + netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n", + lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev)); + + /* Point everyone's egress acl to the vport of the active representor */ + netdev_for_each_lower_dev(lag_dev, dev, iter) { + priv = netdev_priv(dev); + rpriv = priv->ppriv; + acl_vport_num = rpriv->rep->vport; + if (acl_vport_num != fwd_vport_num) { + mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch, + fwd_vport_num, + acl_vport_num); + } + } +} + +static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr) +{ + struct netdev_notifier_changeupper_info *info = ptr; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; + + if (!mlx5e_rep_is_lag_netdev(netdev)) + return; + + /* Nothing to setup for new enslaved representor */ + if (info->linking) + return; + + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + netdev_dbg(netdev, "Unslave, reset vport(%d) egress acl\n", rpriv->rep->vport); + + /* Reset all egress acl rules of unslave representor's vport */ + mlx5_esw_acl_egress_vport_unbond(priv->mdev->priv.eswitch, + rpriv->rep->vport); +} + +/* Bond device of representors and netdev events are used here in specific way + * to support eswitch vports bonding and to perform failover of eswitch vport + * by modifying the vport's egress acl of lower dev representors. Thus this + * also change the traditional behavior of lower dev under bond device. + * All non-representor netdevs or representors of other vendors as lower dev + * of bond device are not supported. + */ +static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_CHANGELOWERSTATE: + mlx5e_rep_changelowerstate_event(netdev, ptr); + break; + case NETDEV_CHANGEUPPER: + mlx5e_rep_changeupper_event(netdev, ptr); + break; + } + return NOTIFY_DONE; +} + +/* If HW support eswitch vports bonding, register a specific notifier to + * handle it when two or more representors are bonded + */ +int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv; + int ret = 0; + + priv = netdev_priv(netdev); + if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch)) + goto out; + + uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL); + if (!uplink_priv->bond) { + ret = -ENOMEM; + goto out; + } + + uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent; + ret = register_netdevice_notifier_dev_net(netdev, + &uplink_priv->bond->nb, + &uplink_priv->bond->nn); + if (ret) { + netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret); + kvfree(uplink_priv->bond); + uplink_priv->bond = NULL; + } +out: + return ret; +} + +void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) || + !rpriv->uplink_priv.bond) + return; + + unregister_netdevice_notifier_dev_net(rpriv->netdev, + &rpriv->uplink_priv.bond->nb, + &rpriv->uplink_priv.bond->nn); + kvfree(rpriv->uplink_priv.bond); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 4e13e37a9ecd..12593d75e885 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -959,16 +959,18 @@ static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev); + mlx5e_rep_bond_init(rpriv); err = mlx5e_rep_tc_netdevice_event_register(rpriv); if (err) { mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n", err); - goto tc_rep_cleanup; + goto err_event_reg; } return 0; -tc_rep_cleanup: +err_event_reg: + mlx5e_rep_bond_cleanup(rpriv); mlx5e_rep_tc_cleanup(rpriv); return err; } @@ -1001,7 +1003,7 @@ static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) { mlx5e_rep_tc_netdevice_event_unregister(rpriv); mlx5e_rep_indr_clean_block_privs(rpriv); - + mlx5e_rep_bond_cleanup(rpriv); mlx5e_rep_tc_cleanup(rpriv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 1c4af8522467..7e56787aa224 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -56,6 +56,7 @@ struct mlx5e_neigh_update_table { }; struct mlx5_tc_ct_priv; +struct mlx5e_rep_bond; struct mlx5_rep_uplink_priv { /* Filters DB - instantiated by the uplink representor and shared by * the uplink's VFs @@ -89,6 +90,9 @@ struct mlx5_rep_uplink_priv { struct mapping_ctx *tunnel_enc_opts_mapping; struct mlx5_tc_ct_priv *ct_priv; + + /* support eswitch vports bonding */ + struct mlx5e_rep_bond *bond; }; struct mlx5e_rep_priv { @@ -211,6 +215,9 @@ struct mlx5e_rep_sq { void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev); void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev); +int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv); + bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); -- cgit v1.2.3-59-g8ed1b From d97555e1452943264295cd3c1f066474bc3660dd Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Fri, 28 Feb 2020 14:28:27 -0800 Subject: net/mlx5e: Add bond_metadata and its slave entries Adding bond_metadata and its slave entries to represent a lag device and its slaves VF representors. Bond_metadata structure includes a unique metadata shared by slaves VF respresentors, and a list of slaves representors slave entries. On enslaving event, create a bond_metadata structure representing the upper lag device of this slave representor if it has not been created yet. Create and add entry for the slave representor to the slaves list. On unslaving event, free the slave entry of the slave representor. On the last unslave event, free the bond_metadata structure and its resources. Introduce APIs to create and remove bond_metadata and its resources, enslave and unslave VF representor slave entries. Signed-off-by: Vu Pham Reviewed-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 128 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 5 + 2 files changed, 133 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_rep.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index d0aab36f1947..932e94362ceb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ +#include +#include #include #include "mlx5_core.h" @@ -11,8 +13,132 @@ struct mlx5e_rep_bond { struct notifier_block nb; struct netdev_net_notifier nn; + struct list_head metadata_list; }; +struct mlx5e_rep_bond_slave_entry { + struct list_head list; + struct net_device *netdev; +}; + +struct mlx5e_rep_bond_metadata { + struct list_head list; /* link to global list of rep_bond_metadata */ + struct mlx5_eswitch *esw; + /* private of uplink holding rep bond metadata list */ + struct net_device *lag_dev; + u32 metadata_reg_c_0; + + struct list_head slaves_list; /* slaves list */ + int slaves; +}; + +static struct mlx5e_rep_bond_metadata * +mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv, + const struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_metadata *found = NULL; + struct mlx5e_rep_bond_metadata *cur; + + list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) { + if (cur->lag_dev == lag_dev) { + found = cur; + break; + } + } + + return found; +} + +static struct mlx5e_rep_bond_slave_entry * +mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata, + const struct net_device *netdev) +{ + struct mlx5e_rep_bond_slave_entry *found = NULL; + struct mlx5e_rep_bond_slave_entry *cur; + + list_for_each_entry(cur, &mdata->slaves_list, list) { + if (cur->netdev == netdev) { + found = cur; + break; + } + } + + return found; +} + +static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata) +{ + netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n", + mdata->metadata_reg_c_0); + list_del(&mdata->list); + WARN_ON(!list_empty(&mdata->slaves_list)); + kfree(mdata); +} + +/* This must be called under rtnl_lock */ +int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, + struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_slave_entry *s_entry; + struct mlx5e_rep_bond_metadata *mdata; + struct mlx5e_rep_priv *rpriv; + + ASSERT_RTNL(); + + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev); + if (!mdata) { + /* First netdev becomes slave, no metadata presents the lag_dev. Create one */ + mdata = kzalloc(sizeof(*mdata), GFP_KERNEL); + if (!mdata) + return -ENOMEM; + + mdata->lag_dev = lag_dev; + mdata->esw = esw; + INIT_LIST_HEAD(&mdata->slaves_list); + list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list); + + netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n", + mdata->metadata_reg_c_0); + } + + s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL); + if (!s_entry) + return -ENOMEM; + + s_entry->netdev = netdev; + mdata->slaves++; + list_add_tail(&s_entry->list, &mdata->slaves_list); + + return 0; +} + +/* This must be called under rtnl_lock */ +void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, + const struct net_device *netdev, + const struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_slave_entry *s_entry; + struct mlx5e_rep_bond_metadata *mdata; + struct mlx5e_rep_priv *rpriv; + + ASSERT_RTNL(); + + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev); + if (!mdata) + return; + + s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev); + if (!s_entry) + return; + + list_del(&s_entry->list); + if (--mdata->slaves == 0) + mlx5e_rep_bond_metadata_release(mdata); + kfree(s_entry); +} + static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -133,6 +259,7 @@ int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv) goto out; } + INIT_LIST_HEAD(&uplink_priv->bond->metadata_list); uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent; ret = register_netdevice_notifier_dev_net(netdev, &uplink_priv->bond->nb, @@ -142,6 +269,7 @@ int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv) kvfree(uplink_priv->bond); uplink_priv->bond = NULL; } + out: return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 7e56787aa224..ed741b6e6af2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -217,6 +217,11 @@ void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev); void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev); int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv); void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv); +int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, + struct net_device *lag_dev); +void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, + const struct net_device *netdev, + const struct net_device *lag_dev); bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); -- cgit v1.2.3-59-g8ed1b From 88e96e533cfa11e996c59a44bbb6b0e0b9891970 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 2 Mar 2020 10:33:49 -0800 Subject: net/mlx5e: Slave representors sharing unique metadata for match Bonded slave representors' vports must share a unique metadata for match. On enslaving event of slave representor to lag device, allocate new unique "bond_metadata" for match if this is the first slave. The subsequent enslaved representors will share the same unique "bond_metadata". On unslaving event of slave representor, reset the slave representor's vport to use its own default metadata. Replace ingress acl and rx rules of the slave representors' vports using new vport->bond_metadata. Signed-off-by: Vu Pham Reviewed-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 65 ++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 22 +++++++- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 1 + 3 files changed, 80 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_rep.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index 932e94362ceb..13500f60bef6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -71,6 +71,7 @@ static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdat netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n", mdata->metadata_reg_c_0); list_del(&mdata->list); + mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0); WARN_ON(!list_empty(&mdata->slaves_list)); kfree(mdata); } @@ -82,6 +83,8 @@ int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, struct mlx5e_rep_bond_slave_entry *s_entry; struct mlx5e_rep_bond_metadata *mdata; struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; + int err; ASSERT_RTNL(); @@ -96,6 +99,11 @@ int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, mdata->lag_dev = lag_dev; mdata->esw = esw; INIT_LIST_HEAD(&mdata->slaves_list); + mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw); + if (!mdata->metadata_reg_c_0) { + kfree(mdata); + return -ENOSPC; + } list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list); netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n", @@ -103,14 +111,33 @@ int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, } s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL); - if (!s_entry) - return -ENOMEM; + if (!s_entry) { + err = -ENOMEM; + goto entry_alloc_err; + } s_entry->netdev = netdev; + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + + err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, + mdata->metadata_reg_c_0); + if (err) + goto ingress_err; + mdata->slaves++; list_add_tail(&s_entry->list, &mdata->slaves_list); + netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n", + rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0); return 0; + +ingress_err: + kfree(s_entry); +entry_alloc_err: + if (!mdata->slaves) + mlx5e_rep_bond_metadata_release(mdata); + return err; } /* This must be called under rtnl_lock */ @@ -121,6 +148,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, struct mlx5e_rep_bond_slave_entry *s_entry; struct mlx5e_rep_bond_metadata *mdata; struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; ASSERT_RTNL(); @@ -133,7 +161,16 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, if (!s_entry) return; + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + + mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0); + mlx5e_rep_bond_update(priv, false); list_del(&s_entry->list); + + netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n", + rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0); + if (--mdata->slaves == 0) mlx5e_rep_bond_metadata_release(mdata); kfree(s_entry); @@ -163,6 +200,7 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt struct net_device *dev; u16 acl_vport_num; u16 fwd_vport_num; + int err; if (!mlx5e_rep_is_lag_netdev(netdev)) return; @@ -187,11 +225,28 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt rpriv = priv->ppriv; acl_vport_num = rpriv->rep->vport; if (acl_vport_num != fwd_vport_num) { - mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch, - fwd_vport_num, - acl_vport_num); + /* Only single rx_rule for unique bond_metadata should be + * present, delete it if it's saved as passive vport's + * rx_rule with destination as passive vport's root_ft + */ + mlx5e_rep_bond_update(priv, true); + err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch, + fwd_vport_num, + acl_vport_num); + if (err) + netdev_warn(dev, + "configure slave vport(%d) egress fwd, err(%d)", + acl_vport_num, err); } } + + /* Insert new rx_rule for unique bond_metadata, save it as active vport's + * rx_rule with new destination as active vport's root_ft + */ + err = mlx5e_rep_bond_update(netdev_priv(netdev), false); + if (err) + netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)", + fwd_vport_num, err); } static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 12593d75e885..af89a4803c7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -854,6 +854,24 @@ static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv) return 0; } +static void rep_vport_rx_rule_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (!rpriv->vport_rx_rule) + return; + + mlx5_del_flow_rules(rpriv->vport_rx_rule); + rpriv->vport_rx_rule = NULL; +} + +int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup) +{ + rep_vport_rx_rule_destroy(priv); + + return cleanup ? 0 : mlx5e_create_rep_vport_rx_rule(priv); +} + static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -918,9 +936,7 @@ err_close_drop_rq: static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - - mlx5_del_flow_rules(rpriv->vport_rx_rule); + rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); mlx5e_destroy_direct_tirs(priv, priv->direct_tir); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index ed741b6e6af2..da9f1686d525 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -222,6 +222,7 @@ int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, const struct net_device *netdev, const struct net_device *lag_dev); +int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup); bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); -- cgit v1.2.3-59-g8ed1b From 9eabd188716b2c53d8b9d23e969c6c17049f0fcc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 29 May 2020 02:25:38 +0200 Subject: mlx5: update indirect block support Register ndo callback via flow_indr_dev_register() and flow_indr_dev_unregister(). No need for mlx5e_rep_indr_clean_block_privs() since flow_block_cb_free() already releases the internal mapping via ->release callback, which in this case is mlx5e_rep_indr_tc_block_unbind(). Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 81 +++------------------- .../net/ethernet/mellanox/mlx5/core/en/rep/tc.h | 4 -- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 5 -- 4 files changed, 8 insertions(+), 83 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_rep.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index c609a5e50ebc..80713123de5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -306,20 +306,6 @@ mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, return NULL; } -static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, - struct net_device *netdev); - -void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) -{ - struct mlx5e_rep_indr_block_priv *cb_priv, *temp; - struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list; - - list_for_each_entry_safe(cb_priv, temp, head, list) { - mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev); - kfree(cb_priv); - } -} - static int mlx5e_rep_indr_offload(struct net_device *netdev, struct flow_cls_offload *flower, @@ -423,9 +409,14 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, struct flow_block_offload *f, flow_setup_cb_t *setup_cb) { + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); struct mlx5e_rep_indr_block_priv *indr_priv; struct flow_block_cb *block_cb; + if (!mlx5e_tc_tun_device_to_offload(priv, netdev) && + !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev)) + return -EOPNOTSUPP; + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; @@ -492,76 +483,20 @@ int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv, } } -static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, - struct net_device *netdev) -{ - int err; - - err = __flow_indr_block_cb_register(netdev, rpriv, - mlx5e_rep_indr_setup_cb, - rpriv); - if (err) { - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); - - mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n", - netdev_name(netdev), err); - } - return err; -} - -static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, - struct net_device *netdev) -{ - __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb, - rpriv); -} - -static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, - unsigned long event, void *ptr) -{ - struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, - uplink_priv.netdevice_nb); - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); - struct net_device *netdev = netdev_notifier_info_to_dev(ptr); - - if (!mlx5e_tc_tun_device_to_offload(priv, netdev) && - !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev)) - return NOTIFY_OK; - - switch (event) { - case NETDEV_REGISTER: - mlx5e_rep_indr_register_block(rpriv, netdev); - break; - case NETDEV_UNREGISTER: - mlx5e_rep_indr_unregister_block(rpriv, netdev); - break; - } - return NOTIFY_OK; -} - int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) { struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; - int err; /* init indirect block notifications */ INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); - uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event; - err = register_netdevice_notifier_dev_net(rpriv->netdev, - &uplink_priv->netdevice_nb, - &uplink_priv->netdevice_nn); - return err; + return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv); } void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) { - struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; - - /* clean indirect TC block notifications */ - unregister_netdevice_notifier_dev_net(rpriv->netdev, - &uplink_priv->netdevice_nb, - &uplink_priv->netdevice_nn); + flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv, + mlx5e_rep_indr_setup_tc_cb); } #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h index 86f92abf2fdd..fdf9702c2d7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -33,7 +33,6 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data); -void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv); bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, @@ -65,9 +64,6 @@ static inline int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { return -EOPNOTSUPP; } -static inline void -mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) {} - struct mlx5e_tc_update_priv; static inline bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index af89a4803c7d..006807e04eda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1018,7 +1018,6 @@ destroy_tises: static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) { mlx5e_rep_tc_netdevice_event_unregister(rpriv); - mlx5e_rep_indr_clean_block_privs(rpriv); mlx5e_rep_bond_cleanup(rpriv); mlx5e_rep_tc_cleanup(rpriv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index da9f1686d525..1d5669801484 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -69,13 +69,8 @@ struct mlx5_rep_uplink_priv { * tc_indr_block_cb_priv_list is used to lookup indirect callback * private data * - * netdevice_nb is the netdev events notifier - used to register - * tunnel devices for block events - * */ struct list_head tc_indr_block_priv_list; - struct notifier_block netdevice_nb; - struct netdev_net_notifier netdevice_nn; struct mlx5_tun_entropy tun_entropy; -- cgit v1.2.3-59-g8ed1b