aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c350
1 files changed, 350 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
new file mode 100644
index 000000000000..bdb71332cbf2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <net/lag.h>
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "esw/acl/ofld.h"
+#include "en_rep.h"
+
+struct mlx5e_rep_bond {
+ struct notifier_block nb;
+ struct netdev_net_notifier nn;
+ struct list_head metadata_list;
+};
+
+struct mlx5e_rep_bond_slave_entry {
+ struct list_head list;
+ struct net_device *netdev;
+};
+
+struct mlx5e_rep_bond_metadata {
+ struct list_head list; /* link to global list of rep_bond_metadata */
+ struct mlx5_eswitch *esw;
+ /* private of uplink holding rep bond metadata list */
+ struct net_device *lag_dev;
+ u32 metadata_reg_c_0;
+
+ struct list_head slaves_list; /* slaves list */
+ int slaves;
+};
+
+static struct mlx5e_rep_bond_metadata *
+mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_metadata *found = NULL;
+ struct mlx5e_rep_bond_metadata *cur;
+
+ list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
+ if (cur->lag_dev == lag_dev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static struct mlx5e_rep_bond_slave_entry *
+mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
+ const struct net_device *netdev)
+{
+ struct mlx5e_rep_bond_slave_entry *found = NULL;
+ struct mlx5e_rep_bond_slave_entry *cur;
+
+ list_for_each_entry(cur, &mdata->slaves_list, list) {
+ if (cur->netdev == netdev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
+{
+ netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ list_del(&mdata->list);
+ mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0);
+ WARN_ON(!list_empty(&mdata->slaves_list));
+ kfree(mdata);
+}
+
+/* This must be called under rtnl_lock */
+int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
+ struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+ int err;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata) {
+ /* First netdev becomes slave, no metadata presents the lag_dev. Create one */
+ mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
+ if (!mdata)
+ return -ENOMEM;
+
+ mdata->lag_dev = lag_dev;
+ mdata->esw = esw;
+ INIT_LIST_HEAD(&mdata->slaves_list);
+ mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
+ if (!mdata->metadata_reg_c_0) {
+ kfree(mdata);
+ return -ENOSPC;
+ }
+ list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);
+
+ netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ }
+
+ s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
+ if (!s_entry) {
+ err = -ENOMEM;
+ goto entry_alloc_err;
+ }
+
+ s_entry->netdev = netdev;
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+ err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
+ mdata->metadata_reg_c_0);
+ if (err)
+ goto ingress_err;
+
+ mdata->slaves++;
+ list_add_tail(&s_entry->list, &mdata->slaves_list);
+ netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ return 0;
+
+ingress_err:
+ kfree(s_entry);
+entry_alloc_err:
+ if (!mdata->slaves)
+ mlx5e_rep_bond_metadata_release(mdata);
+ return err;
+}
+
+/* This must be called under rtnl_lock */
+void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+ const struct net_device *netdev,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata)
+ return;
+
+ s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
+ if (!s_entry)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+ /* Reset bond_metadata to zero first then reset all ingress/egress
+ * acls and rx rules of unslave representor's vport
+ */
+ mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
+ mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
+ mlx5e_rep_bond_update(priv, false);
+
+ list_del(&s_entry->list);
+
+ netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ if (--mdata->slaves == 0)
+ mlx5e_rep_bond_metadata_release(mdata);
+ kfree(s_entry);
+}
+
+static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ /* A given netdev is not a representor or not a slave of LAG configuration */
+ if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev))
+ return false;
+
+ /* Egress acl forward to vport is supported only non-uplink representor */
+ return rpriv->rep->vport != MLX5_VPORT_UPLINK;
+}
+
+static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changelowerstate_info *info;
+ struct netdev_lag_lower_state_info *lag_info;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+ struct list_head *iter;
+ struct net_device *dev;
+ u16 acl_vport_num;
+ u16 fwd_vport_num;
+ int err;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return;
+
+ info = ptr;
+ lag_info = info->lower_state_info;
+ /* This is not an event of a representor becoming active slave */
+ if (!lag_info->tx_enabled)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ fwd_vport_num = rpriv->rep->vport;
+ lag_dev = netdev_master_upper_dev_get(netdev);
+
+ netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
+ lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));
+
+ /* Point everyone's egress acl to the vport of the active representor */
+ netdev_for_each_lower_dev(lag_dev, dev, iter) {
+ priv = netdev_priv(dev);
+ rpriv = priv->ppriv;
+ acl_vport_num = rpriv->rep->vport;
+ if (acl_vport_num != fwd_vport_num) {
+ /* Only single rx_rule for unique bond_metadata should be
+ * present, delete it if it's saved as passive vport's
+ * rx_rule with destination as passive vport's root_ft
+ */
+ mlx5e_rep_bond_update(priv, true);
+ err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
+ fwd_vport_num,
+ acl_vport_num);
+ if (err)
+ netdev_warn(dev,
+ "configure slave vport(%d) egress fwd, err(%d)",
+ acl_vport_num, err);
+ }
+ }
+
+ /* Insert new rx_rule for unique bond_metadata, save it as active vport's
+ * rx_rule with new destination as active vport's root_ft
+ */
+ err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
+ if (err)
+ netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
+ fwd_vport_num, err);
+}
+
+static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ lag_dev = info->upper_dev;
+
+ netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
+ info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);
+
+ if (info->linking)
+ mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+ else
+ mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+}
+
+/* Bond device of representors and netdev events are used here in specific way
+ * to support eswitch vports bonding and to perform failover of eswitch vport
+ * by modifying the vport's egress acl of lower dev representors. Thus this
+ * also change the traditional behavior of lower dev under bond device.
+ * All non-representor netdevs or representors of other vendors as lower dev
+ * of bond device are not supported.
+ */
+static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
+ mlx5e_rep_changelowerstate_event(netdev, ptr);
+ break;
+ case NETDEV_CHANGEUPPER:
+ mlx5e_rep_changeupper_event(netdev, ptr);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+/* If HW support eswitch vports bonding, register a specific notifier to
+ * handle it when two or more representors are bonded
+ */
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv;
+ int ret = 0;
+
+ priv = netdev_priv(netdev);
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
+ goto out;
+
+ uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
+ if (!uplink_priv->bond) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
+ uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
+ ret = register_netdevice_notifier_dev_net(netdev,
+ &uplink_priv->bond->nb,
+ &uplink_priv->bond->nn);
+ if (ret) {
+ netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
+ kvfree(uplink_priv->bond);
+ uplink_priv->bond = NULL;
+ }
+
+out:
+ return ret;
+}
+
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
+ !rpriv->uplink_priv.bond)
+ return;
+
+ unregister_netdevice_notifier_dev_net(rpriv->netdev,
+ &rpriv->uplink_priv.bond->nb,
+ &rpriv->uplink_priv.bond->nn);
+ kvfree(rpriv->uplink_priv.bond);
+}