Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en')
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h | 54
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/health.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c | 46
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 13
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 24
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 350
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c | 368
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h | 34
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 646
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h | 77
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 332
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 31
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c | 134
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 153
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 101
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 37
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 113
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h | 25
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c | 9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c | 51
23 files changed, 2258 insertions, 350 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
new file mode 100644
index 000000000000..7be6b2d36b60
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5E_DCBNL_H__
+#define __MLX5E_DCBNL_H__
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+
+#define MLX5E_MAX_PRIORITY (8)
+
+struct mlx5e_cee_config {
+ /* bw pct for priority group */
+ u8 pg_bw_pct[CEE_DCBX_MAX_PGS];
+ u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO];
+ bool pfc_setting[CEE_DCBX_MAX_PRIO];
+ bool pfc_enable;
+};
+
+struct mlx5e_dcbx {
+ enum mlx5_dcbx_oper_mode mode;
+ struct mlx5e_cee_config cee_cfg; /* pending configuration */
+ u8 dscp_app_cnt;
+
+ /* The only setting that cannot be read from FW */
+ u8 tc_tsa[IEEE_8021QAZ_MAX_TCS];
+ u8 cap;
+
+ /* Buffer configuration */
+ bool manual_buffer;
+ u32 cable_len;
+ u32 xoff;
+};
+
+#define MLX5E_MAX_DSCP (64)
+
+struct mlx5e_dcbx_dp {
+ u8 dscp2prio[MLX5E_MAX_DSCP];
+ u8 trust_state;
+};
+
+void mlx5e_dcbnl_build_netdev(struct net_device *netdev);
+void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev);
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
+#else
+static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {}
+static inline void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev) {}
+static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {}
+static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {}
+static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {}
+#endif
+
+#endif /* __MLX5E_DCBNL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 3a199a03d929..7283443868f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -43,7 +43,7 @@ int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
void *cqc;
int err;
- err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out, sizeof(out));
+ err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
index 7cd5b02e0f10..8fe8b4d6ad1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
@@ -38,12 +38,11 @@ int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv)
{
- u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {};
- u32 out[MLX5_ST_SZ_DW(arm_monitor_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {};
MLX5_SET(arm_monitor_counter_in, in, opcode,
MLX5_CMD_OP_ARM_MONITOR_COUNTER);
- mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(priv->mdev, arm_monitor_counter, in);
}
static void mlx5e_monitor_counters_work(struct work_struct *work)
@@ -66,19 +65,6 @@ static int mlx5e_monitor_event_handler(struct notifier_block *nb,
return NOTIFY_OK;
}
-static void mlx5e_monitor_counter_start(struct mlx5e_priv *priv)
-{
- MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
- MONITOR_COUNTER);
- mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
-}
-
-static void mlx5e_monitor_counter_stop(struct mlx5e_priv *priv)
-{
- mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
- cancel_work_sync(&priv->monitor_counters_work);
-}
-
static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in)
{
enum mlx5_monitor_counter_ppcnt ppcnt_cnt;
@@ -118,8 +104,7 @@ static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters);
int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 :
MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters);
- u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
- u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
int q_counter = priv->q_counter;
int cnt = 0;
@@ -136,34 +121,31 @@ static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
MLX5_SET(set_monitor_counter_in, in, opcode,
MLX5_CMD_OP_SET_MONITOR_COUNTER);
- mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(mdev, set_monitor_counter, in);
}
/* check if mlx5e_monitor_counter_supported before calling this function*/
void mlx5e_monitor_counter_init(struct mlx5e_priv *priv)
{
INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work);
- mlx5e_monitor_counter_start(priv);
+ MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
+ MONITOR_COUNTER);
+ mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
+
mlx5e_set_monitor_counter(priv);
mlx5e_monitor_counter_arm(priv);
queue_work(priv->wq, &priv->update_stats_work);
}
-static void mlx5e_monitor_counter_disable(struct mlx5e_priv *priv)
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
{
- u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
- u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
- MLX5_SET(set_monitor_counter_in, in, num_of_counters, 0);
MLX5_SET(set_monitor_counter_in, in, opcode,
MLX5_CMD_OP_SET_MONITOR_COUNTER);
- mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
-}
-
-/* check if mlx5e_monitor_counter_supported before calling this function*/
-void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
-{
- mlx5e_monitor_counter_disable(priv);
- mlx5e_monitor_counter_stop(priv);
+ mlx5_cmd_exec_in(priv->mdev, set_monitor_counter, in);
+ mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
+ cancel_work_sync(&priv->monitor_counters_work);
}
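
The monitor counter hunks above replace mlx5_cmd_exec() calls that passed an unused out[] buffer with the mlx5_cmd_exec_in() form, which derives the reply layout from the command name and keeps the scratch reply internal. A minimal standalone sketch of that wrapper pattern (hypothetical helper names, not the mlx5 API):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* stand-in for the low-level command interface that needs both buffers */
static int cmd_exec(const void *in, size_t inlen, void *out, size_t outlen)
{
	(void)in;
	(void)inlen;
	memset(out, 0, outlen);	/* pretend device reply: all-clear status */
	return 0;
}

/* wrapper that owns the scratch reply buffer, so callers that only need
 * the status no longer declare an out[] array of their own
 */
static int cmd_exec_in(const void *in, size_t inlen)
{
	unsigned char out[64];

	return cmd_exec(in, inlen, out, sizeof(out));
}

int main(void)
{
	unsigned char in[32] = { 0 };

	printf("status: %d\n", cmd_exec_in(in, sizeof(in)));
	return 0;
}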
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index eb2e1f2138e4..38e4f19d69f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -12,15 +12,16 @@ static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- u16 headroom = NET_IP_ALIGN;
+ u16 headroom;
- if (mlx5e_rx_is_xdp(params, xsk)) {
+ if (xsk)
+ return xsk->headroom;
+
+ headroom = NET_IP_ALIGN;
+ if (mlx5e_rx_is_xdp(params, xsk))
headroom += XDP_PACKET_HEADROOM;
- if (xsk)
- headroom += xsk->headroom;
- } else {
+ else
headroom += MLX5_RX_HEADROOM;
- }
return headroom;
}
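
Rewritten this way, the headroom selection has three mutually exclusive outcomes: an XSK umem supplies its headroom verbatim, an XDP setup gets XDP_PACKET_HEADROOM on top of NET_IP_ALIGN, and everything else gets the regular RX headroom. A runnable sketch of the same decision, with stand-in values where the driver constants are configuration dependent:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NET_IP_ALIGN        2	/* typical value; 0 on some architectures */
#define XDP_PACKET_HEADROOM 256
#define RX_HEADROOM         64	/* stand-in for MLX5_RX_HEADROOM */

static uint16_t linear_rq_headroom(bool is_xdp, bool has_xsk, uint16_t xsk_headroom)
{
	if (has_xsk)
		return xsk_headroom;	/* XSK headroom is used as-is */

	return NET_IP_ALIGN + (is_xdp ? XDP_PACKET_HEADROOM : RX_HEADROOM);
}

int main(void)
{
	printf("xdp: %u, default: %u, xsk: %u\n",
	       linear_rq_headroom(true, false, 0),
	       linear_rq_headroom(false, false, 0),
	       linear_rq_headroom(false, true, 128));
	return 0;
}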
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 2c4a670c8ffd..2a8950b3056f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -369,17 +369,19 @@ enum mlx5e_fec_supported_link_mode {
*_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \
} while (0)
-#define MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(buf, policy, write, link) \
- do { \
- u16 *__policy = &(policy); \
- bool _write = (write); \
- \
- if (_write && *__policy) \
- *__policy = find_first_bit((u_long *)__policy, \
- sizeof(u16) * BITS_PER_BYTE);\
- MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, *__policy, _write, link); \
- if (!_write && *__policy) \
- *__policy = 1 << *__policy; \
+#define MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(buf, policy, write, link) \
+ do { \
+ unsigned long policy_long; \
+ u16 *__policy = &(policy); \
+ bool _write = (write); \
+ \
+ policy_long = *__policy; \
+ if (_write && *__policy) \
+ *__policy = find_first_bit(&policy_long, \
+ sizeof(policy_long) * BITS_PER_BYTE);\
+ MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, *__policy, _write, link); \
+ if (!_write && *__policy) \
+ *__policy = 1 << *__policy; \
} while (0)
/* get/set FEC admin field for a given speed */
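
The macro above previously cast a u16 policy variable to unsigned long * and handed it to find_first_bit(), so the bit scan could read past the end of the variable; copying the value into a properly sized unsigned long first keeps the access in bounds. A self-contained sketch of the safe pattern (a compiler builtin stands in for find_first_bit()):

#include <stdint.h>
#include <stdio.h>

/* stand-in for find_first_bit() on a single word */
static unsigned int first_set_bit(unsigned long word)
{
	return word ? (unsigned int)__builtin_ctzl(word) : 8 * sizeof(word);
}

int main(void)
{
	uint16_t policy = 1 << 3;		/* FEC policy in bitmask form */
	unsigned long policy_long = policy;	/* widen before bit-scanning */

	printf("first set bit: %u\n", first_set_bit(policy_long));	/* prints 3 */
	return 0;
}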
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
new file mode 100644
index 000000000000..bdb71332cbf2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <net/lag.h>
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "esw/acl/ofld.h"
+#include "en_rep.h"
+
+struct mlx5e_rep_bond {
+ struct notifier_block nb;
+ struct netdev_net_notifier nn;
+ struct list_head metadata_list;
+};
+
+struct mlx5e_rep_bond_slave_entry {
+ struct list_head list;
+ struct net_device *netdev;
+};
+
+struct mlx5e_rep_bond_metadata {
+ struct list_head list; /* link to global list of rep_bond_metadata */
+ struct mlx5_eswitch *esw;
+ /* private of uplink holding rep bond metadata list */
+ struct net_device *lag_dev;
+ u32 metadata_reg_c_0;
+
+ struct list_head slaves_list; /* slaves list */
+ int slaves;
+};
+
+static struct mlx5e_rep_bond_metadata *
+mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_metadata *found = NULL;
+ struct mlx5e_rep_bond_metadata *cur;
+
+ list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
+ if (cur->lag_dev == lag_dev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static struct mlx5e_rep_bond_slave_entry *
+mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
+ const struct net_device *netdev)
+{
+ struct mlx5e_rep_bond_slave_entry *found = NULL;
+ struct mlx5e_rep_bond_slave_entry *cur;
+
+ list_for_each_entry(cur, &mdata->slaves_list, list) {
+ if (cur->netdev == netdev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
+{
+ netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ list_del(&mdata->list);
+ mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0);
+ WARN_ON(!list_empty(&mdata->slaves_list));
+ kfree(mdata);
+}
+
+/* This must be called under rtnl_lock */
+int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
+ struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+ int err;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata) {
+ /* First netdev becomes a slave; no metadata represents this lag_dev yet. Create one */
+ mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
+ if (!mdata)
+ return -ENOMEM;
+
+ mdata->lag_dev = lag_dev;
+ mdata->esw = esw;
+ INIT_LIST_HEAD(&mdata->slaves_list);
+ mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
+ if (!mdata->metadata_reg_c_0) {
+ kfree(mdata);
+ return -ENOSPC;
+ }
+ list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);
+
+ netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ }
+
+ s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
+ if (!s_entry) {
+ err = -ENOMEM;
+ goto entry_alloc_err;
+ }
+
+ s_entry->netdev = netdev;
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+ err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
+ mdata->metadata_reg_c_0);
+ if (err)
+ goto ingress_err;
+
+ mdata->slaves++;
+ list_add_tail(&s_entry->list, &mdata->slaves_list);
+ netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ return 0;
+
+ingress_err:
+ kfree(s_entry);
+entry_alloc_err:
+ if (!mdata->slaves)
+ mlx5e_rep_bond_metadata_release(mdata);
+ return err;
+}
+
+/* This must be called under rtnl_lock */
+void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+ const struct net_device *netdev,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata)
+ return;
+
+ s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
+ if (!s_entry)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+ /* Reset bond_metadata to zero first, then reset all ingress/egress
+ * acls and rx rules of the unslaved representor's vport
+ */
+ mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
+ mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
+ mlx5e_rep_bond_update(priv, false);
+
+ list_del(&s_entry->list);
+
+ netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ if (--mdata->slaves == 0)
+ mlx5e_rep_bond_metadata_release(mdata);
+ kfree(s_entry);
+}
+
+static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ /* A given netdev is not a representor or not a slave of LAG configuration */
+ if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev))
+ return false;
+
+ /* Egress acl forward to vport is supported only for non-uplink representors */
+ return rpriv->rep->vport != MLX5_VPORT_UPLINK;
+}
+
+static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changelowerstate_info *info;
+ struct netdev_lag_lower_state_info *lag_info;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+ struct list_head *iter;
+ struct net_device *dev;
+ u16 acl_vport_num;
+ u16 fwd_vport_num;
+ int err;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return;
+
+ info = ptr;
+ lag_info = info->lower_state_info;
+ /* This is not an event of a representor becoming active slave */
+ if (!lag_info->tx_enabled)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ fwd_vport_num = rpriv->rep->vport;
+ lag_dev = netdev_master_upper_dev_get(netdev);
+
+ netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
+ lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));
+
+ /* Point everyone's egress acl to the vport of the active representor */
+ netdev_for_each_lower_dev(lag_dev, dev, iter) {
+ priv = netdev_priv(dev);
+ rpriv = priv->ppriv;
+ acl_vport_num = rpriv->rep->vport;
+ if (acl_vport_num != fwd_vport_num) {
+ /* Only a single rx_rule per unique bond_metadata should be
+ * present; delete it if it is saved as the passive vport's
+ * rx_rule with the passive vport's root_ft as destination
+ */
+ mlx5e_rep_bond_update(priv, true);
+ err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
+ fwd_vport_num,
+ acl_vport_num);
+ if (err)
+ netdev_warn(dev,
+ "configure slave vport(%d) egress fwd, err(%d)",
+ acl_vport_num, err);
+ }
+ }
+
+ /* Insert a new rx_rule for the unique bond_metadata and save it as the active
+ * vport's rx_rule, with the active vport's root_ft as the new destination
+ */
+ err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
+ if (err)
+ netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
+ fwd_vport_num, err);
+}
+
+static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ lag_dev = info->upper_dev;
+
+ netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
+ info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);
+
+ if (info->linking)
+ mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+ else
+ mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+}
+
+/* A bond device of representors and netdev events are used here in a specific
+ * way to support eswitch vport bonding and to perform failover of an eswitch
+ * vport by modifying the egress acls of the lower dev representors. Thus this
+ * also changes the traditional behavior of a lower dev under a bond device.
+ * Non-representor netdevs and representors of other vendors as lower devs
+ * of a bond device are not supported.
+ */
+static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
+ mlx5e_rep_changelowerstate_event(netdev, ptr);
+ break;
+ case NETDEV_CHANGEUPPER:
+ mlx5e_rep_changeupper_event(netdev, ptr);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+/* If HW supports eswitch vport bonding, register a specific notifier to
+ * handle it when two or more representors are bonded
+ */
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv;
+ int ret = 0;
+
+ priv = netdev_priv(netdev);
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
+ goto out;
+
+ uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
+ if (!uplink_priv->bond) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
+ uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
+ ret = register_netdevice_notifier_dev_net(netdev,
+ &uplink_priv->bond->nb,
+ &uplink_priv->bond->nn);
+ if (ret) {
+ netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
+ kvfree(uplink_priv->bond);
+ uplink_priv->bond = NULL;
+ }
+
+out:
+ return ret;
+}
+
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
+ !rpriv->uplink_priv.bond)
+ return;
+
+ unregister_netdevice_notifier_dev_net(rpriv->netdev,
+ &rpriv->uplink_priv.bond->nb,
+ &rpriv->uplink_priv.bond->nn);
+ kvfree(rpriv->uplink_priv.bond);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
new file mode 100644
index 000000000000..baa162432e75
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/rwlock.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <net/netevent.h>
+#include "neigh.h"
+#include "tc.h"
+#include "en_rep.h"
+#include "fs_core.h"
+#include "diag/en_rep_tracepoint.h"
+
+static unsigned long mlx5e_rep_ipv6_interval(void)
+{
+ if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
+ return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
+
+ return ~0UL;
+}
+
+static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
+{
+ unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+ unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
+}
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+ mlx5_fc_queue_stats_work(priv->mdev,
+ &neigh_update->neigh_stats_work,
+ neigh_update->min_interval);
+}
+
+static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
+{
+ return refcount_inc_not_zero(&nhe->refcnt);
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+{
+ if (refcount_dec_and_test(&nhe->refcnt)) {
+ mlx5e_rep_neigh_entry_remove(nhe);
+ kfree_rcu(nhe, rcu);
+ }
+}
+
+static struct mlx5e_neigh_hash_entry *
+mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh_hash_entry *next = NULL;
+
+ rcu_read_lock();
+
+ for (next = nhe ?
+ list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ &nhe->neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list) :
+ list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list);
+ next;
+ next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+ &next->neigh_list,
+ struct mlx5e_neigh_hash_entry,
+ neigh_list))
+ if (mlx5e_rep_neigh_entry_hold(next))
+ break;
+
+ rcu_read_unlock();
+
+ if (nhe)
+ mlx5e_rep_neigh_entry_release(nhe);
+
+ return next;
+}
+
+static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
+ neigh_update.neigh_stats_work.work);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+
+ rtnl_lock();
+ if (!list_empty(&rpriv->neigh_update.neigh_list))
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
+ mlx5e_tc_update_neigh_used_value(nhe);
+
+ rtnl_unlock();
+}
+
+static void mlx5e_rep_neigh_update(struct work_struct *work)
+{
+ struct mlx5e_neigh_hash_entry *nhe =
+ container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
+ struct neighbour *n = nhe->n;
+ struct mlx5e_encap_entry *e;
+ unsigned char ha[ETH_ALEN];
+ struct mlx5e_priv *priv;
+ bool neigh_connected;
+ u8 nud_state, dead;
+
+ rtnl_lock();
+
+ /* If these parameters are changed after we release the lock,
+ * we'll receive another event letting us know about it.
+ * We use this lock to avoid inconsistency between the neigh validity
+ * and its hw address.
+ */
+ read_lock_bh(&n->lock);
+ memcpy(ha, n->ha, ETH_ALEN);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ read_unlock_bh(&n->lock);
+
+ neigh_connected = (nud_state & NUD_VALID) && !dead;
+
+ trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
+
+ list_for_each_entry(e, &nhe->encap_list, encap_list) {
+ if (!mlx5e_encap_take(e))
+ continue;
+
+ priv = netdev_priv(e->out_dev);
+ mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+ mlx5e_encap_put(priv, e);
+ }
+ mlx5e_rep_neigh_entry_release(nhe);
+ rtnl_unlock();
+ neigh_release(n);
+}
+
+static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe,
+ struct neighbour *n)
+{
+ /* Take a reference to ensure the neighbour and mlx5 encap
+ * entry won't be destructed until we drop the reference in
+ * delayed work.
+ */
+ neigh_hold(n);
+
+ /* This assignment is valid as long as the neigh reference
+ * is taken
+ */
+ nhe->n = n;
+
+ if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
+ mlx5e_rep_neigh_entry_release(nhe);
+ neigh_release(n);
+ }
+}
+
+static int mlx5e_rep_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ neigh_update.netevent_nb);
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+ struct mlx5e_neigh m_neigh = {};
+ struct neigh_parms *p;
+ struct neighbour *n;
+ bool found = false;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ n = ptr;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
+#else
+ if (n->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ m_neigh.dev = n->dev;
+ m_neigh.family = n->ops->family;
+ memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+ rcu_read_lock();
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+ rcu_read_unlock();
+ if (!nhe)
+ return NOTIFY_DONE;
+
+ mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
+ break;
+
+ case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+ p = ptr;
+
+ /* We check that the device is present since we don't care about
+ * changes in the default table; we only care about changes
+ * made to the per-device delay probe time parameter.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
+#else
+ if (!p->dev || p->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
+ neigh_list) {
+ if (p->dev == nhe->m_neigh.dev) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ if (!found)
+ return NOTIFY_DONE;
+
+ neigh_update->min_interval = min_t(unsigned long,
+ NEIGH_VAR(p, DELAY_PROBE_TIME),
+ neigh_update->min_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev,
+ neigh_update->min_interval);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static const struct rhashtable_params mlx5e_neigh_ht_params = {
+ .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
+ .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
+ .key_len = sizeof(struct mlx5e_neigh),
+ .automatic_shrinking = true,
+};
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ int err;
+
+ err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ INIT_LIST_HEAD(&neigh_update->neigh_list);
+ mutex_init(&neigh_update->encap_lock);
+ INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
+ mlx5e_rep_neigh_stats_work);
+ mlx5e_rep_neigh_update_init_interval(rpriv);
+
+ rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
+ err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
+ if (err)
+ goto out_err;
+ return 0;
+
+out_err:
+ rhashtable_destroy(&neigh_update->neigh_ht);
+ return err;
+}
+
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ unregister_netevent_notifier(&neigh_update->netevent_nb);
+
+ flush_workqueue(priv->wq); /* flush neigh update works */
+
+ cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
+
+ mutex_destroy(&neigh_update->encap_lock);
+ rhashtable_destroy(&neigh_update->neigh_ht);
+}
+
+static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int err;
+
+ err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
+
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
+
+ mutex_lock(&rpriv->neigh_update.encap_lock);
+
+ list_del_rcu(&nhe->neigh_list);
+
+ rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+}
+
+/* This function must only be called under the representor's encap_lock or
+ * inside rcu read lock section.
+ */
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_neigh_hash_entry *nhe;
+
+ nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
+ mlx5e_neigh_ht_params);
+ return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
+}
+
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh_hash_entry **nhe)
+{
+ int err;
+
+ *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
+ if (!*nhe)
+ return -ENOMEM;
+
+ (*nhe)->priv = priv;
+ memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
+ INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
+ spin_lock_init(&(*nhe)->encap_list_lock);
+ INIT_LIST_HEAD(&(*nhe)->encap_list);
+ refcount_set(&(*nhe)->refcnt, 1);
+
+ err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
+ if (err)
+ goto out_free;
+ return 0;
+
+out_free:
+ kfree(*nhe);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
new file mode 100644
index 000000000000..32b239189c95
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_NEIGH__
+#define __MLX5_EN_REP_NEIGH__
+
+#include "en.h"
+#include "en_rep.h"
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv);
+
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh);
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh_hash_entry **nhe);
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline int
+mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __MLX5_EN_REP_NEIGH__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
new file mode 100644
index 000000000000..80713123de5c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -0,0 +1,646 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <net/dst_metadata.h>
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include "tc.h"
+#include "neigh.h"
+#include "en_rep.h"
+#include "eswitch.h"
+#include "esw/chains.h"
+#include "en/tc_ct.h"
+#include "en/mapping.h"
+#include "en/tc_tun.h"
+#include "lib/port_tun.h"
+
+struct mlx5e_rep_indr_block_priv {
+ struct net_device *netdev;
+ struct mlx5e_rep_priv *rpriv;
+
+ struct list_head list;
+};
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
+ struct mlx5e_neigh_hash_entry *nhe;
+ int err;
+
+ err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
+ if (err)
+ return err;
+
+ mutex_lock(&rpriv->neigh_update.encap_lock);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+ if (!nhe) {
+ err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
+ if (err) {
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+ mlx5_tun_entropy_refcount_dec(tun_entropy,
+ e->reformat_type);
+ return err;
+ }
+ }
+
+ e->nhe = nhe;
+ spin_lock(&nhe->encap_list_lock);
+ list_add_rcu(&e->encap_list, &nhe->encap_list);
+ spin_unlock(&nhe->encap_list_lock);
+
+ mutex_unlock(&rpriv->neigh_update.encap_lock);
+
+ return 0;
+}
+
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
+
+ if (!e->nhe)
+ return;
+
+ spin_lock(&e->nhe->encap_list_lock);
+ list_del_rcu(&e->encap_list);
+ spin_unlock(&e->nhe->encap_list_lock);
+
+ mlx5e_rep_neigh_entry_release(e->nhe);
+ e->nhe = NULL;
+ mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
+}
+
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN])
+{
+ struct ethhdr *eth = (struct ethhdr *)e->encap_header;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ bool encap_connected;
+ LIST_HEAD(flow_list);
+
+ ASSERT_RTNL();
+
+ /* wait for encap to be fully initialized */
+ wait_for_completion(&e->res_ready);
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+ if (e->compl_result < 0 || (encap_connected == neigh_connected &&
+ ether_addr_equal(e->h_dest, ha)))
+ goto unlock;
+
+ mlx5e_take_all_encap_flows(e, &flow_list);
+
+ if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
+ (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
+ mlx5e_tc_encap_flows_del(priv, e, &flow_list);
+
+ if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ ether_addr_copy(e->h_dest, ha);
+ ether_addr_copy(eth->h_dest, ha);
+ /* Update the encap source mac, in case we delete
+ * the flows when the encap source mac changes.
+ */
+ ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
+
+ mlx5e_tc_encap_flows_add(priv, e, &flow_list);
+ }
+unlock:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ mlx5e_put_encap_flow_list(priv, &flow_list);
+}
+
+static int
+mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct flow_cls_offload *cls_flower, int flags)
+{
+ switch (cls_flower->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_DESTROY:
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
+ case FLOW_CLS_STATS:
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static
+int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ switch (ma->command) {
+ case TC_CLSMATCHALL_REPLACE:
+ return mlx5e_tc_configure_matchall(priv, ma);
+ case TC_CLSMATCHALL_DESTROY:
+ return mlx5e_tc_delete_matchall(priv, ma);
+ case TC_CLSMATCHALL_STATS:
+ mlx5e_tc_stats_matchall(priv, ma);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
+ struct mlx5e_priv *priv = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
+ case TC_SETUP_CLSMATCHALL:
+ return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct flow_cls_offload tmp, *f = type_data;
+ struct mlx5e_priv *priv = cb_priv;
+ struct mlx5_eswitch *esw;
+ unsigned long flags;
+ int err;
+
+ flags = MLX5_TC_FLAG(INGRESS) |
+ MLX5_TC_FLAG(ESW_OFFLOAD) |
+ MLX5_TC_FLAG(FT_OFFLOAD);
+ esw = priv->mdev->priv.eswitch;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ memcpy(&tmp, f, sizeof(*f));
+
+ if (!mlx5_esw_chains_prios_supported(esw))
+ return -EOPNOTSUPP;
+
+ /* Re-use tc offload path by moving the ft flow to the
+ * reserved ft chain.
+ *
+ * FT offload can use prio range [0, INT_MAX], so we normalize
+ * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
+ * as with tc, where prio 0 isn't supported.
+ *
+ * We only support chain 0 of FT offload.
+ */
+ if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw))
+ return -EOPNOTSUPP;
+ if (tmp.common.chain_index != 0)
+ return -EOPNOTSUPP;
+
+ tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
+ tmp.common.prio++;
+ err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
+ memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
+static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct flow_block_offload *f = type_data;
+
+ f->unlocked_driver_cb = true;
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_rep_block_tc_cb_list,
+ mlx5e_rep_setup_tc_cb,
+ priv, priv, true);
+ case TC_SETUP_FT:
+ return flow_block_cb_setup_simple(type_data,
+ &mlx5e_rep_block_ft_cb_list,
+ mlx5e_rep_setup_ft_cb,
+ priv, priv, true);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ int err;
+
+ mutex_init(&uplink_priv->unready_flows_lock);
+ INIT_LIST_HEAD(&uplink_priv->unready_flows);
+
+ /* init shared tc flow table */
+ err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+ return err;
+}
+
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ /* delete shared tc flow table */
+ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
+ mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
+}
+
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
+ mlx5e_tc_reoffload_flows_work);
+}
+
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
+}
+
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
+
+ return NOTIFY_OK;
+}
+
+static struct mlx5e_rep_indr_block_priv *
+mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv;
+
+ /* All callback list access should be protected by RTNL. */
+ ASSERT_RTNL();
+
+ list_for_each_entry(cb_priv,
+ &rpriv->uplink_priv.tc_indr_block_priv_list,
+ list)
+ if (cb_priv->netdev == netdev)
+ return cb_priv;
+
+ return NULL;
+}
+
+static int
+mlx5e_rep_indr_offload(struct net_device *netdev,
+ struct flow_cls_offload *flower,
+ struct mlx5e_rep_indr_block_priv *indr_priv,
+ unsigned long flags)
+{
+ struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
+ int err = 0;
+
+ switch (flower->command) {
+ case FLOW_CLS_REPLACE:
+ err = mlx5e_configure_flower(netdev, priv, flower, flags);
+ break;
+ case FLOW_CLS_DESTROY:
+ err = mlx5e_delete_flower(netdev, priv, flower, flags);
+ break;
+ case FLOW_CLS_STATS:
+ err = mlx5e_stats_flower(netdev, priv, flower, flags);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
+ flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+ struct flow_cls_offload *f = type_data;
+ struct flow_cls_offload tmp;
+ struct mlx5e_priv *mpriv;
+ struct mlx5_eswitch *esw;
+ unsigned long flags;
+ int err;
+
+ mpriv = netdev_priv(priv->rpriv->netdev);
+ esw = mpriv->mdev->priv.eswitch;
+
+ flags = MLX5_TC_FLAG(EGRESS) |
+ MLX5_TC_FLAG(ESW_OFFLOAD) |
+ MLX5_TC_FLAG(FT_OFFLOAD);
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ memcpy(&tmp, f, sizeof(*f));
+
+ /* Re-use tc offload path by moving the ft flow to the
+ * reserved ft chain.
+ *
+ * FT offload can use prio range [0, INT_MAX], so we normalize
+ * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
+ * as with tc, where prio 0 isn't supported.
+ *
+ * We only support chain 0 of FT offload.
+ */
+ if (!mlx5_esw_chains_prios_supported(esw) ||
+ tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) ||
+ tmp.common.chain_index)
+ return -EOPNOTSUPP;
+
+ tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
+ tmp.common.prio++;
+ err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
+ memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+ return err;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void mlx5e_rep_indr_block_unbind(void *cb_priv)
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
+
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+}
+
+static LIST_HEAD(mlx5e_block_cb_list);
+
+static int
+mlx5e_rep_indr_setup_block(struct net_device *netdev,
+ struct mlx5e_rep_priv *rpriv,
+ struct flow_block_offload *f,
+ flow_setup_cb_t *setup_cb)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5e_rep_indr_block_priv *indr_priv;
+ struct flow_block_cb *block_cb;
+
+ if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
+ !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
+ return -EOPNOTSUPP;
+
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ f->unlocked_driver_cb = true;
+ f->driver_block_list = &mlx5e_block_cb_list;
+
+ switch (f->command) {
+ case FLOW_BLOCK_BIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+ if (indr_priv)
+ return -EEXIST;
+
+ indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
+ if (!indr_priv)
+ return -ENOMEM;
+
+ indr_priv->netdev = netdev;
+ indr_priv->rpriv = rpriv;
+ list_add(&indr_priv->list,
+ &rpriv->uplink_priv.tc_indr_block_priv_list);
+
+ block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv,
+ mlx5e_rep_indr_block_unbind);
+ if (IS_ERR(block_cb)) {
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+ return PTR_ERR(block_cb);
+ }
+ flow_block_cb_add(block_cb, f);
+ list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
+
+ return 0;
+ case FLOW_BLOCK_UNBIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+ if (!indr_priv)
+ return -ENOENT;
+
+ block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
+ if (!block_cb)
+ return -ENOENT;
+
+ flow_block_cb_remove(block_cb, f);
+ list_del(&block_cb->driver_list);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static
+int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv,
+ enum tc_setup_type type, void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
+ mlx5e_rep_indr_setup_tc_cb);
+ case TC_SETUP_FT:
+ return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
+ mlx5e_rep_indr_setup_ft_cb);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+
+ /* init indirect block notifications */
+ INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
+
+ return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv);
+}
+
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
+{
+ flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv,
+ mlx5e_rep_indr_setup_tc_cb);
+}
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5e_tc_update_priv *tc_priv,
+ u32 tunnel_id)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct tunnel_match_enc_opts enc_opts = {};
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct metadata_dst *tun_dst;
+ struct tunnel_match_key key;
+ u32 tun_id, enc_opts_id;
+ struct net_device *dev;
+ int err;
+
+ enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
+ tun_id = tunnel_id >> ENC_OPTS_BITS;
+
+ if (!tun_id)
+ return true;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+
+ err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
+ if (err) {
+ WARN_ON_ONCE(true);
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel for tun_id: %d, err: %d\n",
+ tun_id, err);
+ return false;
+ }
+
+ if (enc_opts_id) {
+ err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
+ enc_opts_id, &enc_opts);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
+ enc_opts_id, err);
+ return false;
+ }
+ }
+
+ tun_dst = tun_rx_dst(enc_opts.key.len);
+ if (!tun_dst) {
+ WARN_ON_ONCE(true);
+ return false;
+ }
+
+ ip_tunnel_key_init(&tun_dst->u.tun_info.key,
+ key.enc_ipv4.src, key.enc_ipv4.dst,
+ key.enc_ip.tos, key.enc_ip.ttl,
+ 0, /* label */
+ key.enc_tp.src, key.enc_tp.dst,
+ key32_to_tunnel_id(key.enc_key_id.keyid),
+ TUNNEL_KEY);
+
+ if (enc_opts.key.len)
+ ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
+ enc_opts.key.data,
+ enc_opts.key.len,
+ enc_opts.key.dst_opt_type);
+
+ skb_dst_set(skb, (struct dst_entry *)tun_dst);
+ dev = dev_get_by_index(&init_net, key.filter_ifindex);
+ if (!dev) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find tunnel device with ifindex: %d\n",
+ key.filter_ifindex);
+ return false;
+ }
+
+ /* Set tun_dev so we do dev_put() after datapath */
+ tc_priv->tun_dev = dev;
+
+ skb->dev = dev;
+
+ return true;
+}
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb,
+ struct mlx5e_tc_update_priv *tc_priv)
+{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct tc_skb_ext *tc_skb_ext;
+ struct mlx5_eswitch *esw;
+ struct mlx5e_priv *priv;
+ int tunnel_moffset;
+ int err;
+
+ reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
+ if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
+ reg_c0 = 0;
+ reg_c1 = be32_to_cpu(cqe->ft_metadata);
+
+ if (!reg_c0)
+ return true;
+
+ priv = netdev_priv(skb->dev);
+ esw = priv->mdev->priv.eswitch;
+
+ err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain);
+ if (err) {
+ netdev_dbg(priv->netdev,
+ "Couldn't find chain for chain tag: %d, err: %d\n",
+ reg_c0, err);
+ return false;
+ }
+
+ if (chain) {
+ tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+ if (!tc_skb_ext) {
+ WARN_ON(1);
+ return false;
+ }
+
+ tc_skb_ext->chain = chain;
+
+ tuple_id = reg_c1 & TUPLE_ID_MAX;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id))
+ return false;
+ }
+
+ tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;
+ tunnel_id = reg_c1 >> (8 * tunnel_moffset);
+ return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+ return true;
+}
+
+void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
+{
+ if (tc_priv->tun_dev)
+ dev_put(tc_priv->tun_dev);
+}
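
mlx5e_rep_tc_update_skb() above splits the two CQE metadata registers into a chain tag (reg_c0) and a packed tunnel field (reg_c1), and mlx5e_restore_tunnel() further splits the tunnel field into a tunnel mapping id and an encap-options id. A standalone sketch of that unpacking, with placeholder field widths rather than the driver's real ENC_OPTS_BITS and register offsets:

#include <stdint.h>
#include <stdio.h>

#define ENC_OPTS_BITS      8	/* placeholder width, not the driver's value */
#define ENC_OPTS_BITS_MASK ((1u << ENC_OPTS_BITS) - 1)

int main(void)
{
	uint32_t reg_c1 = 0x00031204;	/* example CQE metadata register */
	int tunnel_moffset = 1;		/* placeholder byte offset of the tunnel field */

	uint32_t tunnel_id = reg_c1 >> (8 * tunnel_moffset);
	uint32_t enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
	uint32_t tun_id = tunnel_id >> ENC_OPTS_BITS;

	printf("tunnel_id=0x%x -> tun mapping id=%u, enc opts id=%u\n",
	       tunnel_id, tun_id, enc_opts_id);
	return 0;
}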
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
new file mode 100644
index 000000000000..fdf9702c2d7d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_TC_H__
+#define __MLX5_EN_REP_TC_H__
+
+#include <linux/skbuff.h>
+#include "en_tc.h"
+#include "en_rep.h"
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv);
+
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv);
+
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv);
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv);
+
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv);
+
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN]);
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data);
+
+bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb,
+ struct mlx5e_tc_update_priv *tc_priv);
+void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+struct mlx5e_rep_priv;
+static inline int
+mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+static inline int
+mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) {}
+
+static inline void
+mlx5e_rep_tc_enable(struct mlx5e_priv *priv) {}
+static inline void
+mlx5e_rep_tc_disable(struct mlx5e_priv *priv) {}
+
+static inline int
+mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) { return NOTIFY_DONE; }
+
+static inline int
+mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data) { return -EOPNOTSUPP; }
+
+struct mlx5e_tc_update_priv;
+static inline bool
+mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb,
+ struct mlx5e_tc_update_priv *tc_priv) { return true; }
+static inline void
+mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
+#endif /* __MLX5_EN_REP_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index a172c5e39710..afc19dca1f5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -24,6 +24,7 @@
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
+#define MLX5_CT_STATE_NAT_BIT BIT(3)
#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
@@ -61,6 +62,15 @@ struct mlx5_ct_zone_rule {
bool nat;
};
+struct mlx5_tc_ct_pre {
+ struct mlx5_flow_table *fdb;
+ struct mlx5_flow_group *flow_grp;
+ struct mlx5_flow_group *miss_grp;
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_handle *miss_rule;
+ struct mlx5_modify_hdr *modify_hdr;
+};
+
struct mlx5_ct_ft {
struct rhash_head node;
u16 zone;
@@ -68,14 +78,14 @@ struct mlx5_ct_ft {
struct nf_flowtable *nf_ft;
struct mlx5_tc_ct_priv *ct_priv;
struct rhashtable ct_entries_ht;
+ struct mlx5_tc_ct_pre pre_ct;
+ struct mlx5_tc_ct_pre pre_ct_nat;
};
struct mlx5_ct_entry {
u16 zone;
struct rhash_head node;
- struct flow_rule *flow_rule;
struct mlx5_fc *counter;
- unsigned long lastuse;
unsigned long cookie;
unsigned long restore_cookie;
struct mlx5_ct_zone_rule zone_rules[2];
@@ -109,7 +119,7 @@ mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv)
}
static int
-mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec,
+mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
struct flow_rule *rule)
{
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -124,10 +134,8 @@ mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec,
flow_rule_match_basic(rule, &match);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
- ntohs(match.mask->n_proto));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
- ntohs(match.key->n_proto));
+ mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
+ headers_v);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
match.mask->ip_proto);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
@@ -384,7 +392,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
char *modact;
int err, i;
- action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+ action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
@@ -428,6 +436,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_eswitch *esw = ct_priv->esw;
struct mlx5_modify_hdr *mod_hdr;
struct flow_action_entry *meta;
+ u16 ct_state = 0;
int err;
meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
@@ -446,11 +455,13 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
&mod_acts);
if (err)
goto err_mapping;
+
+ ct_state |= MLX5_CT_STATE_NAT_BIT;
}
+ ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
- (MLX5_CT_STATE_ESTABLISHED_BIT |
- MLX5_CT_STATE_TRK_BIT),
+ ct_state,
meta->ct_metadata.mark,
meta->ct_metadata.labels[0],
tupleid);
@@ -520,7 +531,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
attr->counter = entry->counter;
attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
- mlx5_tc_ct_set_tuple_match(spec, flow_rule);
+ mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
entry->zone & MLX5_CT_ZONE_MASK,
MLX5_CT_ZONE_MASK);
@@ -603,7 +614,6 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
return -ENOMEM;
entry->zone = ft->zone;
- entry->flow_rule = flow_rule;
entry->cookie = flow->cookie;
entry->restore_cookie = meta_action->ct_metadata.cookie;
@@ -687,7 +697,7 @@ mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
return mlx5_tc_ct_block_flow_offload_stats(ft, f);
default:
break;
- };
+ }
return -EOPNOTSUPP;
}
@@ -699,6 +709,7 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
struct netlink_ext_ack *extack)
{
struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_dissector_key_ct *mask, *key;
bool trk, est, untrk, unest, new;
u32 ctstate = 0, ctstate_mask = 0;
@@ -706,7 +717,7 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
u16 ct_state, ct_state_mask;
struct flow_match_ct match;
- if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT))
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
return 0;
if (!ct_priv) {
@@ -715,7 +726,7 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
- flow_rule_match_ct(f->rule, &match);
+ flow_rule_match_ct(rule, &match);
key = match.key;
mask = match.mask;
@@ -794,6 +805,238 @@ mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
return 0;
}
+static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct,
+ bool nat)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+ struct mlx5_core_dev *dev = ct_priv->esw->dev;
+ struct mlx5_flow_table *fdb = pre_ct->fdb;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ u32 ctstate;
+ u16 zone;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
+ err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone);
+ if (err) {
+ ct_dbg("Failed to set zone register mapping");
+ goto err_mapping;
+ }
+
+ mod_hdr = mlx5_modify_header_alloc(dev,
+ MLX5_FLOW_NAMESPACE_FDB,
+ pre_mod_acts.num_actions,
+ pre_mod_acts.actions);
+
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to create pre ct mod hdr");
+ goto err_mapping;
+ }
+ pre_ct->modify_hdr = mod_hdr;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ flow_act.modify_hdr = mod_hdr;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
+ /* add flow rule */
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+ zone, MLX5_CT_ZONE_MASK);
+ ctstate = MLX5_CT_STATE_TRK_BIT;
+ if (nat)
+ ctstate |= MLX5_CT_STATE_NAT_BIT;
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
+
+ dest.ft = ct_priv->post_ct;
+ rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ ct_dbg("Failed to add pre ct flow rule zone %d", zone);
+ goto err_flow_rule;
+ }
+ pre_ct->flow_rule = rule;
+
+ /* add miss rule */
+ memset(spec, 0, sizeof(*spec));
+ dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ ct_dbg("Failed to add pre ct miss rule zone %d", zone);
+ goto err_miss_rule;
+ }
+ pre_ct->miss_rule = rule;
+
+ dealloc_mod_hdr_actions(&pre_mod_acts);
+ kvfree(spec);
+ return 0;
+
+err_miss_rule:
+ mlx5_del_flow_rules(pre_ct->flow_rule);
+err_flow_rule:
+ mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+err_mapping:
+ dealloc_mod_hdr_actions(&pre_mod_acts);
+ kvfree(spec);
+ return err;
+}
+
+static void
+tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct)
+{
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5_core_dev *dev = ct_priv->esw->dev;
+
+ mlx5_del_flow_rules(pre_ct->flow_rule);
+ mlx5_del_flow_rules(pre_ct->miss_rule);
+ mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+}
+
+static int
+mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct,
+ bool nat)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+ struct mlx5_core_dev *dev = ct_priv->esw->dev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *g;
+ u32 metadata_reg_c_2_mask;
+ u32 *flow_group_in;
+ void *misc;
+ int err;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!ns) {
+ err = -EOPNOTSUPP;
+ ct_dbg("Failed to get FDB flow namespace");
+ return err;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = FDB_TC_OFFLOAD;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ ct_dbg("Failed to create pre ct table");
+ goto out_free;
+ }
+ pre_ct->fdb = ft;
+
+ /* create flow group */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria.misc_parameters_2);
+
+ metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
+ metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
+ if (nat)
+ metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
+ metadata_reg_c_2_mask);
+
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ ct_dbg("Failed to create pre ct group");
+ goto err_flow_grp;
+ }
+ pre_ct->flow_grp = g;
+
+ /* create miss group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+ g = mlx5_create_flow_group(ft, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ ct_dbg("Failed to create pre ct miss group");
+ goto err_miss_grp;
+ }
+ pre_ct->miss_grp = g;
+
+ err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
+ if (err)
+ goto err_add_rules;
+
+ kvfree(flow_group_in);
+ return 0;
+
+err_add_rules:
+ mlx5_destroy_flow_group(pre_ct->miss_grp);
+err_miss_grp:
+ mlx5_destroy_flow_group(pre_ct->flow_grp);
+err_flow_grp:
+ mlx5_destroy_flow_table(ft);
+out_free:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
+ struct mlx5_tc_ct_pre *pre_ct)
+{
+ tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
+ mlx5_destroy_flow_group(pre_ct->miss_grp);
+ mlx5_destroy_flow_group(pre_ct->flow_grp);
+ mlx5_destroy_flow_table(pre_ct->fdb);
+}
+
+static int
+mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+ int err;
+
+ err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
+ if (err)
+ return err;
+
+ err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
+ if (err)
+ goto err_pre_ct_nat;
+
+ return 0;
+
+err_pre_ct_nat:
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+ return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
+ mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+}
+
static struct mlx5_ct_ft *
mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
struct nf_flowtable *nf_ft)
@@ -816,6 +1059,10 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
ft->ct_priv = ct_priv;
refcount_set(&ft->refcount, 1);
+ err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
+ if (err)
+ goto err_alloc_pre_ct;
+
err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
if (err)
goto err_init;
@@ -837,6 +1084,8 @@ err_add_cb:
err_insert:
rhashtable_destroy(&ft->ct_entries_ht);
err_init:
+ mlx5_tc_ct_free_pre_ct_tables(ft);
+err_alloc_pre_ct:
kfree(ft);
return ERR_PTR(err);
}
@@ -862,21 +1111,40 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
rhashtable_free_and_destroy(&ft->ct_entries_ht,
mlx5_tc_ct_flush_ft_entry,
ct_priv);
+ mlx5_tc_ct_free_pre_ct_tables(ft);
kfree(ft);
}
/* We translate the tc filter with CT action to the following HW model:
*
- * +-------------------+ +--------------------+ +--------------+
- * + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct +----->
- * + original match + | + tuple + zone match + | + fte_id match + |
- * +-------------------+ | +--------------------+ | +--------------+ |
- * v v v
- * set chain miss mapping set mark original
- * set fte_id set label filter
- * set zone set established actions
- * set tunnel_id do nat (if needed)
- * do decap
+ * +---------------------+
+ * + fdb prio (tc chain) +
+ * + original match +
+ * +---------------------+
+ * | set chain miss mapping
+ * | set fte_id
+ * | set tunnel_id
+ * | do decap
+ * v
+ * +---------------------+
+ * + pre_ct/pre_ct_nat + if matches +---------------------+
+ * + zone+nat match +---------------->+ post_ct (see below) +
+ * +---------------------+ set zone +---------------------+
+ * | set zone
+ * v
+ * +--------------------+
+ * + CT (nat or no nat) +
+ * + tuple + zone match +
+ * +--------------------+
+ * | set mark
+ * | set label
+ * | set established
+ * | do nat (if needed)
+ * v
+ * +--------------+
+ * + post_ct + original filter actions
+ * + fte_id match +------------------------>
+ * +--------------+
*/
static int
__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
@@ -891,7 +1159,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
struct mlx5_flow_spec *post_ct_spec = NULL;
struct mlx5_eswitch *esw = ct_priv->esw;
struct mlx5_esw_flow_attr *pre_ct_attr;
- struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_modify_hdr *mod_hdr;
struct mlx5_flow_handle *rule;
struct mlx5_ct_flow *ct_flow;
int chain_mapping = 0, err;
@@ -954,14 +1222,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
goto err_mapping;
}
- err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG,
- attr->ct_attr.zone &
- MLX5_CT_ZONE_MASK);
- if (err) {
- ct_dbg("Failed to set zone register mapping");
- goto err_mapping;
- }
-
err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
FTEID_TO_REG, fte_id);
if (err) {
@@ -1021,7 +1281,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
/* Change original rule point to ct table */
pre_ct_attr->dest_chain = 0;
- pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb;
ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw,
orig_spec,
pre_ct_attr);
@@ -1131,7 +1391,7 @@ mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
{
bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
- struct mlx5_flow_handle *rule;
+ struct mlx5_flow_handle *rule = ERR_PTR(-EINVAL);
int err;
if (!ct_priv)
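
The pre_ct/pre_ct_nat tables added in this file key their flow group on a single 32-bit metadata register (reg_c_2) that carries both the CT zone and the conntrack-state bits: the zone sits in the lower 16 bits and the state bits are shifted into the upper half (see mlx5_tc_ct_alloc_pre_ct() and tc_ct_pre_ct_add_rules() above, and the ct_state bits set in mlx5_tc_ct_entry_create_mod_hdr()). Below is a minimal userspace sketch of that packing; the CT_* constants are placeholders chosen for illustration, since the real MLX5_CT_STATE_* and MLX5_CT_ZONE_MASK definitions live earlier in tc_ct.c and are not part of this hunk.

/* Sketch only: mirrors the mask built in mlx5_tc_ct_alloc_pre_ct(),
 * with placeholder bit values.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CT_ZONE_MASK      0xFFFFu      /* zone occupies the low 16 bits   */
#define CT_STATE_TRK_BIT  (1u << 2)    /* placeholder "tracked" state bit */
#define CT_STATE_NAT_BIT  (1u << 3)    /* placeholder "NAT" state bit     */

static uint32_t pre_ct_group_mask(bool nat)
{
	uint32_t mask = CT_ZONE_MASK;          /* match the full zone */

	mask |= CT_STATE_TRK_BIT << 16;        /* and the tracked bit */
	if (nat)
		mask |= CT_STATE_NAT_BIT << 16; /* pre_ct_nat also matches NAT */
	return mask;
}

int main(void)
{
	printf("pre_ct mask:     0x%08x\n", pre_ct_group_mask(false));
	printf("pre_ct_nat mask: 0x%08x\n", pre_ct_group_mask(true));
	return 0;
}
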
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 091d305b633e..626f6c04882e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -130,7 +130,9 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
struct flow_cls_offload *f,
struct netlink_ext_ack *extack)
{
- if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT))
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
return 0;
NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled.");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index b45c3f46570b..7cce85faa16f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -4,8 +4,11 @@
#include <net/vxlan.h>
#include <net/gre.h>
#include <net/geneve.h>
+#include <net/bareudp.h>
#include "en/tc_tun.h"
#include "en_tc.h"
+#include "rep/tc.h"
+#include "rep/neigh.h"
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
{
@@ -16,6 +19,8 @@ struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
else if (netif_is_gretap(tunnel_dev) ||
netif_is_ip6gretap(tunnel_dev))
return &gre_tunnel;
+ else if (netif_is_bareudp(tunnel_dev))
+ return &mplsoudp_tunnel;
else
return NULL;
}
@@ -96,9 +101,8 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
}
rt = ip_route_output_key(dev_net(mirred_dev), fl4);
- ret = PTR_ERR_OR_ZERO(rt);
- if (ret)
- return ret;
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
ip_rt_put(rt);
@@ -508,6 +512,13 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+ struct flow_dissector_key_basic key_basic = {};
+ struct flow_dissector_key_basic mask_basic = {
+ .n_proto = htons(0xFFFF),
+ };
+ struct flow_match_basic match_basic = {
+ .key = &key_basic, .mask = &mask_basic,
+ };
struct flow_match_control match;
u16 addr_type;
@@ -533,10 +544,9 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
ntohl(match.key->dst));
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
- ethertype);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
- ETH_P_IP);
+ key_basic.n_proto = htons(ETH_P_IP);
+ mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+ headers_c, headers_v);
} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
struct flow_match_ipv6_addrs match;
@@ -559,10 +569,9 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
&match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
- ethertype);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
- ETH_P_IPV6);
+ key_basic.n_proto = htons(ETH_P_IPV6);
+ mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+ headers_c, headers_v);
}
}
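
The tc_tun.c hunks above replace open-coded ethertype MLX5_SET() calls with a synthetic basic match: a local key/mask pair is built with an all-ones n_proto mask, the key is set to ETH_P_IP or ETH_P_IPV6, and the pair is handed to the shared mlx5e_tc_set_ethertype() helper. The sketch below shows the same pattern in isolation; the structs and the set_ethertype() helper are simplified stand-ins for the kernel's flow_dissector and mlx5e types, not their real definitions.

/* Sketch only: a synthetic key/mask pair fed to one shared helper. */
#include <arpa/inet.h>   /* htons, ntohs */
#include <stdint.h>
#include <stdio.h>

#define ETH_P_IP   0x0800
#define ETH_P_IPV6 0x86DD

struct basic_key { uint16_t n_proto; };         /* network byte order */
struct basic_match {
	struct basic_key *key;
	struct basic_key *mask;
};

/* Stand-in for mlx5e_tc_set_ethertype(): applies key under mask. */
static void set_ethertype(const struct basic_match *m,
			  uint16_t *ethertype_c, uint16_t *ethertype_v)
{
	*ethertype_c = ntohs(m->mask->n_proto);
	*ethertype_v = ntohs(m->key->n_proto);
}

int main(void)
{
	struct basic_key key = { 0 };
	struct basic_key mask = { .n_proto = htons(0xFFFF) }; /* match all bits */
	struct basic_match match = { .key = &key, .mask = &mask };
	uint16_t c, v;

	key.n_proto = htons(ETH_P_IP);   /* IPv4 outer header */
	set_ethertype(&match, &c, &v);
	printf("criteria 0x%04x value 0x%04x\n", c, v);
	return 0;
}
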
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index 1630f0ec3ad7..704359df6095 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -16,6 +16,7 @@ enum {
MLX5E_TC_TUNNEL_TYPE_VXLAN,
MLX5E_TC_TUNNEL_TYPE_GENEVE,
MLX5E_TC_TUNNEL_TYPE_GRETAP,
+ MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
};
struct mlx5e_tc_tunnel {
@@ -46,6 +47,7 @@ struct mlx5e_tc_tunnel {
extern struct mlx5e_tc_tunnel vxlan_tunnel;
extern struct mlx5e_tc_tunnel geneve_tunnel;
extern struct mlx5e_tc_tunnel gre_tunnel;
+extern struct mlx5e_tc_tunnel mplsoudp_tunnel;
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
new file mode 100644
index 000000000000..1f9526244222
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/bareudp.h>
+#include <net/mpls.h>
+#include "en/tc_tun.h"
+
+static bool can_offload(struct mlx5e_priv *priv)
+{
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l3_tunnel_to_l2);
+}
+
+static int calc_hlen(struct mlx5e_encap_entry *e)
+{
+ return sizeof(struct udphdr) + MPLS_HLEN;
+}
+
+static int init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &mplsoudp_tunnel;
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ return 0;
+}
+
+static int generate_ip_tun_hdr(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *r)
+{
+ const struct ip_tunnel_key *tun_key = &r->tun_info->key;
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct mpls_shim_hdr *mpls;
+ u32 tun_id;
+
+ tun_id = be32_to_cpu(tunnel_id_to_key32(tun_key->tun_id));
+ mpls = (struct mpls_shim_hdr *)(udp + 1);
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_key->tp_dst;
+ *mpls = mpls_entry_encode(tun_id, tun_key->ttl, tun_key->tos, true);
+
+ return 0;
+}
+
+static int parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ return mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+}
+
+static int parse_tunnel(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_match_enc_keyid enc_keyid;
+ struct flow_match_mpls match;
+ void *misc2_c;
+ void *misc2_v;
+
+ misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
+ return 0;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return 0;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ if (!enc_keyid.mask->keyid)
+ return 0;
+
+ if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
+ MLX5_FLEX_PROTO_CW_MPLS_UDP))
+ return -EOPNOTSUPP;
+
+ flow_rule_match_mpls(rule, &match);
+
+ /* Only support matching the first LSE */
+ if (match.mask->used_lses != 1)
+ return -EOPNOTSUPP;
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_label,
+ match.mask->ls[0].mpls_label);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_label,
+ match.key->ls[0].mpls_label);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_exp,
+ match.mask->ls[0].mpls_tc);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_exp, match.key->ls[0].mpls_tc);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_s_bos,
+ match.mask->ls[0].mpls_bos);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_s_bos,
+ match.key->ls[0].mpls_bos);
+
+ MLX5_SET(fte_match_set_misc2, misc2_c,
+ outer_first_mpls_over_udp.mpls_ttl,
+ match.mask->ls[0].mpls_ttl);
+ MLX5_SET(fte_match_set_misc2, misc2_v,
+ outer_first_mpls_over_udp.mpls_ttl,
+ match.key->ls[0].mpls_ttl);
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
+ return 0;
+}
+
+struct mlx5e_tc_tunnel mplsoudp_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = can_offload,
+ .calc_hlen = calc_hlen,
+ .init_encap_attr = init_encap_attr,
+ .generate_ip_tun_hdr = generate_ip_tun_hdr,
+ .parse_udp_ports = parse_udp_ports,
+ .parse_tunnel = parse_tunnel,
+};
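
The new tunnel file builds the MPLS-over-UDP encap as a UDP header followed by a single MPLS label-stack entry, with the tunnel id used as the label (see generate_ip_tun_hdr() above, which relies on the kernel's mpls_entry_encode()). A standalone sketch of that 32-bit label-stack entry follows, using the RFC 3032 layout: label in bits 31:12, traffic class in 11:9, bottom-of-stack in bit 8, TTL in 7:0. The example label value is illustrative only.

/* Sketch only: encode one MPLS LSE the way the encap header does. */
#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t mpls_lse(uint32_t label, uint8_t tc, uint8_t ttl, bool bos)
{
	uint32_t lse = (label & 0xFFFFFu) << 12 |   /* 20-bit label    */
		       (uint32_t)(tc & 0x7) << 9 |  /* 3-bit TC (EXP)  */
		       (uint32_t)bos << 8 |         /* bottom of stack */
		       ttl;                         /* 8-bit TTL       */

	return htonl(lse);                          /* wire format     */
}

int main(void)
{
	/* e.g. tunnel id 1000 used as the MPLS label, TTL 64, single label */
	uint32_t wire = mpls_lse(1000, 0, 64, true);

	printf("LSE on the wire: 0x%08x\n", ntohl(wire));
	return 0;
}
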
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index f07b1399744e..bfd3e1161bc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -6,46 +6,32 @@
#include "en.h"
-#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
- MLX5E_SQ_NOPS_ROOM)
-
-#ifndef CONFIG_MLX5_EN_TLS
-#define MLX5E_SQ_TLS_ROOM (0)
-#else
-/* TLS offload requires additional stop_room for:
- * - a resync SKB.
- * kTLS offload requires fixed additional stop_room for:
- * - a static params WQE, and a progress params WQE.
- * The additional MTU-depending room for the resync DUMP WQEs
- * will be calculated and added in runtime.
- */
-#define MLX5E_SQ_TLS_ROOM \
- (MLX5_SEND_WQE_MAX_WQEBBS + \
- MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)
-#endif
-
#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+enum mlx5e_icosq_wqe_type {
+ MLX5E_ICOSQ_WQE_NOP,
+ MLX5E_ICOSQ_WQE_UMR_RX,
+};
+
static inline bool
mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
{
return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
}
-static inline void *
-mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, size_t size, u16 *pi)
+static inline void *mlx5e_fetch_wqe(struct mlx5_wq_cyc *wq, u16 pi, size_t wqe_size)
{
- struct mlx5_wq_cyc *wq = &sq->wq;
void *wqe;
- *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
- memset(wqe, 0, size);
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memset(wqe, 0, wqe_size);
return wqe;
}
+#define MLX5E_TX_FETCH_WQE(sq, pi) \
+ ((struct mlx5e_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_tx_wqe)))
+
static inline struct mlx5e_tx_wqe *
mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
{
@@ -81,6 +67,84 @@ mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
return wqe;
}
+struct mlx5e_tx_wqe_info {
+ struct sk_buff *skb;
+ u32 num_bytes;
+ u8 num_wqebbs;
+ u8 num_dma;
+#ifdef CONFIG_MLX5_EN_TLS
+ struct page *resync_dump_frag_page;
+#endif
+};
+
+static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_tx_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nop += contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
+
+struct mlx5e_icosq_wqe_info {
+ u8 wqe_type;
+ u8 num_wqebbs;
+
+ /* Auxiliary data for different wqe types. */
+ union {
+ struct {
+ struct mlx5e_rq *rq;
+ } umr;
+ };
+};
+
+static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_icosq_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_NOP,
+ .num_wqebbs = 1,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
+
static inline void
mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
u16 pi, u16 nnops)
@@ -102,7 +166,7 @@ static inline void
mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
struct mlx5_wqe_ctrl_seg *ctrl)
{
- ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ ctrl->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
/* ensure wqe is visible to device before updating doorbell record */
dma_wmb();
@@ -189,6 +253,22 @@ static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
}
}
+static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 sqn,
+ struct mlx5_err_cqe *err_cqe)
+{
+ struct mlx5_cqwq *wq = &cq->wq;
+ u32 ci;
+
+ ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
+
+ netdev_err(cq->channel->netdev,
+ "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+ cq->mcq.cqn, ci, sqn,
+ get_cqe_opcode((struct mlx5_cqe64 *)err_cqe),
+ err_cqe->syndrome, err_cqe->vendor_err_synd);
+ mlx5_dump_err_cqe(cq->mdev, err_cqe);
+}
+
/* SW parser related functions */
struct mlx5e_swp_spec {
@@ -232,4 +312,25 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
}
}
+static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
+{
+ BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS);
+
+ /* A WQE must not cross the page boundary, hence two conditions:
+ * 1. Its size must not exceed the page size.
+ * 2. If the WQE size is X, and the space remaining in a page is less
+ * than X, this space needs to be padded with NOPs. So, one WQE of
+ * size X may require up to X-1 WQEBBs of padding, which makes the
+ * stop room of X-1 + X.
+ * WQE size is also limited by the hardware limit.
+ */
+
+ if (__builtin_constant_p(wqe_size))
+ BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
+ else
+ WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
+
+ return wqe_size * 2 - 1;
+}
+
#endif
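
Two pieces of arithmetic from this header are easy to check in isolation: mlx5e_wqc_has_room_for() treats the producer/consumer counters as free-running values over a power-of-two ring, and mlx5e_stop_room_for_wqe() reserves 2*X - 1 WQEBBs for an X-WQEBB WQE because up to X - 1 NOPs may be needed to keep it from wrapping a page. The userspace model below reproduces both; the ring size and the 16-WQEBB maximum are assumed illustrative values, not taken from this patch.

/* Sketch only: same conditions as mlx5e_wqc_has_room_for() and
 * mlx5e_stop_room_for_wqe(), on an assumed 256-entry ring.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WQ_SZ 256u  /* ring size in WQEBBs, must be a power of two */

static uint16_t ctr2ix(uint16_t ctr)
{
	return ctr & (WQ_SZ - 1);
}

/* Same condition as mlx5e_wqc_has_room_for(). */
static bool has_room_for(uint16_t cc, uint16_t pc, uint16_t n)
{
	return ctr2ix(cc - pc) >= n || cc == pc;
}

/* Same formula as mlx5e_stop_room_for_wqe(). */
static uint16_t stop_room_for_wqe(uint16_t wqe_size)
{
	return wqe_size * 2 - 1;
}

int main(void)
{
	uint16_t max_wqebbs = 16; /* assumed value of MLX5_SEND_WQE_MAX_WQEBBS */
	uint16_t room = stop_room_for_wqe(max_wqebbs);

	printf("stop room for a %u-WQEBB WQE: %u WQEBBs\n", max_wqebbs, room);
	/* 250 of 256 slots in flight: only 6 free, not enough for 31 */
	printf("cc=10 pc=260 need %u -> %s\n", room,
	       has_room_for(10, 260, room) ? "room" : "stop queue");
	/* 160 in flight: 96 free */
	printf("cc=100 pc=260 need %u -> %s\n", room,
	       has_room_for(100, 260, room) ? "room" : "stop queue");
	return 0;
}
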
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index f049e0ac308a..c9d308e91965 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -31,7 +31,7 @@
*/
#include <linux/bpf_trace.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include "en/xdp.h"
#include "en/params.h"
@@ -64,14 +64,14 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
- xdpf = convert_to_xdp_frame(xdp);
+ xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
return false;
xdptxd.data = xdpf->data;
xdptxd.len = xdpf->len;
- if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) {
+ if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
/* The xdp_buff was in the UMEM and was copied into a newly
* allocated page. The UMEM page was returned via the ZCA, and
* this new page has to be mapped at this point and has to be
@@ -97,10 +97,10 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
xdpi.frame.xdpf = xdpf;
xdpi.frame.dma_addr = dma_addr;
} else {
- /* Driver assumes that convert_to_xdp_frame returns an xdp_frame
- * that points to the same memory region as the original
- * xdp_buff. It allows to map the memory only once and to use
- * the DMA_BIDIRECTIONAL mode.
+ /* Driver assumes that xdp_convert_buff_to_frame returns
+ * an xdp_frame that points to the same memory region as
+ * the original xdp_buff. It allows to map the memory only
+ * once and to use the DMA_BIDIRECTIONAL mode.
*/
xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
@@ -119,49 +119,33 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len, bool xsk)
+ u32 *len, struct xdp_buff *xdp)
{
struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
- struct xdp_umem *umem = rq->umem;
- struct xdp_buff xdp;
u32 act;
int err;
if (!prog)
return false;
- xdp.data = va + *rx_headroom;
- xdp_set_data_meta_invalid(&xdp);
- xdp.data_end = xdp.data + *len;
- xdp.data_hard_start = va;
- if (xsk)
- xdp.handle = di->xsk.handle;
- xdp.rxq = &rq->xdp_rxq;
-
- act = bpf_prog_run_xdp(prog, &xdp);
- if (xsk) {
- u64 off = xdp.data - xdp.data_hard_start;
-
- xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off);
- }
+ act = bpf_prog_run_xdp(prog, xdp);
switch (act) {
case XDP_PASS:
- *rx_headroom = xdp.data - xdp.data_hard_start;
- *len = xdp.data_end - xdp.data;
+ *len = xdp->data_end - xdp->data;
return false;
case XDP_TX:
- if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
+ if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, xdp)))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
return true;
case XDP_REDIRECT:
/* When XDP enabled then page-refcnt==1 here */
- err = xdp_do_redirect(rq->netdev, &xdp, prog);
+ err = xdp_do_redirect(rq->netdev, xdp, prog);
if (unlikely(err))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
- if (!xsk)
+ if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
mlx5e_page_dma_unmap(rq, di);
rq->stats->xdp_redirect++;
return true;
@@ -178,20 +162,43 @@ xdp_abort:
}
}
-static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
{
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
- struct mlx5e_xdpsq_stats *stats = sq->stats;
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi, contig_wqebbs;
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_xdp_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = 1,
+ .num_pkts = 0,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nops += contig_wqebbs;
- if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS))
- mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs);
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
- session->wqe = mlx5e_xdpsq_fetch_wqe(sq, &pi);
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+ u16 pi;
+
+ pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
+ session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
prefetchw(session->wqe->data);
session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
@@ -233,8 +240,10 @@ enum {
static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
if (unlikely(!sq->mpwqe.wqe)) {
+ const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- MLX5E_XDPSQ_STOP_ROOM))) {
+ stop_room))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
@@ -408,22 +417,15 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
i = 0;
do {
- u16 wqe_counter;
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 wqe_counter, ci;
bool last_wqe;
mlx5_cqwq_pop(&cq->wq);
wqe_counter = be16_to_cpu(cqe->wqe_counter);
- if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
- netdev_WARN_ONCE(sq->channel->netdev,
- "Bad OP in XDPSQ CQE: 0x%x\n",
- get_cqe_opcode(cqe));
-
do {
- struct mlx5e_xdp_wqe_info *wi;
- u16 ci;
-
last_wqe = (sqcc == wqe_counter);
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
@@ -432,6 +434,15 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
} while (!last_wqe);
+
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+ netdev_WARN_ONCE(sq->channel->netdev,
+ "Bad OP in XDPSQ CQE: 0x%x\n",
+ get_cqe_opcode(cqe));
+ mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
+ (struct mlx5_err_cqe *)cqe);
+ mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
+ }
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
if (xsk_frames)
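
mlx5e_xdpsq_get_next_pi() above (and its txrx.h counterparts for the TX and ICO SQs) all follow the same frag-edge rule: if the WQE would cross a page boundary, the remaining slots in the page are consumed by one-WQEBB NOPs and the WQE starts at the top of the next page. The simulation below reproduces only that index arithmetic; it assumes 64-byte WQEBBs in 4 KiB pages (64 WQEBBs per page) and a 256-entry ring, which are illustrative values rather than figures from this patch.

/* Sketch only: producer-index padding at the page edge. */
#include <stdint.h>
#include <stdio.h>

#define WQ_SZ            256u
#define WQEBBS_PER_PAGE  64u

static uint16_t ctr2ix(uint16_t ctr)
{
	return ctr & (WQ_SZ - 1);
}

static uint16_t contig_wqebbs(uint16_t pi)
{
	return WQEBBS_PER_PAGE - (pi & (WQEBBS_PER_PAGE - 1));
}

/* Returns the producer index for a WQE of 'size' WQEBBs, advancing *pc past
 * any NOP padding, as mlx5e_xdpsq_get_next_pi() does.
 */
static uint16_t get_next_pi(uint16_t *pc, uint16_t size, uint16_t *nops)
{
	uint16_t pi = ctr2ix(*pc);
	uint16_t contig = contig_wqebbs(pi);

	if (contig < size) {
		*nops += contig;    /* each NOP occupies one WQEBB          */
		*pc += contig;      /* skip to the start of the next page   */
		pi = ctr2ix(*pc);
	}
	return pi;
}

int main(void)
{
	uint16_t pc = 60, nops = 0;     /* 4 WQEBBs left in the current page */
	uint16_t pi = get_next_pi(&pc, 16, &nops);

	printf("pi=%u, pc=%u, nops posted=%u\n", pi, pc, nops);
	return 0;
}
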
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index d7587f40ecae..ca48c293151b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -40,8 +40,6 @@
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
-#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM)
-
#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \
DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS)
@@ -63,7 +61,7 @@
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len, bool xsk);
+ u32 *len, struct xdp_buff *xdp);
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
@@ -137,22 +135,10 @@ mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session)
session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS;
}
-static inline void
-mlx5e_fill_xdpsq_frag_edge(struct mlx5e_xdpsq *sq, struct mlx5_wq_cyc *wq,
- u16 pi, u16 nnops)
-{
- struct mlx5e_xdp_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
- edge_wi = wi + nnops;
- /* fill sq frag edge with nops to avoid wqe wrapping two pages */
- for (; wi < edge_wi; wi++) {
- wi->num_wqebbs = 1;
- wi->num_pkts = 0;
- mlx5e_post_nop(wq, sq->sqn, &sq->pc);
- }
-
- sq->stats->nops += nnops;
-}
+struct mlx5e_xdp_wqe_info {
+ u8 num_wqebbs;
+ u8 num_pkts;
+};
static inline void
mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
@@ -186,19 +172,6 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
session->ds_count++;
}
-static inline struct mlx5e_tx_wqe *
-mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, u16 *pi)
-{
- struct mlx5_wq_cyc *wq = &sq->wq;
- struct mlx5e_tx_wqe *wqe;
-
- *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
- memset(wqe, 0, sizeof(*wqe));
-
- return wqe;
-}
-
static inline void
mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
struct mlx5e_xdp_info *xi)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 62fc8a128a8d..a33a1f762c70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -3,71 +3,10 @@
#include "rx.h"
#include "en/xdp.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
/* RX data path */
-bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
-{
- /* Check in advance that we have enough frames, instead of allocating
- * one-by-one, failing and moving frames to the Reuse Ring.
- */
- return xsk_umem_has_addrs_rq(rq->umem, count);
-}
-
-int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
-{
- struct xdp_umem *umem = rq->umem;
- u64 handle;
-
- if (!xsk_umem_peek_addr_rq(umem, &handle))
- return -ENOMEM;
-
- dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle,
- rq->buff.umem_headroom);
- dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
-
- /* No need to add headroom to the DMA address. In striding RQ case, we
- * just provide pages for UMR, and headroom is counted at the setup
- * stage when creating a WQE. In non-striding RQ case, headroom is
- * accounted in mlx5e_alloc_rx_wqe.
- */
- dma_info->addr = xdp_umem_get_dma(umem, handle);
-
- xsk_umem_release_addr_rq(umem);
-
- dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
-
- return 0;
-}
-
-static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle)
-{
- xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask);
-}
-
-/* XSKRQ uses pages from UMEM, they must not be released. They are returned to
- * the userspace if possible, and if not, this function is called to reuse them
- * in the driver.
- */
-void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
-{
- mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle);
-}
-
-/* Return a frame back to the hardware to fill in again. It is used by XDP when
- * the XDP program returns XDP_TX or XDP_REDIRECT not to an XSKMAP.
- */
-void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
-{
- struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca);
-
- mlx5e_xsk_recycle_frame(rq, handle);
-}
-
static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
u32 cqe_bcnt)
{
@@ -90,11 +29,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
u32 head_offset,
u32 page_idx)
{
- struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
- u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+ struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
u32 cqe_bcnt32 = cqe_bcnt;
- void *va, *data;
- u32 frag_size;
bool consumed;
/* Check packet size. Note LRO doesn't use linear SKB */
@@ -103,22 +39,20 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
return NULL;
}
- /* head_offset is not used in this function, because di->xsk.data and
- * di->addr point directly to the necessary place. Furthermore, in the
- * current implementation, UMR pages are mapped to XSK frames, so
+ /* head_offset is not used in this function, because xdp->data and the
+ * DMA address point directly to the necessary place. Furthermore, in
+ * the current implementation, UMR pages are mapped to XSK frames, so
* head_offset should always be 0.
*/
WARN_ON_ONCE(head_offset);
- va = di->xsk.data;
- data = va + rx_headroom;
- frag_size = rq->buff.headroom + cqe_bcnt32;
-
- dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
- prefetch(data);
+ xdp->data_end = xdp->data + cqe_bcnt32;
+ xdp_set_data_meta_invalid(xdp);
+ xsk_buff_dma_sync_for_cpu(xdp);
+ prefetch(xdp->data);
rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true);
+ consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp);
rcu_read_unlock();
/* Possible flows:
@@ -145,7 +79,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
* frame. On SKB allocation failure, NULL is returned.
*/
- return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32);
+ return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt32);
}
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
@@ -153,25 +87,20 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt)
{
- struct mlx5e_dma_info *di = wi->di;
- u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
- void *va, *data;
+ struct xdp_buff *xdp = wi->di->xsk;
bool consumed;
- u32 frag_size;
- /* wi->offset is not used in this function, because di->xsk.data and
- * di->addr point directly to the necessary place. Furthermore, in the
- * current implementation, one page = one packet = one frame, so
+ /* wi->offset is not used in this function, because xdp->data and the
+ * DMA address point directly to the necessary place. Furthermore, the
+ * XSK allocator allocates frames per packet, instead of pages, so
* wi->offset should always be 0.
*/
WARN_ON_ONCE(wi->offset);
- va = di->xsk.data;
- data = va + rx_headroom;
- frag_size = rq->buff.headroom + cqe_bcnt;
-
- dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
- prefetch(data);
+ xdp->data_end = xdp->data + cqe_bcnt;
+ xdp_set_data_meta_invalid(xdp);
+ xsk_buff_dma_sync_for_cpu(xdp);
+ prefetch(xdp->data);
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
@@ -179,7 +108,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
}
rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true);
+ consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp);
rcu_read_unlock();
if (likely(consumed))
@@ -189,5 +118,5 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
* will be handled by mlx5e_put_rx_frag.
* On SKB allocation failure, NULL is returned.
*/
- return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt);
+ return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index cab0e93497ae..d147b2f13b54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -5,16 +5,10 @@
#define __MLX5_EN_XSK_RX_H__
#include "en.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
/* RX data path */
-bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count);
-int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info);
-void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info);
-void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
struct mlx5e_mpw_info *wi,
u16 cqe_bcnt,
@@ -25,6 +19,23 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt);
+static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ dma_info->xsk = xsk_buff_alloc(rq->umem);
+ if (!dma_info->xsk)
+ return -ENOMEM;
+
+ /* Store the DMA address without headroom. In striding RQ case, we just
+ * provide pages for UMR, and headroom is counted at the setup stage
+ * when creating a WQE. In non-striding RQ case, headroom is accounted
+ * in mlx5e_alloc_rx_wqe.
+ */
+ dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
+
+ return 0;
+}
+
static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
{
if (!xsk_umem_uses_need_wakeup(rq->umem))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 3bcdb5b2fc20..83dce9cdb8c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -5,7 +5,7 @@
#include "umem.h"
#include "en/xdp.h"
#include "en/params.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
@@ -92,12 +92,11 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
break;
}
- xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr);
- xdptxd.data = xdp_umem_get_data(umem, desc.addr);
+ xdptxd.dma_addr = xsk_buff_raw_get_dma(umem, desc.addr);
+ xdptxd.data = xsk_buff_raw_get_data(umem, desc.addr);
xdptxd.len = desc.len;
- dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr,
- xdptxd.len, DMA_BIDIRECTIONAL);
+ xsk_buff_raw_dma_sync_for_device(umem, xdptxd.dma_addr, xdptxd.len);
if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) {
if (sq->mpwqe.wqe)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
index 79b487d89757..39fa0a705856 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -5,7 +5,7 @@
#define __MLX5_EN_XSK_TX_H__
#include "en.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
/* TX data path */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
index 4baaa5788320..7b17fcd0a56d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include "umem.h"
#include "setup.h"
#include "en/params.h"
@@ -10,40 +10,14 @@ static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv,
struct xdp_umem *umem)
{
struct device *dev = priv->mdev->device;
- u32 i;
- for (i = 0; i < umem->npgs; i++) {
- dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
-
- if (unlikely(dma_mapping_error(dev, dma)))
- goto err_unmap;
- umem->pages[i].dma = dma;
- }
-
- return 0;
-
-err_unmap:
- while (i--) {
- dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- umem->pages[i].dma = 0;
- }
-
- return -ENOMEM;
+ return xsk_buff_dma_map(umem, dev, 0);
}
static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv,
struct xdp_umem *umem)
{
- struct device *dev = priv->mdev->device;
- u32 i;
-
- for (i = 0; i < umem->npgs; i++) {
- dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- umem->pages[i].dma = 0;
- }
+ return xsk_buff_dma_unmap(umem, 0);
}
static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk)
@@ -90,13 +64,14 @@ static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix)
static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem)
{
- return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff;
+ return xsk_umem_get_headroom(umem) <= 0xffff &&
+ xsk_umem_get_chunk_size(umem) <= 0xffff;
}
void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk)
{
- xsk->headroom = umem->headroom;
- xsk->chunk_size = umem->chunk_size_nohr + umem->headroom;
+ xsk->headroom = xsk_umem_get_headroom(umem);
+ xsk->chunk_size = xsk_umem_get_chunk_size(umem);
}
static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
@@ -241,18 +216,6 @@ int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid)
mlx5e_xsk_disable_umem(priv, ix);
}
-int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries)
-{
- struct xdp_umem_fq_reuse *reuseq;
-
- reuseq = xsk_reuseq_prepare(nentries);
- if (unlikely(!reuseq))
- return -ENOMEM;
- xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
-
- return 0;
-}
-
u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk)
{
u16 res = xsk->refcnt ? params->num_channels : 0;