aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c54
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/debugfs.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c162
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h110
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c169
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c634
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c222
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h55
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c242
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c112
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c489
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c775
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c1013
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c1260
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c70
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h28
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c283
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/events.c325
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c83
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c285
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c255
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c368
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/srq.c716
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/transobj.c109
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h9
61 files changed, 5203 insertions, 3578 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index d324a3884462..9de9abacf7f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -12,17 +12,17 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
# mlx5 core basic
#
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
- health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
+ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
- fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \
- diag/fs_tracepoint.o diag/fw_tracer.o
+ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
+ lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o
#
# Netdev basic
#
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
- en_selftest.o en/port.o
+ en_selftest.o en/port.o en/monitor_stats.o
#
# Netdev extra
@@ -30,7 +30,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o
#
# Core extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a5a0823e5ada..d3125cdf69db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -40,9 +40,11 @@
#include <linux/random.h>
#include <linux/io-mapping.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
#include <linux/debugfs.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
CMD_IF_REV = 5,
@@ -313,6 +315,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_FPGA_DESTROY_QP:
case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
case MLX5_CMD_OP_DEALLOC_MEMIC:
+ case MLX5_CMD_OP_PAGE_FAULT_RESUME:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -326,7 +329,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_CREATE_MKEY:
case MLX5_CMD_OP_QUERY_MKEY:
case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
- case MLX5_CMD_OP_PAGE_FAULT_RESUME:
case MLX5_CMD_OP_CREATE_EQ:
case MLX5_CMD_OP_QUERY_EQ:
case MLX5_CMD_OP_GEN_EQE:
@@ -371,6 +373,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
case MLX5_CMD_OP_ALLOC_Q_COUNTER:
case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ case MLX5_CMD_OP_SET_MONITOR_COUNTER:
+ case MLX5_CMD_OP_ARM_MONITOR_COUNTER:
case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
case MLX5_CMD_OP_QUERY_RATE_LIMIT:
case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
@@ -520,6 +524,8 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER);
+ MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER);
MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
@@ -805,6 +811,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
return MLX5_GET(mbox_in, in->first.data, opcode);
}
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+
static void cb_timeout_handler(struct work_struct *work)
{
struct delayed_work *dwork = container_of(work, struct delayed_work,
@@ -1412,14 +1420,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
up(&cmd->sem);
}
+static int cmd_comp_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_core_dev *dev;
+ struct mlx5_cmd *cmd;
+ struct mlx5_eqe *eqe;
+
+ cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb);
+ dev = container_of(cmd, struct mlx5_core_dev, cmd);
+ eqe = data;
+
+ mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
+
+ return NOTIFY_OK;
+}
void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
{
+ MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD);
+ mlx5_eq_notifier_register(dev, &dev->cmd.nb);
mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
}
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
{
mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
+ mlx5_eq_notifier_unregister(dev, &dev->cmd.nb);
}
static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
@@ -1435,7 +1461,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
}
}
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
{
struct mlx5_cmd *cmd = &dev->cmd;
struct mlx5_cmd_work_ent *ent;
@@ -1533,7 +1559,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
}
}
}
-EXPORT_SYMBOL(mlx5_cmd_comp_handler);
+
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
+{
+ unsigned long flags;
+ u64 vector;
+
+ /* wait for pending handlers to complete */
+ mlx5_eq_synchronize_cmd_irq(dev);
+ spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+ vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+ if (!vector)
+ goto no_trig;
+
+ vector |= MLX5_TRIGGERED_CMD_COMP;
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+ mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+ mlx5_cmd_comp_handler(dev, vector, true);
+ return;
+
+no_trig:
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
static int status_to_err(u8 status)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 4b85abb5c9f7..713a17ee3751 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -38,6 +38,7 @@
#include <rdma/ib_verbs.h>
#include <linux/mlx5/cq.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#define TASKLET_MAX_TIME 2
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
@@ -92,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
- struct mlx5_eq *eq;
+ struct mlx5_eq_comp *eq;
int err;
- eq = mlx5_eqn2eq(dev, eqn);
+ eq = mlx5_eqn2comp_eq(dev, eqn);
if (IS_ERR(eq))
return PTR_ERR(eq);
@@ -119,12 +120,12 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
INIT_LIST_HEAD(&cq->tasklet_ctx.list);
/* Add to comp EQ CQ tree to recv comp events */
- err = mlx5_eq_add_cq(eq, cq);
+ err = mlx5_eq_add_cq(&eq->core, cq);
if (err)
goto err_cmd;
/* Add to async EQ CQ tree to recv async events */
- err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+ err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq);
if (err)
goto err_cq_add;
@@ -139,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
return 0;
err_cq_add:
- mlx5_eq_del_cq(eq, cq);
+ mlx5_eq_del_cq(&eq->core, cq);
err_cmd:
memset(din, 0, sizeof(din));
memset(dout, 0, sizeof(dout));
@@ -157,11 +158,11 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
int err;
- err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+ err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
if (err)
return err;
- err = mlx5_eq_del_cq(cq->eq, cq);
+ err = mlx5_eq_del_cq(&cq->eq->core, cq);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 90fabd612b6c..a11e22d0b0cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -36,6 +36,7 @@
#include <linux/mlx5/cq.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
QP_PID,
@@ -349,6 +350,16 @@ out:
return param;
}
+static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
+
+ MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+ MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
int index)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 37ba7c78859d..ebc046fa97d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -45,75 +45,11 @@ struct mlx5_device_context {
unsigned long state;
};
-struct mlx5_delayed_event {
- struct list_head list;
- struct mlx5_core_dev *dev;
- enum mlx5_dev_event event;
- unsigned long param;
-};
-
enum {
MLX5_INTERFACE_ADDED,
MLX5_INTERFACE_ATTACHED,
};
-static void add_delayed_event(struct mlx5_priv *priv,
- struct mlx5_core_dev *dev,
- enum mlx5_dev_event event,
- unsigned long param)
-{
- struct mlx5_delayed_event *delayed_event;
-
- delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC);
- if (!delayed_event) {
- mlx5_core_err(dev, "event %d is missed\n", event);
- return;
- }
-
- mlx5_core_dbg(dev, "Accumulating event %d\n", event);
- delayed_event->dev = dev;
- delayed_event->event = event;
- delayed_event->param = param;
- list_add_tail(&delayed_event->list, &priv->waiting_events_list);
-}
-
-static void delayed_event_release(struct mlx5_device_context *dev_ctx,
- struct mlx5_priv *priv)
-{
- struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
- struct mlx5_delayed_event *de;
- struct mlx5_delayed_event *n;
- struct list_head temp;
-
- INIT_LIST_HEAD(&temp);
-
- spin_lock_irq(&priv->ctx_lock);
-
- priv->is_accum_events = false;
- list_splice_init(&priv->waiting_events_list, &temp);
- if (!dev_ctx->context)
- goto out;
- list_for_each_entry_safe(de, n, &temp, list)
- dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
-
-out:
- spin_unlock_irq(&priv->ctx_lock);
-
- list_for_each_entry_safe(de, n, &temp, list) {
- list_del(&de->list);
- kfree(de);
- }
-}
-
-/* accumulating events that can come after mlx5_ib calls to
- * ib_register_device, till adding that interface to the events list.
- */
-static void delayed_event_start(struct mlx5_priv *priv)
-{
- spin_lock_irq(&priv->ctx_lock);
- priv->is_accum_events = true;
- spin_unlock_irq(&priv->ctx_lock);
-}
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
@@ -129,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
dev_ctx->intf = intf;
- delayed_event_start(priv);
-
dev_ctx->context = intf->add(dev);
if (dev_ctx->context) {
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
@@ -139,22 +73,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (dev_ctx->intf->pfault) {
- if (priv->pfault) {
- mlx5_core_err(dev, "multiple page fault handlers not supported");
- } else {
- priv->pfault_ctx = dev_ctx->context;
- priv->pfault = dev_ctx->intf->pfault;
- }
- }
-#endif
spin_unlock_irq(&priv->ctx_lock);
}
- delayed_event_release(dev_ctx, priv);
-
if (!dev_ctx->context)
kfree(dev_ctx);
}
@@ -179,15 +100,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
if (!dev_ctx)
return;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- spin_lock_irq(&priv->ctx_lock);
- if (priv->pfault == dev_ctx->intf->pfault)
- priv->pfault = NULL;
- spin_unlock_irq(&priv->ctx_lock);
-
- synchronize_srcu(&priv->pfault_srcu);
-#endif
-
spin_lock_irq(&priv->ctx_lock);
list_del(&dev_ctx->list);
spin_unlock_irq(&priv->ctx_lock);
@@ -207,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv
if (!dev_ctx)
return;
- delayed_event_start(priv);
if (intf->attach) {
if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
- goto out;
+ return;
if (intf->attach(dev, dev_ctx->context))
- goto out;
-
+ return;
set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
} else {
if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
- goto out;
+ return;
dev_ctx->context = intf->add(dev);
if (!dev_ctx->context)
- goto out;
-
+ return;
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
}
-
-out:
- delayed_event_release(dev_ctx, priv);
}
void mlx5_attach_device(struct mlx5_core_dev *dev)
@@ -350,28 +256,6 @@ void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
mutex_unlock(&mlx5_intf_mutex);
}
-void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
-{
- struct mlx5_priv *priv = &mdev->priv;
- struct mlx5_device_context *dev_ctx;
- unsigned long flags;
- void *result = NULL;
-
- spin_lock_irqsave(&priv->ctx_lock, flags);
-
- list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
- if ((dev_ctx->intf->protocol == protocol) &&
- dev_ctx->intf->get_dev) {
- result = dev_ctx->intf->get_dev(dev_ctx->context);
- break;
- }
-
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
-
- return result;
-}
-EXPORT_SYMBOL(mlx5_get_protocol_dev);
-
/* Must be called with intf_mutex held */
void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
{
@@ -422,44 +306,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
return res;
}
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
- unsigned long param)
-{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_device_context *dev_ctx;
- unsigned long flags;
-
- spin_lock_irqsave(&priv->ctx_lock, flags);
-
- if (priv->is_accum_events)
- add_delayed_event(priv, dev, event, param);
-
- /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
- * still in priv->ctx_list. In this case, only notify the dev_ctx if its
- * ADDED or ATTACHED bit are set.
- */
- list_for_each_entry(dev_ctx, &priv->ctx_list, list)
- if (dev_ctx->intf->event &&
- (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
- test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
- dev_ctx->intf->event(dev, dev_ctx->context, event, param);
-
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
-}
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
- struct mlx5_pagefault *pfault)
-{
- struct mlx5_priv *priv = &dev->priv;
- int srcu_idx;
-
- srcu_idx = srcu_read_lock(&priv->pfault_srcu);
- if (priv->pfault)
- priv->pfault(dev, priv->pfault_ctx, pfault);
- srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
-}
-#endif
void mlx5_dev_list_lock(void)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 0f11fff32a9b..424457ff9759 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -161,10 +161,10 @@ static void print_misc_parameters_hdrs(struct trace_seq *p,
PRINT_MASKED_VAL(name, p, format); \
}
DECLARE_MASK_VAL(u64, gre_key) = {
- .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 |
- MLX5_GET(fte_match_set_misc, mask, gre_key_l),
- .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 |
- MLX5_GET(fte_match_set_misc, value, gre_key_l)};
+ .m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 |
+ MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo),
+ .v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 |
+ MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)};
PRINT_MASKED_VAL(gre_key, p, "%llu");
PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index d4ec93bde4de..6999f4486e9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
#define CREATE_TRACE_POINTS
+#include "lib/eq.h"
#include "fw_tracer.h"
#include "fw_tracer_tracepoint.h"
@@ -846,9 +847,9 @@ free_tracer:
return ERR_PTR(err);
}
-/* Create HW resources + start tracer
- * must be called before Async EQ is created
- */
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data);
+
+/* Create HW resources + start tracer */
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
{
struct mlx5_core_dev *dev;
@@ -874,6 +875,9 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
goto err_dealloc_pd;
}
+ MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
+ mlx5_eq_notifier_register(dev, &tracer->nb);
+
mlx5_fw_tracer_start(tracer);
return 0;
@@ -883,9 +887,7 @@ err_dealloc_pd:
return err;
}
-/* Stop tracer + Cleanup HW resources
- * must be called after Async EQ is destroyed
- */
+/* Stop tracer + Cleanup HW resources */
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
{
if (IS_ERR_OR_NULL(tracer))
@@ -893,7 +895,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
tracer->owner);
-
+ mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb);
cancel_work_sync(&tracer->ownership_change_work);
cancel_work_sync(&tracer->handle_traces_work);
@@ -922,12 +924,11 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
kfree(tracer);
}
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
{
- struct mlx5_fw_tracer *tracer = dev->tracer;
-
- if (!tracer)
- return;
+ struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct mlx5_eqe *eqe = data;
switch (eqe->sub_type) {
case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
@@ -942,6 +943,8 @@ void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
eqe->sub_type);
}
+
+ return NOTIFY_OK;
}
EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index 0347f2dd5cee..a8b8747f2b61 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -55,6 +55,7 @@
struct mlx5_fw_tracer {
struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
bool owner;
u8 trc_ver;
struct workqueue_struct *work_queue;
@@ -170,6 +171,5 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 118324802926..8fa8fdd30b85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -49,6 +49,7 @@
#include <net/switchdev.h>
#include <net/xdp.h>
#include <linux/net_dim.h>
+#include <linux/bits.h>
#include "wq.h"
#include "mlx5_core.h"
#include "en_stats.h"
@@ -147,9 +148,6 @@ struct page_pool;
MLX5_UMR_MTT_ALIGNMENT))
#define MLX5E_UMR_WQEBBS \
(DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
-#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS
-
-#define MLX5E_NUM_MAIN_GROUPS 9
#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
@@ -178,8 +176,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
{
return is_kdump_kernel() ?
MLX5E_MIN_NUM_CHANNELS :
- min_t(int, mdev->priv.eq_table.num_comp_vectors,
- MLX5E_MAX_NUM_CHANNELS);
+ min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
}
/* Use this function to get max num channels after netdev was created */
@@ -214,22 +211,24 @@ struct mlx5e_umr_wqe {
extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
enum mlx5e_priv_flag {
- MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
- MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1),
- MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2),
- MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3),
- MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4),
+ MLX5E_PFLAG_RX_CQE_BASED_MODER,
+ MLX5E_PFLAG_TX_CQE_BASED_MODER,
+ MLX5E_PFLAG_RX_CQE_COMPRESS,
+ MLX5E_PFLAG_RX_STRIDING_RQ,
+ MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
+ MLX5E_PFLAG_XDP_TX_MPWQE,
+ MLX5E_NUM_PFLAGS, /* Keep last */
};
#define MLX5E_SET_PFLAG(params, pflag, enable) \
do { \
if (enable) \
- (params)->pflags |= (pflag); \
+ (params)->pflags |= BIT(pflag); \
else \
- (params)->pflags &= ~(pflag); \
+ (params)->pflags &= ~(BIT(pflag)); \
} while (0)
-#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag)))
+#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
#ifdef CONFIG_MLX5_CORE_EN_DCB
#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
@@ -247,9 +246,6 @@ struct mlx5e_params {
bool lro_en;
u32 lro_wqe_sz;
u8 tx_min_inline_mode;
- u8 rss_hfunc;
- u8 toeplitz_hash_key[40];
- u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
bool vlan_strip_disable;
bool scatter_fcs_en;
bool rx_dim_enabled;
@@ -349,7 +345,6 @@ enum {
MLX5E_SQ_STATE_IPSEC,
MLX5E_SQ_STATE_AM,
MLX5E_SQ_STATE_TLS,
- MLX5E_SQ_STATE_REDIRECT,
};
struct mlx5e_sq_wqe_info {
@@ -410,24 +405,51 @@ struct mlx5e_xdp_info {
struct mlx5e_dma_info di;
};
+struct mlx5e_xdp_info_fifo {
+ struct mlx5e_xdp_info *xi;
+ u32 *cc;
+ u32 *pc;
+ u32 mask;
+};
+
+struct mlx5e_xdp_wqe_info {
+ u8 num_wqebbs;
+ u8 num_ds;
+};
+
+struct mlx5e_xdp_mpwqe {
+ /* Current MPWQE session */
+ struct mlx5e_tx_wqe *wqe;
+ u8 ds_count;
+ u8 max_ds_count;
+};
+
+struct mlx5e_xdpsq;
+typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*,
+ struct mlx5e_xdp_info*);
struct mlx5e_xdpsq {
/* data path */
/* dirtied @completion */
+ u32 xdpi_fifo_cc;
u16 cc;
bool redirect_flush;
/* dirtied @xmit */
- u16 pc ____cacheline_aligned_in_smp;
- bool doorbell;
+ u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
+ u16 pc;
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg;
+ struct mlx5e_xdp_mpwqe mpwqe;
struct mlx5e_cq cq;
/* read only */
struct mlx5_wq_cyc wq;
struct mlx5e_xdpsq_stats *stats;
+ mlx5e_fp_xmit_xdp_frame xmit_xdp_frame;
struct {
- struct mlx5e_xdp_info *xdpi;
+ struct mlx5e_xdp_wqe_info *wqe_info;
+ struct mlx5e_xdp_info_fifo xdpi_fifo;
} db;
void __iomem *uar_map;
u32 sqn;
@@ -633,7 +655,6 @@ struct mlx5e_channel_stats {
} ____cacheline_aligned_in_smp;
enum {
- MLX5E_STATE_ASYNC_EVENTS_ENABLED,
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
};
@@ -654,6 +675,13 @@ enum {
MLX5E_NIC_PRIO
};
+struct mlx5e_rss_params {
+ u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
+ u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
+ u8 toeplitz_hash_key[40];
+ u8 hfunc;
+};
+
struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -674,6 +702,7 @@ struct mlx5e_priv {
struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS];
+ struct mlx5e_rss_params rss_params;
u32 tx_rates[MLX5E_MAX_NUM_SQS];
struct mlx5e_flow_steering fs;
@@ -683,6 +712,8 @@ struct mlx5e_priv {
struct work_struct set_rx_mode_work;
struct work_struct tx_timeout_work;
struct work_struct update_stats_work;
+ struct work_struct monitor_counters_work;
+ struct mlx5_nb monitor_counters_nb;
struct mlx5_core_dev *mdev;
struct net_device *netdev;
@@ -692,6 +723,8 @@ struct mlx5e_priv {
struct hwtstamp_config tstamp;
u16 q_counter;
u16 drop_rq_q_counter;
+ struct notifier_block events_nb;
+
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx dcbx;
#endif
@@ -769,6 +802,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
void mlx5e_update_stats(struct mlx5e_priv *priv);
+void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
int mlx5e_self_test_num(struct mlx5e_priv *priv);
@@ -799,9 +833,11 @@ struct mlx5e_redirect_rqt_param {
int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
struct mlx5e_redirect_rqt_param rrp);
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
- enum mlx5e_traffic_types tt,
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
+ const struct mlx5e_tirc_config *ttconfig,
void *tirc, bool inner);
+void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen);
+struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt);
int mlx5e_open_locked(struct net_device *netdev);
int mlx5e_close_locked(struct net_device *netdev);
@@ -931,14 +967,16 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
int mlx5e_create_tises(struct mlx5e_priv *priv);
-void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
+void mlx5e_update_carrier(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
+void mlx5e_update_ndo_stats(struct mlx5e_priv *priv);
void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
int mlx5e_bits_invert(unsigned long a, int size);
typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv);
+int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv);
int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
change_hw_mtu_cb set_mtu_cb);
@@ -962,12 +1000,20 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal);
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal);
+int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
+ struct ethtool_link_ksettings *link_ksettings);
+int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
+ const struct ethtool_link_ksettings *link_ksettings);
u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv);
u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
struct ethtool_ts_info *info);
int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
struct ethtool_flash *flash);
+void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam);
+int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam);
/* mlx5e generic netdev management API */
int mlx5e_netdev_init(struct net_device *netdev,
@@ -983,12 +1029,26 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv);
void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
u16 max_channels, u16 mtu);
void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
-void mlx5e_build_rss_params(struct mlx5e_params *params);
+void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
+ u16 num_channels);
u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
void mlx5e_rx_dim_work(struct work_struct *work);
void mlx5e_tx_dim_work(struct work_struct *work);
+
+void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti);
+void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti);
+netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features);
+#ifdef CONFIG_MLX5_ESWITCH
+int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac);
+int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate);
+int mlx5e_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi);
+int mlx5e_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats);
+#endif
#endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 1431232c9a09..be5961ff24cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -73,6 +73,22 @@ enum mlx5e_traffic_types {
MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
};
+struct mlx5e_tirc_config {
+ u8 l3_prot_type;
+ u8 l4_prot_type;
+ u32 rx_hash_fields;
+};
+
+#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP)
+#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_L4_SPORT |\
+ MLX5_HASH_FIELD_SEL_L4_DPORT)
+#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_IPSEC_SPI)
+
enum mlx5e_tunnel_types {
MLX5E_TT_IPV4_GRE,
MLX5E_TT_IPV6_GRE,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
new file mode 100644
index 000000000000..2ce420851e77
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include "en.h"
+#include "monitor_stats.h"
+#include "lib/eq.h"
+
+/* Driver will set the following watch counters list:
+ * Ppcnt.802_3:
+ * a_in_range_length_errors Type: 0x0, Counter: 0x0, group_id = N/A
+ * a_out_of_range_length_field Type: 0x0, Counter: 0x1, group_id = N/A
+ * a_frame_too_long_errors Type: 0x0, Counter: 0x2, group_id = N/A
+ * a_frame_check_sequence_errors Type: 0x0, Counter: 0x3, group_id = N/A
+ * a_alignment_errors Type: 0x0, Counter: 0x4, group_id = N/A
+ * if_out_discards Type: 0x0, Counter: 0x5, group_id = N/A
+ * Q_Counters:
+ * Q[index].rx_out_of_buffer Type: 0x1, Counter: 0x4, group_id = counter_ix
+ */
+
+#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1
+#define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1
+
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters))
+ return false;
+ if (MLX5_CAP_PCAM_REG(mdev, ppcnt) &&
+ MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters) <
+ NUM_REQ_PPCNT_COUNTER_S1)
+ return false;
+ if (MLX5_CAP_GEN(mdev, num_q_monitor_counters) <
+ NUM_REQ_Q_COUNTERS_S1)
+ return false;
+ return true;
+}
+
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(arm_monitor_counter_out)] = {};
+
+ MLX5_SET(arm_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_ARM_MONITOR_COUNTER);
+ mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static void mlx5e_monitor_counters_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ monitor_counters_work);
+
+ mutex_lock(&priv->state_lock);
+ mlx5e_update_ndo_stats(priv);
+ mutex_unlock(&priv->state_lock);
+ mlx5e_monitor_counter_arm(priv);
+}
+
+static int mlx5e_monitor_event_handler(struct notifier_block *nb,
+ unsigned long event, void *eqe)
+{
+ struct mlx5e_priv *priv = mlx5_nb_cof(nb, struct mlx5e_priv,
+ monitor_counters_nb);
+ queue_work(priv->wq, &priv->monitor_counters_work);
+ return NOTIFY_OK;
+}
+
+void mlx5e_monitor_counter_start(struct mlx5e_priv *priv)
+{
+ MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
+ MONITOR_COUNTER);
+ mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
+}
+
+static void mlx5e_monitor_counter_stop(struct mlx5e_priv *priv)
+{
+ mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
+ cancel_work_sync(&priv->monitor_counters_work);
+}
+
+static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in)
+{
+ enum mlx5_monitor_counter_ppcnt ppcnt_cnt;
+
+ for (ppcnt_cnt = 0;
+ ppcnt_cnt < NUM_REQ_PPCNT_COUNTER_S1;
+ ppcnt_cnt++, cnt++) {
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].type,
+ MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter,
+ ppcnt_cnt);
+ }
+ return ppcnt_cnt;
+}
+
+static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in)
+{
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].type,
+ MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter,
+ MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter_group_id,
+ q_counter);
+ return 1;
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters);
+ int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters);
+ int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 :
+ MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters);
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+ int q_counter = priv->q_counter;
+ int cnt = 0;
+
+ if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 &&
+ max_num_of_counters >= (NUM_REQ_PPCNT_COUNTER_S1 + cnt))
+ cnt += fill_monitor_counter_ppcnt_set1(cnt, in);
+
+ if (num_q_counters >= NUM_REQ_Q_COUNTERS_S1 &&
+ max_num_of_counters >= (NUM_REQ_Q_COUNTERS_S1 + cnt) &&
+ q_counter)
+ cnt += fill_monitor_counter_q_counter_set1(cnt, q_counter, in);
+
+ MLX5_SET(set_monitor_counter_in, in, num_of_counters, cnt);
+ MLX5_SET(set_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv)
+{
+ INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work);
+ mlx5e_monitor_counter_start(priv);
+ mlx5e_set_monitor_counter(priv);
+ mlx5e_monitor_counter_arm(priv);
+ queue_work(priv->wq, &priv->update_stats_work);
+}
+
+static void mlx5e_monitor_counter_disable(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+
+ MLX5_SET(set_monitor_counter_in, in, num_of_counters, 0);
+ MLX5_SET(set_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+ mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
+{
+ mlx5e_monitor_counter_disable(priv);
+ mlx5e_monitor_counter_stop(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
new file mode 100644
index 000000000000..e1ac4b3d22fb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_MONITOR_H__
+#define __MLX5_MONITOR_H__
+
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv);
+
+#endif /* __MLX5_MONITOR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
new file mode 100644
index 000000000000..046948ead152
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -0,0 +1,634 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/vxlan.h>
+#include <net/gre.h>
+#include "lib/vxlan.h"
+#include "en/tc_tun.h"
+
+static int get_route_and_out_devs(struct mlx5e_priv *priv,
+ struct net_device *dev,
+ struct net_device **route_dev,
+ struct net_device **out_dev)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct net_device *uplink_dev, *uplink_upper;
+ bool dst_is_lag_dev;
+
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ uplink_upper = netdev_master_upper_dev_get(uplink_dev);
+ dst_is_lag_dev = (uplink_upper &&
+ netif_is_lag_master(uplink_upper) &&
+ dev == uplink_upper &&
+ mlx5_lag_is_sriov(priv->mdev));
+
+ /* if the egress device isn't on the same HW e-switch or
+ * it's a LAG device, use the uplink
+ */
+ if (!switchdev_port_same_parent_id(priv->netdev, dev) ||
+ dst_is_lag_dev) {
+ *route_dev = uplink_dev;
+ *out_dev = *route_dev;
+ } else {
+ *route_dev = dev;
+ if (is_vlan_dev(*route_dev))
+ *out_dev = uplink_dev;
+ else if (mlx5e_eswitch_rep(dev))
+ *out_dev = *route_dev;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct net_device **route_dev,
+ struct flowi4 *fl4,
+ struct neighbour **out_n,
+ u8 *out_ttl)
+{
+ struct rtable *rt;
+ struct neighbour *n = NULL;
+
+#if IS_ENABLED(CONFIG_INET)
+ int ret;
+
+ rt = ip_route_output_key(dev_net(mirred_dev), fl4);
+ ret = PTR_ERR_OR_ZERO(rt);
+ if (ret)
+ return ret;
+#else
+ return -EOPNOTSUPP;
+#endif
+
+ ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
+ if (ret < 0)
+ return ret;
+
+ if (!(*out_ttl))
+ *out_ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
+ ip_rt_put(rt);
+ if (!n)
+ return -ENOMEM;
+
+ *out_n = n;
+ return 0;
+}
+
+static const char *mlx5e_netdev_kind(struct net_device *dev)
+{
+ if (dev->rtnl_link_ops)
+ return dev->rtnl_link_ops->kind;
+ else
+ return "";
+}
+
+static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct net_device **route_dev,
+ struct flowi6 *fl6,
+ struct neighbour **out_n,
+ u8 *out_ttl)
+{
+ struct neighbour *n = NULL;
+ struct dst_entry *dst;
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ int ret;
+
+ ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
+ fl6);
+ if (ret < 0)
+ return ret;
+
+ if (!(*out_ttl))
+ *out_ttl = ip6_dst_hoplimit(dst);
+
+ ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
+ if (ret < 0)
+ return ret;
+#else
+ return -EOPNOTSUPP;
+#endif
+
+ n = dst_neigh_lookup(dst, &fl6->daddr);
+ dst_release(dst);
+ if (!n)
+ return -ENOMEM;
+
+ *out_n = n;
+ return 0;
+}
+
+static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key)
+{
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct vxlanhdr *vxh = (struct vxlanhdr *)
+ ((char *)udp + sizeof(struct udphdr));
+
+ udp->dest = tun_key->tp_dst;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(tun_id);
+
+ return 0;
+}
+
+static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key)
+{
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ int hdr_len;
+ struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf);
+
+ /* the HW does not calculate GRE csum or sequences */
+ if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
+ return -EOPNOTSUPP;
+
+ greh->protocol = htons(ETH_P_TEB);
+
+ /* GRE key */
+ hdr_len = gre_calc_hlen(tun_key->tun_flags);
+ greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);
+ if (tun_key->tun_flags & TUNNEL_KEY) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+
+ *ptr = tun_id;
+ }
+
+ return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ int err = 0;
+ struct ip_tunnel_key *key = &e->tun_info.key;
+
+ if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ *ip_proto = IPPROTO_UDP;
+ err = mlx5e_gen_vxlan_header(buf, key);
+ } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
+ *ip_proto = IPPROTO_GRE;
+ err = mlx5e_gen_gre_header(buf, key);
+ } else {
+ pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n"
+ , e->tunnel_type);
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
+ struct mlx5e_encap_entry *e,
+ u16 proto)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+ char *ip;
+
+ ether_addr_copy(eth->h_dest, e->h_dest);
+ ether_addr_copy(eth->h_source, dev->dev_addr);
+ if (is_vlan_dev(dev)) {
+ struct vlan_hdr *vlan = (struct vlan_hdr *)
+ ((char *)eth + ETH_HLEN);
+ ip = (char *)vlan + VLAN_HLEN;
+ eth->h_proto = vlan_dev_vlan_proto(dev);
+ vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
+ vlan->h_vlan_encapsulated_proto = htons(proto);
+ } else {
+ eth->h_proto = htons(proto);
+ ip = (char *)eth + ETH_HLEN;
+ }
+
+ return ip;
+}
+
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ struct net_device *out_dev, *route_dev;
+ struct neighbour *n = NULL;
+ struct flowi4 fl4 = {};
+ int ipv4_encap_size;
+ char *encap_header;
+ u8 nud_state, ttl;
+ struct iphdr *ip;
+ int err;
+
+ /* add the IP fields */
+ fl4.flowi4_tos = tun_key->tos;
+ fl4.daddr = tun_key->u.ipv4.dst;
+ fl4.saddr = tun_key->u.ipv4.src;
+ ttl = tun_key->ttl;
+
+ err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev,
+ &fl4, &n, &ttl);
+ if (err)
+ return err;
+
+ ipv4_encap_size =
+ (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct iphdr) +
+ e->tunnel_hlen;
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ return -EOPNOTSUPP;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header)
+ return -ENOMEM;
+
+ /* used by mlx5e_detach_encap to lookup a neigh hash table
+ * entry in the neigh hash table when a user deletes a rule
+ */
+ e->m_neigh.dev = n->dev;
+ e->m_neigh.family = n->ops->family;
+ memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+ e->out_dev = out_dev;
+
+ /* It's important to add the neigh to the hash table before checking
+ * the neigh validity state. So if we'll get a notification, in case the
+ * neigh changes it's validity state, we would find the relevant neigh
+ * in the hash.
+ */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(e->h_dest, n->ha);
+ read_unlock_bh(&n->lock);
+
+ /* add ethernet header */
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ETH_P_IP);
+
+ /* add ip header */
+ ip->tos = tun_key->tos;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->ttl = ttl;
+ ip->daddr = fl4.daddr;
+ ip->saddr = fl4.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
+ &ip->protocol, e);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->encap_size = ipv4_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+ err = -EAGAIN;
+ goto out;
+ }
+
+ err = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv4_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+ neigh_release(n);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+out:
+ if (n)
+ neigh_release(n);
+ return err;
+}
+
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ struct net_device *out_dev, *route_dev;
+ struct neighbour *n = NULL;
+ struct flowi6 fl6 = {};
+ struct ipv6hdr *ip6h;
+ int ipv6_encap_size;
+ char *encap_header;
+ u8 nud_state, ttl;
+ int err;
+
+ ttl = tun_key->ttl;
+
+ fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ fl6.daddr = tun_key->u.ipv6.dst;
+ fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev,
+ &fl6, &n, &ttl);
+ if (err)
+ return err;
+
+ ipv6_encap_size =
+ (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct ipv6hdr) +
+ e->tunnel_hlen;
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ return -EOPNOTSUPP;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header)
+ return -ENOMEM;
+
+ /* used by mlx5e_detach_encap to lookup a neigh hash table
+ * entry in the neigh hash table when a user deletes a rule
+ */
+ e->m_neigh.dev = n->dev;
+ e->m_neigh.family = n->ops->family;
+ memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+ e->out_dev = out_dev;
+
+ /* It's importent to add the neigh to the hash table before checking
+ * the neigh validity state. So if we'll get a notification, in case the
+ * neigh changes it's validity state, we would find the relevant neigh
+ * in the hash.
+ */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(e->h_dest, n->ha);
+ read_unlock_bh(&n->lock);
+
+ /* add ethernet header */
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ETH_P_IPV6);
+
+ /* add ip header */
+ ip6_flow_hdr(ip6h, tun_key->tos, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->hop_limit = ttl;
+ ip6h->daddr = fl6.daddr;
+ ip6h->saddr = fl6.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+ &ip6h->nexthdr, e);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->encap_size = ipv6_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+ err = -EAGAIN;
+ goto out;
+ }
+
+ err = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv6_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+ neigh_release(n);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+out:
+ if (n)
+ neigh_release(n);
+ return err;
+}
+
+int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev)
+{
+ if (netif_is_vxlan(tunnel_dev))
+ return MLX5E_TC_TUNNEL_TYPE_VXLAN;
+ else if (netif_is_gretap(tunnel_dev) ||
+ netif_is_ip6gretap(tunnel_dev))
+ return MLX5E_TC_TUNNEL_TYPE_GRETAP;
+ else
+ return MLX5E_TC_TUNNEL_TYPE_UNKNOWN;
+}
+
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+ struct net_device *netdev)
+{
+ int tunnel_type = mlx5e_tc_tun_get_type(netdev);
+
+ if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN &&
+ MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
+ return true;
+ else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP &&
+ MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap))
+ return true;
+ else
+ return false;
+}
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev);
+
+ if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ int dst_port = be16_to_cpu(e->tun_info.key.tp_dst);
+
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vxlan udp dport was not registered with the HW");
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n",
+ dst_port);
+ return -EOPNOTSUPP;
+ }
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+ e->tunnel_hlen = VXLAN_HLEN;
+ } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE;
+ e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags);
+ } else {
+ e->reformat_type = -1;
+ e->tunnel_hlen = -1;
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_dissector_key_ports *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ f->key);
+ struct flow_dissector_key_ports *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ f->mask);
+ void *misc_c = MLX5_ADDR_OF(fte_match_param,
+ spec->match_criteria,
+ misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param,
+ spec->match_value,
+ misc_parameters);
+
+ /* Full udp dst port must be given */
+ if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) ||
+ memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "VXLAN decap filter must include enc_dst_port condition");
+ netdev_warn(priv->netdev,
+ "VXLAN decap filter must include enc_dst_port condition\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* udp dst port must be knonwn as a VXLAN port */
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matched UDP port is not registered as a VXLAN port");
+ netdev_warn(priv->netdev,
+ "UDP port %d is not registered as a VXLAN port\n",
+ be16_to_cpu(key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ /* dst UDP port is valid here */
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(key->src));
+
+ /* match on VNI */
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->key);
+ struct flow_dissector_key_keyid *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->mask);
+ MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
+ be32_to_cpu(mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
+ be32_to_cpu(key->keyid));
+ }
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *outer_headers_c,
+ void *outer_headers_v)
+{
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "GRE HW offloading is not supported");
+ netdev_warn(priv->netdev, "GRE HW offloading is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ ip_protocol, IPPROTO_GRE);
+
+ /* gre protocol*/
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol);
+ MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
+
+ /* gre key */
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *mask = NULL;
+ struct flow_dissector_key_keyid *key = NULL;
+
+ mask = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->mask);
+ MLX5_SET(fte_match_set_misc, misc_c,
+ gre_key.key, be32_to_cpu(mask->keyid));
+
+ key = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->key);
+ MLX5_SET(fte_match_set_misc, misc_v,
+ gre_key.key, be32_to_cpu(key->keyid));
+ }
+
+ return 0;
+}
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int tunnel_type;
+ int err = 0;
+
+ tunnel_type = mlx5e_tc_tun_get_type(filter_dev);
+ if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ err = mlx5e_tc_tun_parse_vxlan(priv, spec, f,
+ headers_c, headers_v);
+ } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
+ err = mlx5e_tc_tun_parse_gretap(priv, spec, f,
+ headers_c, headers_v);
+ } else {
+ netdev_warn(priv->netdev,
+ "decapsulation offload is not supported for %s net device (%d)\n",
+ mlx5e_netdev_kind(filter_dev), tunnel_type);
+ return -EOPNOTSUPP;
+ }
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
new file mode 100644
index 000000000000..706ce7bf15e7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUNNEL_H__
+#define __MLX5_EN_TC_TUNNEL_H__
+
+#include <linux/netdevice.h>
+#include <linux/mlx5/fs.h>
+#include <net/pkt_cls.h>
+#include <linux/netlink.h>
+#include "en.h"
+#include "en_rep.h"
+
+enum {
+ MLX5E_TC_TUNNEL_TYPE_UNKNOWN,
+ MLX5E_TC_TUNNEL_TYPE_VXLAN,
+ MLX5E_TC_TUNNEL_TYPE_GRETAP
+};
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack);
+
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+
+int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev);
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+ struct net_device *netdev);
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *headers_c,
+ void *headers_v);
+
+#endif //__MLX5_EN_TC_TUNNEL_H__
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index ad6d471d00dd..3740177eed09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -47,7 +47,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
xdpi.xdpf->len, PCI_DMA_TODEVICE);
xdpi.di = *di;
- return mlx5e_xmit_xdp_frame(sq, &xdpi);
+ return sq->xmit_xdp_frame(sq, &xdpi);
}
/* returns true if packet was consumed by xdp */
@@ -102,7 +102,98 @@ xdp_abort:
}
}
-bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u8 wqebbs;
+ u16 pi;
+
+ mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);
+
+ prefetchw(session->wqe->data);
+ session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
+ * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
+ * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
+ * full-session WQE be cache-aligned.
+ */
+#if L1_CACHE_BYTES < 128
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
+#else
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
+#endif
+
+ wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
+ MLX5E_XDP_MPW_MAX_WQEBBS);
+
+ session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
+}
+
+static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
+ u16 ds_count = session->ds_count;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+
+ cseg->opmod_idx_opcode =
+ cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
+
+ wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
+ wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT;
+
+ sq->pc += wi->num_wqebbs;
+
+ sq->doorbell_cseg = cseg;
+
+ session->wqe = NULL; /* Close session */
+}
+
+static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_info *xdpi)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+ dma_addr_t dma_addr = xdpi->dma_addr;
+ struct xdp_frame *xdpf = xdpi->xdpf;
+ unsigned int dma_len = xdpf->len;
+
+ if (unlikely(sq->hw_mtu < dma_len)) {
+ stats->err++;
+ return false;
+ }
+
+ if (unlikely(!session->wqe)) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+ MLX5_SEND_WQE_MAX_WQEBBS))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ stats->full++;
+ return false;
+ }
+
+ mlx5e_xdp_mpwqe_session_start(sq);
+ }
+
+ mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len);
+
+ if (unlikely(session->ds_count == session->max_ds_count))
+ mlx5e_xdp_mpwqe_complete(sq);
+
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+ stats->xmit++;
+ return true;
+}
+
+static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
{
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -126,11 +217,8 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
}
if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
- if (sq->doorbell) {
- /* SQ is full, ring doorbell */
- mlx5e_xmit_xdp_doorbell(sq);
- sq->doorbell = false;
- }
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
stats->full++;
return false;
}
@@ -152,23 +240,20 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
- /* move page to reference to sq responsibility,
- * and mark so it's not put back in page-cache.
- */
- sq->db.xdpi[pi] = *xdpi;
sq->pc++;
- sq->doorbell = true;
+ sq->doorbell_cseg = cseg;
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
stats->xmit++;
return true;
}
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
{
+ struct mlx5e_xdp_info_fifo *xdpi_fifo;
struct mlx5e_xdpsq *sq;
struct mlx5_cqe64 *cqe;
- struct mlx5e_rq *rq;
bool is_redirect;
u16 sqcc;
int i;
@@ -182,8 +267,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
if (!cqe)
return false;
- is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
- rq = container_of(sq, struct mlx5e_rq, xdpsq);
+ is_redirect = !rq;
+ xdpi_fifo = &sq->db.xdpi_fifo;
/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
* otherwise a cq overrun may occur
@@ -199,20 +284,33 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
+ netdev_WARN_ONCE(sq->channel->netdev,
+ "Bad OP in XDPSQ CQE: 0x%x\n",
+ get_cqe_opcode(cqe));
+
do {
- u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
- struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 ci, j;
last_wqe = (sqcc == wqe_counter);
- sqcc++;
-
- if (is_redirect) {
- xdp_return_frame(xdpi->xdpf);
- dma_unmap_single(sq->pdev, xdpi->dma_addr,
- xdpi->xdpf->len, DMA_TO_DEVICE);
- } else {
- /* Recycle RX page */
- mlx5e_page_release(rq, &xdpi->di, true);
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+
+ sqcc += wi->num_wqebbs;
+
+ for (j = 0; j < wi->num_ds; j++) {
+ struct mlx5e_xdp_info xdpi =
+ mlx5e_xdpi_fifo_pop(xdpi_fifo);
+
+ if (is_redirect) {
+ xdp_return_frame(xdpi.xdpf);
+ dma_unmap_single(sq->pdev, xdpi.dma_addr,
+ xdpi.xdpf->len, DMA_TO_DEVICE);
+ } else {
+ /* Recycle RX page */
+ mlx5e_page_release(rq, &xdpi.di, true);
+ }
}
} while (!last_wqe);
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
@@ -228,27 +326,32 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
{
- struct mlx5e_rq *rq;
- bool is_redirect;
-
- is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
- rq = is_redirect ? NULL : container_of(sq, struct mlx5e_rq, xdpsq);
+ struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
+ bool is_redirect = !rq;
while (sq->cc != sq->pc) {
- u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
- struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
-
- sq->cc++;
-
- if (is_redirect) {
- xdp_return_frame(xdpi->xdpf);
- dma_unmap_single(sq->pdev, xdpi->dma_addr,
- xdpi->xdpf->len, DMA_TO_DEVICE);
- } else {
- /* Recycle RX page */
- mlx5e_page_release(rq, &xdpi->di, false);
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 ci, i;
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+ wi = &sq->db.wqe_info[ci];
+
+ sq->cc += wi->num_wqebbs;
+
+ for (i = 0; i < wi->num_ds; i++) {
+ struct mlx5e_xdp_info xdpi =
+ mlx5e_xdpi_fifo_pop(xdpi_fifo);
+
+ if (is_redirect) {
+ xdp_return_frame(xdpi.xdpf);
+ dma_unmap_single(sq->pdev, xdpi.dma_addr,
+ xdpi.xdpf->len, DMA_TO_DEVICE);
+ } else {
+ /* Recycle RX page */
+ mlx5e_page_release(rq, &xdpi.di, false);
+ }
}
}
}
@@ -292,7 +395,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
xdpi.xdpf = xdpf;
- if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) {
+ if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
dma_unmap_single(sq->pdev, xdpi.dma_addr,
xdpf->len, DMA_TO_DEVICE);
xdp_return_frame_rx_napi(xdpf);
@@ -300,8 +403,33 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
}
}
- if (flags & XDP_XMIT_FLUSH)
+ if (flags & XDP_XMIT_FLUSH) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xmit_xdp_doorbell(sq);
+ }
return n - drops;
}
+
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
+{
+ struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
+
+ if (xdpsq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(xdpsq);
+
+ mlx5e_xmit_xdp_doorbell(xdpsq);
+
+ if (xdpsq->redirect_flush) {
+ xdp_do_flush_map();
+ xdpsq->redirect_flush = false;
+ }
+}
+
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
+{
+ sq->xmit_xdp_frame = is_mpw ?
+ mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 6dfab045925f..3a67cb3cd179 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -37,27 +37,62 @@
#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_DS_COUNT \
- ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
+ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
void *va, u16 *rx_headroom, u32 *len);
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
-
-bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq);
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
{
+ if (sq->doorbell_cseg) {
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
+ sq->doorbell_cseg = NULL;
+ }
+}
+
+static inline void
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_data_seg *dseg =
+ (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+ dseg->lkey = sq->mkey_be;
+}
+
+static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq,
+ struct mlx5e_tx_wqe **wqe)
+{
struct mlx5_wq_cyc *wq = &sq->wq;
- struct mlx5e_tx_wqe *wqe;
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+ *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memset(*wqe, 0, sizeof(**wqe));
+}
- wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+static inline void
+mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
+ struct mlx5e_xdp_info *xi)
+{
+ u32 i = (*fifo->pc)++ & fifo->mask;
- mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl);
+ fifo->xi[i] = *xi;
+}
+
+static inline struct mlx5e_xdp_info
+mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
+{
+ return fifo->xi[(*fifo->cc)++ & fifo->mask];
}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 128a82b1dbfc..53608afd39b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -254,11 +254,13 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct mlx5e_ipsec_metadata *mdata;
struct mlx5e_ipsec_sa_entry *sa_entry;
struct xfrm_state *x;
+ struct sec_path *sp;
if (!xo)
return skb;
- if (unlikely(skb->sp->len != 1)) {
+ sp = skb_sec_path(skb);
+ if (unlikely(sp->len != 1)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
goto drop;
}
@@ -305,10 +307,11 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
struct mlx5e_priv *priv = netdev_priv(netdev);
struct xfrm_offload *xo;
struct xfrm_state *xs;
+ struct sec_path *sp;
u32 sa_handle;
- skb->sp = secpath_dup(skb->sp);
- if (unlikely(!skb->sp)) {
+ sp = secpath_set(skb);
+ if (unlikely(!sp)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
return NULL;
}
@@ -320,8 +323,9 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
return NULL;
}
- skb->sp->xvec[skb->sp->len++] = xs;
- skb->sp->olen++;
+ sp = skb_sec_path(skb);
+ sp->xvec[sp->len++] = xs;
+ sp->olen++;
xo = xfrm_offload(skb);
xo->flags = CRYPTO_DONE;
@@ -372,10 +376,11 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
netdev_features_t features)
{
+ struct sec_path *sp = skb_sec_path(skb);
struct xfrm_state *x;
- if (skb->sp && skb->sp->len) {
- x = skb->sp->xvec[0];
+ if (sp && sp->len) {
+ x = sp->xvec[0];
if (x && x->xso.offload_handle)
return true;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 25c1c4f96841..c9df08133718 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -135,14 +135,15 @@ void mlx5e_build_ptys2ethtool_map(void)
ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT);
}
-static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
- "rx_cqe_moder",
- "tx_cqe_moder",
- "rx_cqe_compress",
- "rx_striding_rq",
- "rx_no_csum_complete",
+typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
+
+struct pflag_desc {
+ char name[ETH_GSTRING_LEN];
+ mlx5e_pflag_handler handler;
};
+static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS];
+
int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
{
int i, num_stats = 0;
@@ -153,7 +154,7 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
num_stats += mlx5e_stats_grps[i].get_num_stats(priv);
return num_stats;
case ETH_SS_PRIV_FLAGS:
- return ARRAY_SIZE(mlx5e_priv_flags);
+ return MLX5E_NUM_PFLAGS;
case ETH_SS_TEST:
return mlx5e_self_test_num(priv);
/* fallthrough */
@@ -183,8 +184,9 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data)
switch (stringset) {
case ETH_SS_PRIV_FLAGS:
- for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++)
- strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]);
+ for (i = 0; i < MLX5E_NUM_PFLAGS; i++)
+ strcpy(data + i * ETH_GSTRING_LEN,
+ mlx5e_priv_flags[i].name);
break;
case ETH_SS_TEST:
@@ -353,7 +355,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
new_channels.params = priv->channels.params;
new_channels.params.num_channels = count;
if (!netif_is_rxfh_configured(priv->netdev))
- mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt,
+ mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
MLX5E_INDIR_RQT_SIZE, count);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
@@ -785,10 +787,9 @@ static void get_lp_advertising(u32 eth_proto_lp,
ptys2ethtool_adver_link(lp_advertising, eth_proto_lp);
}
-static int mlx5e_get_link_ksettings(struct net_device *netdev,
- struct ethtool_link_ksettings *link_ksettings)
+int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
+ struct ethtool_link_ksettings *link_ksettings)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
u32 rx_pause = 0;
@@ -804,7 +805,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
if (err) {
- netdev_err(netdev, "%s: query port ptys failed: %d\n",
+ netdev_err(priv->netdev, "%s: query port ptys failed: %d\n",
__func__, err);
goto err_query_regs;
}
@@ -824,7 +825,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
get_supported(eth_proto_cap, link_ksettings);
get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings);
- get_speed_duplex(netdev, eth_proto_oper, link_ksettings);
+ get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings);
eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
@@ -844,7 +845,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
Autoneg);
if (get_fec_supported_advertised(mdev, link_ksettings))
- netdev_dbg(netdev, "%s: FEC caps query failed: %d\n",
+ netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n",
__func__, err);
if (!an_disable_admin)
@@ -855,6 +856,14 @@ err_query_regs:
return err;
}
+static int mlx5e_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
+}
+
static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
{
u32 i, ptys_modes = 0;
@@ -869,10 +878,9 @@ static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
return ptys_modes;
}
-static int mlx5e_set_link_ksettings(struct net_device *netdev,
- const struct ethtool_link_ksettings *link_ksettings)
+int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
+ const struct ethtool_link_ksettings *link_ksettings)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
u32 eth_proto_cap, eth_proto_admin;
bool an_changes = false;
@@ -892,14 +900,14 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev,
err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
if (err) {
- netdev_err(netdev, "%s: query port eth proto cap failed: %d\n",
+ netdev_err(priv->netdev, "%s: query port eth proto cap failed: %d\n",
__func__, err);
goto out;
}
link_modes = link_modes & eth_proto_cap;
if (!link_modes) {
- netdev_err(netdev, "%s: Not supported link mode(s) requested",
+ netdev_err(priv->netdev, "%s: Not supported link mode(s) requested",
__func__);
err = -EINVAL;
goto out;
@@ -907,7 +915,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev,
err = mlx5_query_port_proto_admin(mdev, &eth_proto_admin, MLX5_PTYS_EN);
if (err) {
- netdev_err(netdev, "%s: query port eth proto admin failed: %d\n",
+ netdev_err(priv->netdev, "%s: query port eth proto admin failed: %d\n",
__func__, err);
goto out;
}
@@ -929,9 +937,17 @@ out:
return err;
}
+static int mlx5e_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
+}
+
u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
{
- return sizeof(priv->channels.params.toeplitz_hash_key);
+ return sizeof(priv->rss_params.toeplitz_hash_key);
}
static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
@@ -957,50 +973,27 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
if (indir)
- memcpy(indir, priv->channels.params.indirection_rqt,
- sizeof(priv->channels.params.indirection_rqt));
+ memcpy(indir, rss->indirection_rqt,
+ sizeof(rss->indirection_rqt));
if (key)
- memcpy(key, priv->channels.params.toeplitz_hash_key,
- sizeof(priv->channels.params.toeplitz_hash_key));
+ memcpy(key, rss->toeplitz_hash_key,
+ sizeof(rss->toeplitz_hash_key));
if (hfunc)
- *hfunc = priv->channels.params.rss_hfunc;
+ *hfunc = rss->hfunc;
return 0;
}
-static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
-{
- void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
- struct mlx5_core_dev *mdev = priv->mdev;
- int ctxlen = MLX5_ST_SZ_BYTES(tirc);
- int tt;
-
- MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(tirc, 0, ctxlen);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
- mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
- }
-
- if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
- return;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(tirc, 0, ctxlen);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
- mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen);
- }
-}
-
static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
const u8 *key, const u8 hfunc)
{
struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
bool hash_changed = false;
void *in;
@@ -1016,15 +1009,14 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
mutex_lock(&priv->state_lock);
- if (hfunc != ETH_RSS_HASH_NO_CHANGE &&
- hfunc != priv->channels.params.rss_hfunc) {
- priv->channels.params.rss_hfunc = hfunc;
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) {
+ rss->hfunc = hfunc;
hash_changed = true;
}
if (indir) {
- memcpy(priv->channels.params.indirection_rqt, indir,
- sizeof(priv->channels.params.indirection_rqt));
+ memcpy(rss->indirection_rqt, indir,
+ sizeof(rss->indirection_rqt));
if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
u32 rqtn = priv->indir_rqt.rqtn;
@@ -1032,7 +1024,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
.is_rss = true,
{
.rss = {
- .hfunc = priv->channels.params.rss_hfunc,
+ .hfunc = rss->hfunc,
.channels = &priv->channels,
},
},
@@ -1043,10 +1035,9 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
}
if (key) {
- memcpy(priv->channels.params.toeplitz_hash_key, key,
- sizeof(priv->channels.params.toeplitz_hash_key));
- hash_changed = hash_changed ||
- priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP;
+ memcpy(rss->toeplitz_hash_key, key,
+ sizeof(rss->toeplitz_hash_key));
+ hash_changed = hash_changed || rss->hfunc == ETH_RSS_HASH_TOP;
}
if (hash_changed)
@@ -1150,25 +1141,31 @@ static int mlx5e_set_tunable(struct net_device *dev,
return err;
}
-static void mlx5e_get_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pauseparam)
+void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
int err;
err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause,
&pauseparam->tx_pause);
if (err) {
- netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n",
+ netdev_err(priv->netdev, "%s: mlx5_query_port_pause failed:0x%x\n",
__func__, err);
}
}
-static int mlx5e_set_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pauseparam)
+static void mlx5e_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_ethtool_get_pauseparam(priv, pauseparam);
+}
+
+int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
int err;
@@ -1179,22 +1176,25 @@ static int mlx5e_set_pauseparam(struct net_device *netdev,
pauseparam->rx_pause ? 1 : 0,
pauseparam->tx_pause ? 1 : 0);
if (err) {
- netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n",
+ netdev_err(priv->netdev, "%s: mlx5_set_port_pause failed:0x%x\n",
__func__, err);
}
return err;
}
+static int mlx5e_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
+}
+
int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
struct ethtool_ts_info *info)
{
struct mlx5_core_dev *mdev = priv->mdev;
- int ret;
-
- ret = ethtool_op_get_ts_info(priv->netdev, info);
- if (ret)
- return ret;
info->phc_index = mlx5_clock_get_ptp_index(mdev);
@@ -1202,9 +1202,9 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
info->phc_index == -1)
return 0;
- info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE |
- SOF_TIMESTAMPING_RX_HARDWARE |
- SOF_TIMESTAMPING_RAW_HARDWARE;
+ info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
info->tx_types = BIT(HWTSTAMP_TX_OFF) |
BIT(HWTSTAMP_TX_ON);
@@ -1510,8 +1510,6 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
return 0;
}
-typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
-
static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
bool is_rx_cq)
{
@@ -1674,23 +1672,58 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
return 0;
}
+static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_channels new_channels = {};
+ int err;
+
+ if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
+ return -EOPNOTSUPP;
+
+ new_channels.params = priv->channels.params;
+
+ MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ return 0;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ return err;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ return 0;
+}
+
+static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
+ { "rx_cqe_moder", set_pflag_rx_cqe_based_moder },
+ { "tx_cqe_moder", set_pflag_tx_cqe_based_moder },
+ { "rx_cqe_compress", set_pflag_rx_cqe_compress },
+ { "rx_striding_rq", set_pflag_rx_striding_rq },
+ { "rx_no_csum_complete", set_pflag_rx_no_csum_complete },
+ { "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe },
+};
+
static int mlx5e_handle_pflag(struct net_device *netdev,
u32 wanted_flags,
- enum mlx5e_priv_flag flag,
- mlx5e_pflag_handler pflag_handler)
+ enum mlx5e_priv_flag flag)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- bool enable = !!(wanted_flags & flag);
+ bool enable = !!(wanted_flags & BIT(flag));
u32 changes = wanted_flags ^ priv->channels.params.pflags;
int err;
- if (!(changes & flag))
+ if (!(changes & BIT(flag)))
return 0;
- err = pflag_handler(netdev, enable);
+ err = mlx5e_priv_flags[flag].handler(netdev, enable);
if (err) {
- netdev_err(netdev, "%s private flag 0x%x failed err %d\n",
- enable ? "Enable" : "Disable", flag, err);
+ netdev_err(netdev, "%s private flag '%s' failed err %d\n",
+ enable ? "Enable" : "Disable", mlx5e_priv_flags[flag].name, err);
return err;
}
@@ -1701,38 +1734,17 @@ static int mlx5e_handle_pflag(struct net_device *netdev,
static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ enum mlx5e_priv_flag pflag;
int err;
mutex_lock(&priv->state_lock);
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_CQE_BASED_MODER,
- set_pflag_rx_cqe_based_moder);
- if (err)
- goto out;
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_TX_CQE_BASED_MODER,
- set_pflag_tx_cqe_based_moder);
- if (err)
- goto out;
-
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_CQE_COMPRESS,
- set_pflag_rx_cqe_compress);
- if (err)
- goto out;
-
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_STRIDING_RQ,
- set_pflag_rx_striding_rq);
- if (err)
- goto out;
-
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
- set_pflag_rx_no_csum_complete);
+ for (pflag = 0; pflag < MLX5E_NUM_PFLAGS; pflag++) {
+ err = mlx5e_handle_pflag(netdev, pflags, pflag);
+ if (err)
+ break;
+ }
-out:
mutex_unlock(&priv->state_lock);
/* Need to fix some features.. */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index c18dcebe1462..4421c10f58ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -771,6 +771,112 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
INIT_LIST_HEAD(&priv->fs.ethtool.rules);
}
+static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type)
+{
+ switch (flow_type) {
+ case TCP_V4_FLOW:
+ return MLX5E_TT_IPV4_TCP;
+ case TCP_V6_FLOW:
+ return MLX5E_TT_IPV6_TCP;
+ case UDP_V4_FLOW:
+ return MLX5E_TT_IPV4_UDP;
+ case UDP_V6_FLOW:
+ return MLX5E_TT_IPV6_UDP;
+ case AH_V4_FLOW:
+ return MLX5E_TT_IPV4_IPSEC_AH;
+ case AH_V6_FLOW:
+ return MLX5E_TT_IPV6_IPSEC_AH;
+ case ESP_V4_FLOW:
+ return MLX5E_TT_IPV4_IPSEC_ESP;
+ case ESP_V6_FLOW:
+ return MLX5E_TT_IPV6_IPSEC_ESP;
+ case IPV4_FLOW:
+ return MLX5E_TT_IPV4;
+ case IPV6_FLOW:
+ return MLX5E_TT_IPV6;
+ default:
+ return MLX5E_NUM_INDIR_TIRS;
+ }
+}
+
+static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *nfc)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ enum mlx5e_traffic_types tt;
+ u8 rx_hash_field = 0;
+ void *in;
+
+ tt = flow_type_to_traffic_type(nfc->flow_type);
+ if (tt == MLX5E_NUM_INDIR_TIRS)
+ return -EINVAL;
+
+ /* RSS does not support anything other than hashing to queues
+ * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest
+ * port.
+ */
+ if (nfc->flow_type != TCP_V4_FLOW &&
+ nfc->flow_type != TCP_V6_FLOW &&
+ nfc->flow_type != UDP_V4_FLOW &&
+ nfc->flow_type != UDP_V6_FLOW)
+ return -EOPNOTSUPP;
+
+ if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3))
+ return -EOPNOTSUPP;
+
+ if (nfc->data & RXH_IP_SRC)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_SRC_IP;
+ if (nfc->data & RXH_IP_DST)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_DST_IP;
+ if (nfc->data & RXH_L4_B_0_1)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_SPORT;
+ if (nfc->data & RXH_L4_B_2_3)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mutex_lock(&priv->state_lock);
+
+ if (rx_hash_field == priv->rss_params.rx_hash_fields[tt])
+ goto out;
+
+ priv->rss_params.rx_hash_fields[tt] = rx_hash_field;
+ mlx5e_modify_tirs_hash(priv, in, inlen);
+
+out:
+ mutex_unlock(&priv->state_lock);
+ kvfree(in);
+ return 0;
+}
+
+static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *nfc)
+{
+ enum mlx5e_traffic_types tt;
+ u32 hash_field = 0;
+
+ tt = flow_type_to_traffic_type(nfc->flow_type);
+ if (tt == MLX5E_NUM_INDIR_TIRS)
+ return -EINVAL;
+
+ hash_field = priv->rss_params.rx_hash_fields[tt];
+ nfc->data = 0;
+
+ if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP)
+ nfc->data |= RXH_IP_SRC;
+ if (hash_field & MLX5_HASH_FIELD_SEL_DST_IP)
+ nfc->data |= RXH_IP_DST;
+ if (hash_field & MLX5_HASH_FIELD_SEL_L4_SPORT)
+ nfc->data |= RXH_L4_B_0_1;
+ if (hash_field & MLX5_HASH_FIELD_SEL_L4_DPORT)
+ nfc->data |= RXH_L4_B_2_3;
+
+ return 0;
+}
+
int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
{
int err = 0;
@@ -783,6 +889,9 @@ int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
case ETHTOOL_SRXCLSRLDEL:
err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
break;
+ case ETHTOOL_SRXFH:
+ err = mlx5e_set_rss_hash_opt(priv, cmd);
+ break;
default:
err = -EOPNOTSUPP;
break;
@@ -810,6 +919,9 @@ int mlx5e_get_rxnfc(struct net_device *dev,
case ETHTOOL_GRXCLSRLALL:
err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs);
break;
+ case ETHTOOL_GRXFH:
+ err = mlx5e_get_rss_hash_opt(priv, info);
+ break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 871313d6b34d..8cfd2ec7c0a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -49,6 +49,8 @@
#include "lib/clock.h"
#include "en/port.h"
#include "en/xdp.h"
+#include "lib/eq.h"
+#include "en/monitor_stats.h"
struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
@@ -59,6 +61,7 @@ struct mlx5e_rq_param {
struct mlx5e_sq_param {
u32 sqc[MLX5_ST_SZ_DW(sqc)];
struct mlx5_wq_param wq;
+ bool is_mpw;
};
struct mlx5e_cq_param {
@@ -128,6 +131,8 @@ static bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
return !params->lro_en && frag_sz <= PAGE_SIZE;
}
+#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
+ MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
@@ -138,6 +143,9 @@ static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
if (!mlx5e_rx_is_linear_skb(mdev, params))
return false;
+ if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
+ return false;
+
if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
return true;
@@ -223,7 +231,7 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
MLX5_WQ_TYPE_CYCLIC;
}
-static void mlx5e_update_carrier(struct mlx5e_priv *priv)
+void mlx5e_update_carrier(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
u8 port_state;
@@ -262,7 +270,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
mlx5e_stats_grps[i].update_stats(priv);
}
-static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
+void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
{
int i;
@@ -293,33 +301,35 @@ void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->update_stats_work);
}
-static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
- enum mlx5_dev_event event, unsigned long param)
+static int async_event(struct notifier_block *nb, unsigned long event, void *data)
{
- struct mlx5e_priv *priv = vpriv;
+ struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+ struct mlx5_eqe *eqe = data;
- if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
- return;
+ if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
+ return NOTIFY_DONE;
- switch (event) {
- case MLX5_DEV_EVENT_PORT_UP:
- case MLX5_DEV_EVENT_PORT_DOWN:
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
queue_work(priv->wq, &priv->update_carrier_work);
break;
default:
- break;
+ return NOTIFY_DONE;
}
+
+ return NOTIFY_OK;
}
static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
- set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
+ priv->events_nb.notifier_call = async_event;
+ mlx5_notifier_register(priv->mdev, &priv->events_nb);
}
static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
- clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
- synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
+ mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
}
static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
@@ -983,18 +993,42 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq)
static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
{
- kvfree(sq->db.xdpi);
+ kvfree(sq->db.xdpi_fifo.xi);
+ kvfree(sq->db.wqe_info);
+}
+
+static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
+{
+ struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+
+ xdpi_fifo->xi = kvzalloc_node(sizeof(*xdpi_fifo->xi) * dsegs_per_wq,
+ GFP_KERNEL, numa);
+ if (!xdpi_fifo->xi)
+ return -ENOMEM;
+
+ xdpi_fifo->pc = &sq->xdpi_fifo_pc;
+ xdpi_fifo->cc = &sq->xdpi_fifo_cc;
+ xdpi_fifo->mask = dsegs_per_wq - 1;
+
+ return 0;
}
static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
{
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int err;
- sq->db.xdpi = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.xdpi)),
- GFP_KERNEL, numa);
- if (!sq->db.xdpi) {
- mlx5e_free_xdpsq_db(sq);
+ sq->db.wqe_info = kvzalloc_node(sizeof(*sq->db.wqe_info) * wq_sz,
+ GFP_KERNEL, numa);
+ if (!sq->db.wqe_info)
return -ENOMEM;
+
+ err = mlx5e_alloc_xdpsq_fifo(sq, numa);
+ if (err) {
+ mlx5e_free_xdpsq_db(sq);
+ return err;
}
return 0;
@@ -1396,6 +1430,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
struct mlx5_core_dev *mdev = c->mdev;
struct mlx5_rate_limit rl = {0};
+ cancel_work_sync(&sq->dim.work);
mlx5e_destroy_sq(mdev, sq->sqn);
if (sq->rate_limit) {
rl.rate = sq->rate_limit;
@@ -1552,11 +1587,8 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
struct mlx5e_xdpsq *sq,
bool is_redirect)
{
- unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
struct mlx5e_create_sq_param csp = {};
- unsigned int inline_hdr_sz = 0;
int err;
- int i;
err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
if (err)
@@ -1567,30 +1599,40 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
csp.cqn = sq->cq.mcq.cqn;
csp.wq_ctrl = &sq->wq_ctrl;
csp.min_inline_mode = sq->min_inline_mode;
- if (is_redirect)
- set_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
if (err)
goto err_free_xdpsq;
- if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
- ds_cnt++;
- }
+ mlx5e_set_xmit_fp(sq, param->is_mpw);
+
+ if (!param->is_mpw) {
+ unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
+ unsigned int inline_hdr_sz = 0;
+ int i;
+
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+ ds_cnt++;
+ }
+
+ /* Pre initialize fixed WQE fields */
+ for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[i];
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
+ struct mlx5_wqe_data_seg *dseg;
- /* Pre initialize fixed WQE fields */
- for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
- struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
- struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
- struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
- struct mlx5_wqe_data_seg *dseg;
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
- cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
- eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+ dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
+ dseg->lkey = sq->mkey_be;
- dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
- dseg->lkey = sq->mkey_be;
+ wi->num_wqebbs = 1;
+ wi->num_ds = 1;
+ }
}
return 0;
@@ -1602,7 +1644,7 @@ err_free_xdpsq:
return err;
}
-static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
+static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
{
struct mlx5e_channel *c = sq->channel;
@@ -1610,7 +1652,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
napi_synchronize(&c->napi);
mlx5e_destroy_sq(c->mdev, sq->sqn);
- mlx5e_free_xdpsq_descs(sq);
+ mlx5e_free_xdpsq_descs(sq, rq);
mlx5e_free_xdpsq(sq);
}
@@ -1763,11 +1805,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
mlx5e_free_cq(cq);
}
-static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
-{
- return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
-}
-
static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam)
@@ -1918,9 +1955,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_channel_param *cparam,
struct mlx5e_channel **cp)
{
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
struct net_dim_cq_moder icocq_moder = {0, 0};
struct net_device *netdev = priv->netdev;
- int cpu = mlx5e_get_cpu(priv, ix);
struct mlx5e_channel *c;
unsigned int irq;
int err;
@@ -2003,7 +2040,7 @@ err_close_rq:
err_close_xdp_sq:
if (c->xdp)
- mlx5e_close_xdpsq(&c->rq.xdpsq);
+ mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq);
err_close_sqs:
mlx5e_close_sqs(c);
@@ -2056,10 +2093,10 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
static void mlx5e_close_channel(struct mlx5e_channel *c)
{
- mlx5e_close_xdpsq(&c->xdpsq);
+ mlx5e_close_xdpsq(&c->xdpsq, NULL);
mlx5e_close_rq(&c->rq);
if (c->xdp)
- mlx5e_close_xdpsq(&c->rq.xdpsq);
+ mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq);
mlx5e_close_sqs(c);
mlx5e_close_icosq(&c->icosq);
napi_disable(&c->napi);
@@ -2226,6 +2263,8 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
void *cqc = param->cqc;
MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
+ if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128)
+ MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
}
static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
@@ -2302,6 +2341,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
mlx5e_build_sq_param_common(priv, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
}
static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
@@ -2504,7 +2544,7 @@ static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz,
if (rrp.rss.hfunc == ETH_RSS_HASH_XOR)
ix = mlx5e_bits_invert(i, ilog2(sz));
- ix = priv->channels.params.indirection_rqt[ix];
+ ix = priv->rss_params.indirection_rqt[ix];
rqn = rrp.rss.channels->c[ix]->rq.rqn;
} else {
rqn = rrp.rqn;
@@ -2587,7 +2627,7 @@ static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
{
.rss = {
.channels = chs,
- .hfunc = chs->params.rss_hfunc,
+ .hfunc = priv->rss_params.hfunc,
}
},
};
@@ -2607,6 +2647,54 @@ static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
mlx5e_redirect_rqts(priv, drop_rrp);
}
+static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = {
+ [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+ [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+};
+
+struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt)
+{
+ return tirc_default_config[tt];
+}
+
static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
{
if (!params->lro_en)
@@ -2622,116 +2710,68 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
}
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
- enum mlx5e_traffic_types tt,
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
+ const struct mlx5e_tirc_config *ttconfig,
void *tirc, bool inner)
{
void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
-#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
- MLX5_HASH_FIELD_SEL_DST_IP)
-
-#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\
- MLX5_HASH_FIELD_SEL_DST_IP |\
- MLX5_HASH_FIELD_SEL_L4_SPORT |\
- MLX5_HASH_FIELD_SEL_L4_DPORT)
-
-#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
- MLX5_HASH_FIELD_SEL_DST_IP |\
- MLX5_HASH_FIELD_SEL_IPSEC_SPI)
-
- MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc));
- if (params->rss_hfunc == ETH_RSS_HASH_TOP) {
+ MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc));
+ if (rss_params->hfunc == ETH_RSS_HASH_TOP) {
void *rss_key = MLX5_ADDR_OF(tirc, tirc,
rx_hash_toeplitz_key);
size_t len = MLX5_FLD_SZ_BYTES(tirc,
rx_hash_toeplitz_key);
MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
- memcpy(rss_key, params->toeplitz_hash_key, len);
+ memcpy(rss_key, rss_params->toeplitz_hash_key, len);
}
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ ttconfig->l3_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ ttconfig->l4_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ ttconfig->rx_hash_fields);
+}
- switch (tt) {
- case MLX5E_TT_IPV4_TCP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_TCP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV6_TCP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_TCP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV4_UDP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_UDP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV6_UDP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_UDP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV4_IPSEC_AH:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig,
+ enum mlx5e_traffic_types tt,
+ u32 rx_hash_fields)
+{
+ *ttconfig = tirc_default_config[tt];
+ ttconfig->rx_hash_fields = rx_hash_fields;
+}
- case MLX5E_TT_IPV6_IPSEC_AH:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
+{
+ void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int ctxlen = MLX5_ST_SZ_BYTES(tirc);
+ struct mlx5e_tirc_config ttconfig;
+ int tt;
- case MLX5E_TT_IPV4_IPSEC_ESP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+ MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
- case MLX5E_TT_IPV6_IPSEC_ESP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(tirc, 0, ctxlen);
+ mlx5e_update_rx_hash_fields(&ttconfig, tt,
+ rss->rx_hash_fields[tt]);
+ mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false);
+ mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
+ }
- case MLX5E_TT_IPV4:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP);
- break;
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
- case MLX5E_TT_IPV6:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP);
- break;
- default:
- WARN_ONCE(true, "%s: bad traffic type!\n", __func__);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(tirc, 0, ctxlen);
+ mlx5e_update_rx_hash_fields(&ttconfig, tt,
+ rss->rx_hash_fields[tt]);
+ mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true);
+ mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in,
+ inlen);
}
}
@@ -2788,7 +2828,8 @@ static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
+ mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
+ &tirc_default_config[tt], tirc, true);
}
static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
@@ -2819,7 +2860,7 @@ static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
*mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
}
-static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
+int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
{
struct mlx5e_params *params = &priv->channels.params;
struct net_device *netdev = priv->netdev;
@@ -2899,7 +2940,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
mlx5e_activate_channels(&priv->channels);
netif_tx_start_all_queues(priv->netdev);
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ if (mlx5e_is_vport_rep(priv))
mlx5e_add_sqs_fwd_rules(priv);
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
@@ -2910,7 +2951,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
{
mlx5e_redirect_rqts_to_drop(priv);
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ if (mlx5e_is_vport_rep(priv))
mlx5e_remove_sqs_fwd_rules(priv);
/* FIXME: This is a W/A only for tx timeout watch dog false alarm when
@@ -3162,7 +3203,7 @@ err_close_tises:
return err;
}
-void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
+static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
{
int tc;
@@ -3180,7 +3221,9 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+
+ mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
+ &tirc_default_config[tt], tirc, false);
}
static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
@@ -3385,11 +3428,14 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
{
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
- return mlx5e_configure_flower(priv, cls_flower, flags);
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_DESTROY:
- return mlx5e_delete_flower(priv, cls_flower, flags);
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_STATS:
- return mlx5e_stats_flower(priv, cls_flower, flags);
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
default:
return -EOPNOTSUPP;
}
@@ -3402,7 +3448,8 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
switch (type) {
case TC_SETUP_CLSFLOWER:
- return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
+ return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS |
+ MLX5E_TC_NIC_OFFLOAD);
default:
return -EOPNOTSUPP;
}
@@ -3445,7 +3492,7 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
}
}
-static void
+void
mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -3453,8 +3500,10 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
struct mlx5e_vport_stats *vstats = &priv->stats.vport;
struct mlx5e_pport_stats *pstats = &priv->stats.pport;
- /* update HW stats in background for next time */
- mlx5e_queue_update_stats(priv);
+ if (!mlx5e_monitor_counter_supported(priv)) {
+ /* update HW stats in background for next time */
+ mlx5e_queue_update_stats(priv);
+ }
if (mlx5e_is_uplink_rep(priv)) {
stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
@@ -3587,7 +3636,7 @@ static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- if (!enable && mlx5e_tc_num_filters(priv)) {
+ if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) {
netdev_err(netdev,
"Active offloaded tc filters, can't turn hw_tc_offload off\n");
return -EINVAL;
@@ -3889,7 +3938,7 @@ static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
#ifdef CONFIG_MLX5_ESWITCH
-static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -3926,8 +3975,8 @@ static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
}
-static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
- int max_tx_rate)
+int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+ int max_tx_rate)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -3968,8 +4017,8 @@ static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
mlx5_ifla_link2vport(link_state));
}
-static int mlx5e_get_vf_config(struct net_device *dev,
- int vf, struct ifla_vf_info *ivi)
+int mlx5e_get_vf_config(struct net_device *dev,
+ int vf, struct ifla_vf_info *ivi)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -3982,8 +4031,8 @@ static int mlx5e_get_vf_config(struct net_device *dev,
return 0;
}
-static int mlx5e_get_vf_stats(struct net_device *dev,
- int vf, struct ifla_vf_stats *vf_stats)
+int mlx5e_get_vf_stats(struct net_device *dev,
+ int vf, struct ifla_vf_stats *vf_stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -4044,8 +4093,7 @@ static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
queue_work(priv->wq, &vxlan_work->work);
}
-static void mlx5e_add_vxlan_port(struct net_device *netdev,
- struct udp_tunnel_info *ti)
+void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4058,8 +4106,7 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev,
mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
}
-static void mlx5e_del_vxlan_port(struct net_device *netdev,
- struct udp_tunnel_info *ti)
+void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4109,9 +4156,9 @@ out:
return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}
-static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
- struct net_device *netdev,
- netdev_features_t features)
+netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4134,17 +4181,17 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
struct mlx5e_txqsq *sq)
{
- struct mlx5_eq *eq = sq->cq.mcq.eq;
+ struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
u32 eqe_count;
netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
- eq->eqn, eq->cons_index, eq->irqn);
+ eq->core.eqn, eq->core.cons_index, eq->core.irqn);
eqe_count = mlx5_eq_poll_irq_disabled(eq);
if (!eqe_count)
return false;
- netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
+ netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn);
sq->channel->stats->eq_rearm++;
return true;
}
@@ -4371,8 +4418,6 @@ const struct net_device_ops mlx5e_netdev_ops = {
.ndo_get_vf_config = mlx5e_get_vf_config,
.ndo_set_vf_link_state = mlx5e_set_vf_link_state,
.ndo_get_vf_stats = mlx5e_get_vf_stats,
- .ndo_has_offload_stats = mlx5e_has_offload_stats,
- .ndo_get_offload_stats = mlx5e_get_offload_stats,
#endif
};
@@ -4518,15 +4563,23 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
mlx5e_init_rq_type_params(mdev, params);
}
-void mlx5e_build_rss_params(struct mlx5e_params *params)
+void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
+ u16 num_channels)
{
- params->rss_hfunc = ETH_RSS_HASH_XOR;
- netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key));
- mlx5e_build_default_indir_rqt(params->indirection_rqt,
- MLX5E_INDIR_RQT_SIZE, params->num_channels);
+ enum mlx5e_traffic_types tt;
+
+ rss_params->hfunc = ETH_RSS_HASH_XOR;
+ netdev_rss_key_fill(rss_params->toeplitz_hash_key,
+ sizeof(rss_params->toeplitz_hash_key));
+ mlx5e_build_default_indir_rqt(rss_params->indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, num_channels);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ rss_params->rx_hash_fields[tt] =
+ tirc_default_config[tt].rx_hash_fields;
}
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
u16 max_channels, u16 mtu)
{
@@ -4542,6 +4595,10 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ /* XDP SQ */
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE,
+ MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));
+
/* set CQE compression */
params->rx_cqe_compress_def = false;
if (MLX5_CAP_GEN(mdev, cqe_compression) &&
@@ -4575,7 +4632,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
/* RSS */
- mlx5e_build_rss_params(params);
+ mlx5e_build_rss_params(rss_params, params->num_channels);
}
static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
@@ -4590,12 +4647,6 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
}
}
-#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
-static const struct switchdev_ops mlx5e_switchdev_ops = {
- .switchdev_port_attr_get = mlx5e_attr_get,
-};
-#endif
-
static void mlx5e_build_nic_netdev(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4705,12 +4756,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->priv_flags |= IFF_UNICAST_FLT;
mlx5e_set_netdev_dev_addr(netdev);
-
-#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
- if (MLX5_ESWITCH_MANAGER(mdev))
- netdev->switchdev_ops = &mlx5e_switchdev_ops;
-#endif
-
mlx5e_ipsec_build_netdev(priv);
mlx5e_tls_build_netdev(priv);
}
@@ -4748,14 +4793,16 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
void *ppriv)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
int err;
err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
if (err)
return err;
- mlx5e_build_nic_params(mdev, &priv->channels.params,
- mlx5e_get_netdev_max_channels(netdev), netdev->mtu);
+ mlx5e_build_nic_params(mdev, rss, &priv->channels.params,
+ mlx5e_get_netdev_max_channels(netdev),
+ netdev->mtu);
mlx5e_timestamp_init(priv);
@@ -4885,9 +4932,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
mlx5_lag_add(mdev, netdev);
mlx5e_enable_async_events(priv);
-
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
- mlx5e_register_vport_reps(priv);
+ if (mlx5e_monitor_counter_supported(priv))
+ mlx5e_monitor_counter_init(priv);
if (netdev->reg_state != NETREG_REGISTERED)
return;
@@ -4921,8 +4967,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->set_rx_mode_work);
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
- mlx5e_unregister_vport_reps(priv);
+ if (mlx5e_monitor_counter_supported(priv))
+ mlx5e_monitor_counter_cleanup(priv);
mlx5e_disable_async_events(priv);
mlx5_lag_remove(mdev);
@@ -4975,7 +5021,7 @@ int mlx5e_netdev_init(struct net_device *netdev,
netif_carrier_off(netdev);
#ifdef CONFIG_MLX5_EN_ARFS
- netdev->rx_cpu_rmap = mdev->rmap;
+ netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev);
#endif
return 0;
@@ -5030,7 +5076,7 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
if (priv->channels.params.num_channels > max_nch) {
mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
priv->channels.params.num_channels = max_nch;
- mlx5e_build_default_indir_rqt(priv->channels.params.indirection_rqt,
+ mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
MLX5E_INDIR_RQT_SIZE, max_nch);
}
@@ -5119,7 +5165,6 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
static void *mlx5e_add(struct mlx5_core_dev *mdev)
{
struct net_device *netdev;
- void *rpriv = NULL;
void *priv;
int err;
int nch;
@@ -5129,20 +5174,18 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
return NULL;
#ifdef CONFIG_MLX5_ESWITCH
- if (MLX5_ESWITCH_MANAGER(mdev)) {
- rpriv = mlx5e_alloc_nic_rep_priv(mdev);
- if (!rpriv) {
- mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n");
- return NULL;
- }
+ if (MLX5_ESWITCH_MANAGER(mdev) &&
+ mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ mlx5e_rep_register_vport_reps(mdev);
+ return mdev;
}
#endif
nch = mlx5e_get_max_num_channels(mdev);
- netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, rpriv);
+ netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, NULL);
if (!netdev) {
mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
- goto err_free_rpriv;
+ return NULL;
}
priv = netdev_priv(netdev);
@@ -5168,30 +5211,26 @@ err_detach:
mlx5e_detach(mdev, priv);
err_destroy_netdev:
mlx5e_destroy_netdev(priv);
-err_free_rpriv:
- kfree(rpriv);
return NULL;
}
static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
{
- struct mlx5e_priv *priv = vpriv;
- void *ppriv = priv->ppriv;
+ struct mlx5e_priv *priv;
+#ifdef CONFIG_MLX5_ESWITCH
+ if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev) {
+ mlx5e_rep_unregister_vport_reps(mdev);
+ return;
+ }
+#endif
+ priv = vpriv;
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_delete_app(priv);
#endif
unregister_netdev(priv->netdev);
mlx5e_detach(mdev, vpriv);
mlx5e_destroy_netdev(priv);
- kfree(ppriv);
-}
-
-static void *mlx5e_get_netdev(void *vpriv)
-{
- struct mlx5e_priv *priv = vpriv;
-
- return priv->netdev;
}
static struct mlx5_interface mlx5e_interface = {
@@ -5199,9 +5238,7 @@ static struct mlx5_interface mlx5e_interface = {
.remove = mlx5e_remove,
.attach = mlx5e_attach,
.detach = mlx5e_detach,
- .event = mlx5e_async_event,
.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
- .get_dev = mlx5e_get_netdev,
};
void mlx5e_init(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index c3c657548824..96cc0c6a4014 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -42,13 +42,24 @@
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
+#include "en/tc_tun.h"
#include "fs_core.h"
-#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \
- max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
+#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
+ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
+#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
+struct mlx5e_rep_indr_block_priv {
+ struct net_device *netdev;
+ struct mlx5e_rep_priv *rpriv;
+
+ struct list_head list;
+};
+
+static void mlx5e_rep_indr_unregister_block(struct net_device *netdev);
+
static void mlx5e_rep_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
@@ -98,7 +109,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev,
}
}
-static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
+static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -121,6 +132,32 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
vport_stats->tx_bytes = vf_stats.rx_bytes;
}
+static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct rtnl_link_stats64 *vport_stats;
+
+ mlx5e_grp_802_3_update_stats(priv);
+
+ vport_stats = &priv->stats.vf_vport;
+
+ vport_stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
+ vport_stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok);
+ vport_stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
+ vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+}
+
+static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ if (rep->vport == FDB_UPLINK_VPORT)
+ mlx5e_uplink_rep_update_hw_counters(priv);
+ else
+ mlx5e_vf_rep_update_hw_counters(priv);
+}
+
static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
{
struct mlx5e_sw_stats *s = &priv->stats.sw;
@@ -256,6 +293,22 @@ static int mlx5e_rep_set_channels(struct net_device *dev,
return 0;
}
+static int mlx5e_rep_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_coalesce(priv, coal);
+}
+
+static int mlx5e_rep_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_coalesce(priv, coal);
+}
+
static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -270,7 +323,55 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev)
return mlx5e_ethtool_get_rxfh_indir_size(priv);
}
-static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
+static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_ethtool_get_pauseparam(priv, pauseparam);
+}
+
+static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
+}
+
+static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
+}
+
+static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
+}
+
+static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = {
+ .get_drvinfo = mlx5e_rep_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = mlx5e_rep_get_strings,
+ .get_sset_count = mlx5e_rep_get_sset_count,
+ .get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
+ .get_ringparam = mlx5e_rep_get_ringparam,
+ .set_ringparam = mlx5e_rep_set_ringparam,
+ .get_channels = mlx5e_rep_get_channels,
+ .set_channels = mlx5e_rep_set_channels,
+ .get_coalesce = mlx5e_rep_get_coalesce,
+ .set_coalesce = mlx5e_rep_set_coalesce,
+ .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size,
+ .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
+};
+
+static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
.get_drvinfo = mlx5e_rep_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = mlx5e_rep_get_strings,
@@ -280,24 +381,44 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
.set_ringparam = mlx5e_rep_set_ringparam,
.get_channels = mlx5e_rep_get_channels,
.set_channels = mlx5e_rep_set_channels,
+ .get_coalesce = mlx5e_rep_get_coalesce,
+ .set_coalesce = mlx5e_rep_set_coalesce,
+ .get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings,
+ .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings,
.get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size,
.get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
+ .get_pauseparam = mlx5e_uplink_rep_get_pauseparam,
+ .set_pauseparam = mlx5e_uplink_rep_set_pauseparam,
};
-int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+static int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct net_device *uplink_upper = NULL;
+ struct mlx5e_priv *uplink_priv = NULL;
+ struct net_device *uplink_dev;
if (esw->mode == SRIOV_NONE)
return -EOPNOTSUPP;
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ if (uplink_dev) {
+ uplink_upper = netdev_master_upper_dev_get(uplink_dev);
+ uplink_priv = netdev_priv(uplink_dev);
+ }
+
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
attr->u.ppid.id_len = ETH_ALEN;
- ether_addr_copy(attr->u.ppid.id, rep->hw_id);
+ if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) {
+ ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr);
+ } else {
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ ether_addr_copy(attr->u.ppid.id, rep->hw_id);
+ }
break;
default:
return -EOPNOTSUPP;
@@ -466,8 +587,8 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
ASSERT_RTNL();
- if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) ||
- !ether_addr_equal(e->h_dest, ha))
+ if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
+ (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
mlx5e_tc_encap_flows_del(priv, e);
if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
@@ -518,6 +639,184 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
neigh_release(n);
}
+static struct mlx5e_rep_indr_block_priv *
+mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv;
+
+ /* All callback list access should be protected by RTNL. */
+ ASSERT_RTNL();
+
+ list_for_each_entry(cb_priv,
+ &rpriv->uplink_priv.tc_indr_block_priv_list,
+ list)
+ if (cb_priv->netdev == netdev)
+ return cb_priv;
+
+ return NULL;
+}
+
+static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv, *temp;
+ struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
+
+ list_for_each_entry_safe(cb_priv, temp, head, list) {
+ mlx5e_rep_indr_unregister_block(cb_priv->netdev);
+ kfree(cb_priv);
+ }
+}
+
+static int
+mlx5e_rep_indr_offload(struct net_device *netdev,
+ struct tc_cls_flower_offload *flower,
+ struct mlx5e_rep_indr_block_priv *indr_priv)
+{
+ struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
+ int flags = MLX5E_TC_EGRESS | MLX5E_TC_ESW_OFFLOAD;
+ int err = 0;
+
+ switch (flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ err = mlx5e_configure_flower(netdev, priv, flower, flags);
+ break;
+ case TC_CLSFLOWER_DESTROY:
+ err = mlx5e_delete_flower(netdev, priv, flower, flags);
+ break;
+ case TC_CLSFLOWER_STATS:
+ err = mlx5e_stats_flower(netdev, priv, flower, flags);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_indr_offload(priv->netdev, type_data, priv);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
+ struct mlx5e_rep_priv *rpriv,
+ struct tc_block_offload *f)
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv;
+ int err = 0;
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+ if (indr_priv)
+ return -EEXIST;
+
+ indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
+ if (!indr_priv)
+ return -ENOMEM;
+
+ indr_priv->netdev = netdev;
+ indr_priv->rpriv = rpriv;
+ list_add(&indr_priv->list,
+ &rpriv->uplink_priv.tc_indr_block_priv_list);
+
+ err = tcf_block_cb_register(f->block,
+ mlx5e_rep_indr_setup_block_cb,
+ netdev, indr_priv, f->extack);
+ if (err) {
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+ }
+
+ return err;
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block,
+ mlx5e_rep_indr_setup_block_cb,
+ netdev);
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+ if (indr_priv) {
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+ }
+
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static
+int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
+ enum tc_setup_type type, void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv,
+ type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev)
+{
+ int err;
+
+ err = __tc_indr_block_cb_register(netdev, rpriv,
+ mlx5e_rep_indr_setup_tc_cb,
+ netdev);
+ if (err) {
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
+ netdev_name(netdev), err);
+ }
+ return err;
+}
+
+static void mlx5e_rep_indr_unregister_block(struct net_device *netdev)
+{
+ __tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
+ netdev);
+}
+
+static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ uplink_priv.netdevice_nb);
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ if (!mlx5e_tc_tun_device_to_offload(priv, netdev))
+ return NOTIFY_OK;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ mlx5e_rep_indr_register_block(rpriv, netdev);
+ break;
+ case NETDEV_UNREGISTER:
+ mlx5e_rep_indr_unregister_block(netdev);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
static struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
struct mlx5e_neigh *m_neigh);
@@ -779,7 +1078,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
mlx5e_rep_neigh_entry_destroy(priv, nhe);
}
-static int mlx5e_rep_open(struct net_device *dev)
+static int mlx5e_vf_rep_open(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -801,7 +1100,7 @@ unlock:
return err;
}
-static int mlx5e_rep_close(struct net_device *dev)
+static int mlx5e_vf_rep_close(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -838,24 +1137,14 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
{
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
- return mlx5e_configure_flower(priv, cls_flower, flags);
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_DESTROY:
- return mlx5e_delete_flower(priv, cls_flower, flags);
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_STATS:
- return mlx5e_stats_flower(priv, cls_flower, flags);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data,
- void *cb_priv)
-{
- struct mlx5e_priv *priv = cb_priv;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS);
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
default:
return -EOPNOTSUPP;
}
@@ -868,7 +1157,8 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
switch (type) {
case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS |
+ MLX5E_TC_ESW_OFFLOAD);
default:
return -EOPNOTSUPP;
}
@@ -907,43 +1197,23 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep;
if (!MLX5_ESWITCH_MANAGER(priv->mdev))
return false;
- rep = rpriv->rep;
- if (esw->mode == SRIOV_OFFLOADS &&
- rep && rep->vport == FDB_UPLINK_VPORT)
- return true;
-
- return false;
-}
-
-static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep;
-
- if (!MLX5_ESWITCH_MANAGER(priv->mdev))
+ if (!rpriv) /* non vport rep mlx5e instances don't use this field */
return false;
rep = rpriv->rep;
- if (rep && rep->vport != FDB_UPLINK_VPORT)
- return true;
-
- return false;
+ return (rep->vport == FDB_UPLINK_VPORT);
}
-bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
-
switch (attr_id) {
case IFLA_OFFLOAD_XSTATS_CPU_HIT:
- if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv))
return true;
}
@@ -969,8 +1239,8 @@ mlx5e_get_sw_stats64(const struct net_device *dev,
return 0;
}
-int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
- void *sp)
+static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp)
{
switch (attr_id) {
case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -981,7 +1251,7 @@ int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
}
static void
-mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -990,37 +1260,93 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
}
+static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ return mlx5e_change_mtu(netdev, new_mtu, NULL);
+}
+
+static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu);
+}
+
+static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr)
+{
+ struct sockaddr *saddr = addr;
+
+ if (!is_valid_ether_addr(saddr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ ether_addr_copy(netdev->dev_addr, saddr->sa_data);
+ return 0;
+}
+
static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
.switchdev_port_attr_get = mlx5e_attr_get,
};
-static int mlx5e_change_rep_mtu(struct net_device *netdev, int new_mtu)
-{
- return mlx5e_change_mtu(netdev, new_mtu, NULL);
-}
+static const struct net_device_ops mlx5e_netdev_ops_vf_rep = {
+ .ndo_open = mlx5e_vf_rep_open,
+ .ndo_stop = mlx5e_vf_rep_close,
+ .ndo_start_xmit = mlx5e_xmit,
+ .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
+ .ndo_setup_tc = mlx5e_rep_setup_tc,
+ .ndo_get_stats64 = mlx5e_vf_rep_get_stats,
+ .ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
+ .ndo_change_mtu = mlx5e_vf_rep_change_mtu,
+};
-static const struct net_device_ops mlx5e_netdev_ops_rep = {
- .ndo_open = mlx5e_rep_open,
- .ndo_stop = mlx5e_rep_close,
+static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
+ .ndo_open = mlx5e_open,
+ .ndo_stop = mlx5e_close,
.ndo_start_xmit = mlx5e_xmit,
+ .ndo_set_mac_address = mlx5e_uplink_rep_set_mac,
.ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
.ndo_setup_tc = mlx5e_rep_setup_tc,
- .ndo_get_stats64 = mlx5e_rep_get_stats,
- .ndo_has_offload_stats = mlx5e_has_offload_stats,
- .ndo_get_offload_stats = mlx5e_get_offload_stats,
- .ndo_change_mtu = mlx5e_change_rep_mtu,
+ .ndo_get_stats64 = mlx5e_get_stats,
+ .ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
+ .ndo_change_mtu = mlx5e_uplink_rep_change_mtu,
+ .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
+ .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
+ .ndo_features_check = mlx5e_features_check,
+ .ndo_set_vf_mac = mlx5e_set_vf_mac,
+ .ndo_set_vf_rate = mlx5e_set_vf_rate,
+ .ndo_get_vf_config = mlx5e_get_vf_config,
+ .ndo_get_vf_stats = mlx5e_get_vf_stats,
};
-static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params, u16 mtu)
+bool mlx5e_eswitch_rep(struct net_device *netdev)
+{
+ if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep ||
+ netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep)
+ return true;
+
+ return false;
+}
+
+static void mlx5e_build_rep_params(struct net_device *netdev)
{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_params *params;
+
u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ params = &priv->channels.params;
params->hard_mtu = MLX5E_ETH_HARD_MTU;
- params->sw_mtu = mtu;
- params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
+ params->sw_mtu = netdev->mtu;
+
+ /* SQ */
+ if (rep->vport == FDB_UPLINK_VPORT)
+ params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ else
+ params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE;
/* RQ */
mlx5e_build_rq_params(mdev, params);
@@ -1034,24 +1360,38 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
/* RSS */
- mlx5e_build_rss_params(params);
+ mlx5e_build_rss_params(&priv->rss_params, params->num_channels);
}
static void mlx5e_build_rep_netdev(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_core_dev *mdev = priv->mdev;
- u16 max_mtu;
- netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+ if (rep->vport == FDB_UPLINK_VPORT) {
+ SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev);
+ netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
+ /* we want a persistent mac for the uplink rep */
+ mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr);
+ netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (MLX5_CAP_GEN(mdev, qos))
+ netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+#endif
+ } else {
+ netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep;
+ eth_hw_addr_random(netdev);
+ netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops;
+ }
netdev->watchdog_timeo = 15 * HZ;
- netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
- netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
+ netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
netdev->hw_features |= NETIF_F_HW_TC;
netdev->hw_features |= NETIF_F_SG;
@@ -1062,13 +1402,10 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_TSO6;
netdev->hw_features |= NETIF_F_RXCSUM;
- netdev->features |= netdev->hw_features;
-
- eth_hw_addr_random(netdev);
+ if (rep->vport != FDB_UPLINK_VPORT)
+ netdev->features |= NETIF_F_VLAN_CHALLENGED;
- netdev->min_mtu = ETH_MIN_MTU;
- mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
- netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ netdev->features |= netdev->hw_features;
}
static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
@@ -1083,11 +1420,9 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
if (err)
return err;
+ priv->channels.params.num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS;
- priv->channels.params.num_channels =
- mlx5e_get_netdev_max_channels(netdev);
-
- mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu);
+ mlx5e_build_rep_params(netdev);
mlx5e_build_rep_netdev(netdev);
mlx5e_timestamp_init(priv);
@@ -1210,94 +1545,173 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
{
- int err;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ int tc, err;
err = mlx5e_create_tises(priv);
if (err) {
mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
return err;
}
- return 0;
-}
-static const struct mlx5e_profile mlx5e_rep_profile = {
- .init = mlx5e_init_rep,
- .cleanup = mlx5e_cleanup_rep,
- .init_rx = mlx5e_init_rep_rx,
- .cleanup_rx = mlx5e_cleanup_rep_rx,
- .init_tx = mlx5e_init_rep_tx,
- .cleanup_tx = mlx5e_cleanup_nic_tx,
- .update_stats = mlx5e_rep_update_hw_counters,
- .update_carrier = NULL,
- .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
- .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
- .max_tc = 1,
-};
+ if (rpriv->rep->vport == FDB_UPLINK_VPORT) {
+ uplink_priv = &rpriv->uplink_priv;
-/* e-Switch vport representors */
+ /* init shared tc flow table */
+ err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+ if (err)
+ goto destroy_tises;
+
+ /* init indirect block notifications */
+ INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
+ uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
+ err = register_netdevice_notifier(&uplink_priv->netdevice_nb);
+ if (err) {
+ mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n");
+ goto tc_esw_cleanup;
+ }
+ }
-static int
-mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+ return 0;
+
+tc_esw_cleanup:
+ mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht);
+destroy_tises:
+ for (tc = 0; tc < priv->profile->max_tc; tc++)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
+ return err;
+}
+
+static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int tc;
- int err;
+ for (tc = 0; tc < priv->profile->max_tc; tc++)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- err = mlx5e_add_sqs_fwd_rules(priv);
- if (err)
- return err;
+ if (rpriv->rep->vport == FDB_UPLINK_VPORT) {
+ /* clean indirect TC block notifications */
+ unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb);
+ mlx5e_rep_indr_clean_block_privs(rpriv);
+
+ /* delete shared tc flow table */
+ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
}
+}
- err = mlx5e_rep_neigh_init(rpriv);
- if (err)
- goto err_remove_sqs;
+static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
- /* init shared tc flow table */
- err = mlx5e_tc_esw_init(&rpriv->tc_ht);
- if (err)
- goto err_neigh_cleanup;
+ netdev->min_mtu = ETH_MIN_MTU;
+ mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+}
- return 0;
+static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+ struct mlx5_eqe *eqe = data;
-err_neigh_cleanup:
- mlx5e_rep_neigh_cleanup(rpriv);
-err_remove_sqs:
- mlx5e_remove_sqs_fwd_rules(priv);
- return err;
+ if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
+ return NOTIFY_DONE;
+
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ queue_work(priv->wq, &priv->update_carrier_work);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
}
-static void
-mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
- if (test_bit(MLX5E_STATE_OPENED, &priv->state))
- mlx5e_remove_sqs_fwd_rules(priv);
+ netdev->min_mtu = ETH_MIN_MTU;
+ mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
+ netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ mlx5e_set_dev_port_mtu(priv);
+
+ mlx5_lag_add(mdev, netdev);
+ priv->events_nb.notifier_call = uplink_rep_async_event;
+ mlx5_notifier_register(mdev, &priv->events_nb);
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_initialize(priv);
+ mlx5e_dcbnl_init_app(priv);
+#endif
+}
- /* clean uplink offloaded TC rules, delete shared tc flow table */
- mlx5e_tc_esw_cleanup(&rpriv->tc_ht);
+static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
- mlx5e_rep_neigh_cleanup(rpriv);
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_delete_app(priv);
+#endif
+ mlx5_notifier_unregister(mdev, &priv->events_nb);
+ mlx5_lag_remove(mdev);
}
+static const struct mlx5e_profile mlx5e_vf_rep_profile = {
+ .init = mlx5e_init_rep,
+ .cleanup = mlx5e_cleanup_rep,
+ .init_rx = mlx5e_init_rep_rx,
+ .cleanup_rx = mlx5e_cleanup_rep_rx,
+ .init_tx = mlx5e_init_rep_tx,
+ .cleanup_tx = mlx5e_cleanup_rep_tx,
+ .enable = mlx5e_vf_rep_enable,
+ .update_stats = mlx5e_vf_rep_update_hw_counters,
+ .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
+ .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .max_tc = 1,
+};
+
+static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
+ .init = mlx5e_init_rep,
+ .cleanup = mlx5e_cleanup_rep,
+ .init_rx = mlx5e_init_rep_rx,
+ .cleanup_rx = mlx5e_cleanup_rep_rx,
+ .init_tx = mlx5e_init_rep_tx,
+ .cleanup_tx = mlx5e_cleanup_rep_tx,
+ .enable = mlx5e_uplink_rep_enable,
+ .disable = mlx5e_uplink_rep_disable,
+ .update_stats = mlx5e_uplink_rep_update_hw_counters,
+ .update_carrier = mlx5e_update_carrier,
+ .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
+ .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .max_tc = MLX5E_MAX_NUM_TC,
+};
+
+/* e-Switch vport representors */
static int
mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
- struct mlx5e_rep_priv *uplink_rpriv;
+ const struct mlx5e_profile *profile;
struct mlx5e_rep_priv *rpriv;
struct net_device *netdev;
- struct mlx5e_priv *upriv;
int nch, err;
rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
if (!rpriv)
return -ENOMEM;
+ /* rpriv->rep to be looked up when profile->init() is called */
+ rpriv->rep = rep;
+
nch = mlx5e_get_max_num_channels(dev);
- netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, nch, rpriv);
+ profile = (rep->vport == FDB_UPLINK_VPORT) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile;
+ netdev = mlx5e_create_netdev(dev, profile, nch, rpriv);
if (!netdev) {
pr_warn("Failed to create representor netdev for vport %d\n",
rep->vport);
@@ -1306,15 +1720,20 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
}
rpriv->netdev = netdev;
- rpriv->rep = rep;
rep->rep_if[REP_ETH].priv = rpriv;
INIT_LIST_HEAD(&rpriv->vport_sqs_list);
+ if (rep->vport == FDB_UPLINK_VPORT) {
+ err = mlx5e_create_mdev_resources(dev);
+ if (err)
+ goto err_destroy_netdev;
+ }
+
err = mlx5e_attach_netdev(netdev_priv(netdev));
if (err) {
pr_warn("Failed to attach representor netdev for vport %d\n",
rep->vport);
- goto err_destroy_netdev;
+ goto err_destroy_mdev_resources;
}
err = mlx5e_rep_neigh_init(rpriv);
@@ -1324,32 +1743,25 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
goto err_detach_netdev;
}
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH);
- upriv = netdev_priv(uplink_rpriv->netdev);
- err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev,
- upriv);
- if (err)
- goto err_neigh_cleanup;
-
err = register_netdev(netdev);
if (err) {
pr_warn("Failed to register representor netdev for vport %d\n",
rep->vport);
- goto err_egdev_cleanup;
+ goto err_neigh_cleanup;
}
return 0;
-err_egdev_cleanup:
- tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
- upriv);
-
err_neigh_cleanup:
mlx5e_rep_neigh_cleanup(rpriv);
err_detach_netdev:
mlx5e_detach_netdev(netdev_priv(netdev));
+err_destroy_mdev_resources:
+ if (rep->vport == FDB_UPLINK_VPORT)
+ mlx5e_destroy_mdev_resources(dev);
+
err_destroy_netdev:
mlx5e_destroy_netdev(netdev_priv(netdev));
kfree(rpriv);
@@ -1362,18 +1774,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_rep_priv *uplink_rpriv;
void *ppriv = priv->ppriv;
- struct mlx5e_priv *upriv;
unregister_netdev(netdev);
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch,
- REP_ETH);
- upriv = netdev_priv(uplink_rpriv->netdev);
- tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
- upriv);
mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_detach_netdev(priv);
+ if (rep->vport == FDB_UPLINK_VPORT)
+ mlx5e_destroy_mdev_resources(priv->mdev);
mlx5e_destroy_netdev(priv);
kfree(ppriv); /* mlx5e_rep_priv */
}
@@ -1387,14 +1794,13 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
return rpriv->netdev;
}
-static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
+void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev)
{
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
int total_vfs = MLX5_TOTAL_VPORTS(mdev);
int vport;
- for (vport = 1; vport < total_vfs; vport++) {
+ for (vport = 0; vport < total_vfs; vport++) {
struct mlx5_eswitch_rep_if rep_if = {};
rep_if.load = mlx5e_vport_rep_load;
@@ -1404,55 +1810,12 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
}
}
-static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv)
+void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev)
{
- struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5_eswitch *esw = mdev->priv.eswitch;
int total_vfs = MLX5_TOTAL_VPORTS(mdev);
int vport;
- for (vport = 1; vport < total_vfs; vport++)
+ for (vport = total_vfs - 1; vport >= 0; vport--)
mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH);
}
-
-void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- struct mlx5_eswitch_rep_if rep_if;
- struct mlx5e_rep_priv *rpriv;
-
- rpriv = priv->ppriv;
- rpriv->netdev = priv->netdev;
-
- rep_if.load = mlx5e_nic_rep_load;
- rep_if.unload = mlx5e_nic_rep_unload;
- rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
- rep_if.priv = rpriv;
- INIT_LIST_HEAD(&rpriv->vport_sqs_list);
- mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
-
- mlx5e_rep_register_vf_vports(priv); /* VFs vports */
-}
-
-void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
-
- mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */
- mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/
-}
-
-void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev)
-{
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- struct mlx5e_rep_priv *rpriv;
-
- rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
- if (!rpriv)
- return NULL;
-
- rpriv->rep = &esw->offloads.vport_reps[0];
- return rpriv;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 844d32d5c29f..edd722824697 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -53,13 +53,33 @@ struct mlx5e_neigh_update_table {
unsigned long min_interval; /* jiffies */
};
+struct mlx5_rep_uplink_priv {
+ /* Filters DB - instantiated by the uplink representor and shared by
+ * the uplink's VFs
+ */
+ struct rhashtable tc_ht;
+
+ /* indirect block callbacks are invoked on bind/unbind events
+ * on registered higher level devices (e.g. tunnel devices)
+ *
+ * tc_indr_block_cb_priv_list is used to lookup indirect callback
+ * private data
+ *
+ * netdevice_nb is the netdev events notifier - used to register
+ * tunnel devices for block events
+ *
+ */
+ struct list_head tc_indr_block_priv_list;
+ struct notifier_block netdevice_nb;
+};
+
struct mlx5e_rep_priv {
struct mlx5_eswitch_rep *rep;
struct mlx5e_neigh_update_table neigh_update;
struct net_device *netdev;
struct mlx5_flow_handle *vport_rx_rule;
struct list_head vport_sqs_list;
- struct rhashtable tc_ht; /* valid for uplink rep */
+ struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
};
static inline
@@ -129,6 +149,8 @@ struct mlx5e_encap_entry {
struct net_device *out_dev;
int tunnel_type;
+ int tunnel_hlen;
+ int reformat_type;
u8 flags;
char *encap_header;
int encap_size;
@@ -140,16 +162,12 @@ struct mlx5e_rep_sq {
};
void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev);
-void mlx5e_register_vport_reps(struct mlx5e_priv *priv);
-void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv);
+void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev);
+void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev);
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
-int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp);
-bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id);
-
-int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
@@ -158,12 +176,17 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+
+bool mlx5e_eswitch_rep(struct net_device *netdev);
+
#else /* CONFIG_MLX5_ESWITCH */
-static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {}
-static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {}
static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
#endif
+static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv)
+{
+ return (MLX5_ESWITCH_MANAGER(priv->mdev) && priv->ppriv);
+}
#endif /* __MLX5E_REP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 16985ca3248d..1d0bb5ff8c26 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -554,9 +554,9 @@ static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
mlx5_cqwq_pop(&cq->wq);
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
netdev_WARN_ONCE(cq->channel->netdev,
- "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own);
+ "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe));
return;
}
@@ -724,9 +724,9 @@ static u32 mlx5e_get_fcs(const struct sk_buff *skb)
return __get_unaligned_cpu32(fcs_bytes);
}
-static u8 get_ip_proto(struct sk_buff *skb, __be16 proto)
+static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
{
- void *ip_p = skb->data + sizeof(struct ethhdr);
+ void *ip_p = skb->data + network_depth;
return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
((struct ipv6hdr *)ip_p)->nexthdr;
@@ -755,7 +755,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
goto csum_unnecessary;
if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
- if (unlikely(get_ip_proto(skb, proto) == IPPROTO_SCTP))
+ if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
goto csum_unnecessary;
skb->ip_summed = CHECKSUM_COMPLETE;
@@ -898,7 +898,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
prefetchw(va); /* xdp_frame data area */
prefetch(data);
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
return NULL;
}
@@ -930,7 +930,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
u16 byte_cnt = cqe_bcnt - headlen;
struct sk_buff *skb;
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
return NULL;
}
@@ -1154,7 +1154,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
wi->consumed_strides += cstrides;
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
goto mpwrq_cqe_out;
}
@@ -1190,7 +1190,6 @@ mpwrq_cqe_out:
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
- struct mlx5e_xdpsq *xdpsq;
struct mlx5_cqe64 *cqe;
int work_done = 0;
@@ -1201,10 +1200,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
cqe = mlx5_cqwq_get_cqe(&cq->wq);
- if (!cqe)
+ if (!cqe) {
+ if (unlikely(work_done))
+ goto out;
return 0;
-
- xdpsq = &rq->xdpsq;
+ }
do {
if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
@@ -1219,15 +1219,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
rq->handle_rx_cqe(rq, cqe);
} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
- if (xdpsq->doorbell) {
- mlx5e_xmit_xdp_doorbell(xdpsq);
- xdpsq->doorbell = false;
- }
-
- if (xdpsq->redirect_flush) {
- xdp_do_flush_map();
- xdpsq->redirect_flush = false;
- }
+out:
+ if (rq->xdp_prog)
+ mlx5e_xdp_rx_poll_complete(rq);
mlx5_cqwq_update_db_record(&cq->wq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 3e99d0728b2f..d3fe48ff9da9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
+#include "lib/mlx5.h"
#include "en.h"
#include "en_accel/ipsec.h"
#include "en_accel/tls.h"
@@ -74,7 +75,6 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
- { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_udp_seg_rem) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) },
@@ -198,7 +198,6 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->tx_nop += sq_stats->nop;
s->tx_queue_stopped += sq_stats->stopped;
s->tx_queue_wake += sq_stats->wake;
- s->tx_udp_seg_rem += sq_stats->udp_seg_rem;
s->tx_queue_dropped += sq_stats->dropped;
s->tx_cqe_err += sq_stats->cqe_err;
s->tx_recover += sq_stats->recover;
@@ -482,7 +481,10 @@ static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data,
return idx;
}
-static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
+#define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \
+ (MLX5_CAP_GEN(mdev, pcam_reg) ? MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1)
+
+void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
{
struct mlx5e_pport_stats *pstats = &priv->stats.pport;
struct mlx5_core_dev *mdev = priv->mdev;
@@ -490,6 +492,9 @@ static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
void *out;
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
MLX5_SET(ppcnt_reg, in, local_port, 1);
out = pstats->IEEE_802_3_counters;
MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
@@ -602,6 +607,9 @@ static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv)
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
void *out;
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
MLX5_SET(ppcnt_reg, in, local_port, 1);
out = pstats->RFC_2819_counters;
MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
@@ -936,7 +944,7 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
};
static const struct counter_desc pport_pfc_stall_stats_desc[] = {
- { "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
+ { "tx_pause_storm_warning_events", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
{ "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) },
};
@@ -1077,6 +1085,9 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv)
int prio;
void *out;
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
MLX5_SET(ppcnt_reg, in, local_port, 1);
MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
@@ -1088,13 +1099,13 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv)
}
static const struct counter_desc mlx5e_pme_status_desc[] = {
- { "module_unplug", 8 },
+ { "module_unplug", sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED },
};
static const struct counter_desc mlx5e_pme_error_desc[] = {
- { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */
- { "module_high_temp", 48 }, /* high temperature */
- { "module_bad_shorted", 56 }, /* bad or shorted cable/module */
+ { "module_bus_stuck", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK },
+ { "module_high_temp", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE },
+ { "module_bad_shorted", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE },
};
#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc)
@@ -1122,15 +1133,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data,
static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data,
int idx)
{
- struct mlx5_priv *mlx5_priv = &priv->mdev->priv;
+ struct mlx5_pme_stats pme_stats;
int i;
+ mlx5_get_pme_stats(priv->mdev, &pme_stats);
+
for (i = 0; i < NUM_PME_STATUS_STATS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+ data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters,
mlx5e_pme_status_desc, i);
for (i = 0; i < NUM_PME_ERR_STATS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+ data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters,
mlx5e_pme_error_desc, i);
return idx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 3f8e870ef4c9..fe91ec06e3c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -87,7 +87,6 @@ struct mlx5e_sw_stats {
u64 tx_recover;
u64 tx_cqes;
u64 tx_queue_wake;
- u64 tx_udp_seg_rem;
u64 tx_cqe_err;
u64 tx_xdp_xmit;
u64 tx_xdp_full;
@@ -221,7 +220,6 @@ struct mlx5e_sq_stats {
u64 csum_partial_inner;
u64 added_vlan_packets;
u64 nop;
- u64 udp_seg_rem;
#ifdef CONFIG_MLX5_EN_TLS
u64 tls_ooo;
u64 tls_resync_bytes;
@@ -280,5 +278,6 @@ extern const struct mlx5e_stats_grp mlx5e_stats_grps[];
extern const int mlx5e_num_stats_grps;
void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv);
+void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv);
#endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fca6f4132c91..cae6c6d48984 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -44,15 +44,15 @@
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
-#include <net/vxlan.h>
#include <net/arp.h>
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
#include "eswitch.h"
-#include "lib/vxlan.h"
#include "fs_core.h"
#include "en/port.h"
+#include "en/tc_tun.h"
+#include "lib/devcom.h"
struct mlx5_nic_flow_attr {
u32 action;
@@ -69,25 +69,54 @@ struct mlx5_nic_flow_attr {
enum {
MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS,
MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS,
- MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE),
- MLX5E_TC_FLOW_NIC = BIT(MLX5E_TC_FLOW_BASE + 1),
- MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2),
- MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
- MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
- MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5),
+ MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD,
+ MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD,
+ MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE),
+ MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1),
+ MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2),
+ MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3),
+ MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4),
};
#define MLX5E_TC_MAX_SPLITS 1
+/* Helper struct for accessing a struct containing list_head array.
+ * Containing struct
+ * |- Helper array
+ * [0] Helper item 0
+ * |- list_head item 0
+ * |- index (0)
+ * [1] Helper item 1
+ * |- list_head item 1
+ * |- index (1)
+ * To access the containing struct from one of the list_head items:
+ * 1. Get the helper item from the list_head item using
+ * helper item =
+ * container_of(list_head item, helper struct type, list_head field)
+ * 2. Get the contining struct from the helper item and its index in the array:
+ * containing struct =
+ * container_of(helper item, containing struct type, helper field[index])
+ */
+struct encap_flow_item {
+ struct list_head list;
+ int index;
+};
+
struct mlx5e_tc_flow {
struct rhash_head node;
struct mlx5e_priv *priv;
u64 cookie;
u16 flags;
struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
- struct list_head encap; /* flows sharing the same encap ID */
+ /* Flow can be associated with multiple encap IDs.
+ * The number of encaps is bounded by the number of supported
+ * destinations.
+ */
+ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_flow *peer_flow;
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
struct list_head hairpin; /* flows sharing the same hairpin */
+ struct list_head peer; /* flows with peer flow */
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
@@ -95,11 +124,12 @@ struct mlx5e_tc_flow {
};
struct mlx5e_tc_flow_parse_attr {
- struct ip_tunnel_info tun_info;
+ struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct net_device *filter_dev;
struct mlx5_flow_spec spec;
int num_mod_hdr_actions;
void *mod_hdr_actions;
- int mirred_ifindex;
+ int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
};
#define MLX5E_TC_TABLE_NUM_GROUPS 4
@@ -316,7 +346,7 @@ static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
for (i = 0; i < sz; i++) {
ix = i;
- if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR)
+ if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
ix = mlx5e_bits_invert(i, ilog2(sz));
ix = indirection_rqt[ix];
rqn = hp->pair->rqn[ix];
@@ -360,13 +390,15 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
void *tirc;
for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
+
memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+ mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
err = mlx5_core_create_tir(hp->func_mdev, in,
MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
@@ -569,7 +601,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct netlink_ext_ack *extack)
{
- int peer_ifindex = parse_attr->mirred_ifindex;
+ int peer_ifindex = parse_attr->mirred_ifindex[0];
struct mlx5_hairpin_params params;
struct mlx5_core_dev *peer_mdev;
struct mlx5e_hairpin_entry *hpe;
@@ -802,7 +834,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
mlx5_del_flow_rules(flow->rule[0]);
mlx5_fc_destroy(priv->mdev, counter);
- if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) {
+ if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) {
mlx5_destroy_flow_table(priv->fs.tc.t);
priv->fs.tc.t = NULL;
}
@@ -815,14 +847,15 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow);
+ struct mlx5e_tc_flow *flow, int out_index);
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct ip_tunnel_info *tun_info,
struct net_device *mirred_dev,
struct net_device **encap_dev,
struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack);
+ struct netlink_ext_ack *extack,
+ int out_index);
static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
@@ -836,7 +869,7 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
if (IS_ERR(rule))
return rule;
- if (attr->mirror_count) {
+ if (attr->split_count) {
flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
if (IS_ERR(flow->rule[1])) {
mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
@@ -855,7 +888,7 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
{
flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
- if (attr->mirror_count)
+ if (attr->split_count)
mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
@@ -870,9 +903,9 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule;
memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
- slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
- slow_attr->mirror_count = 0,
- slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN,
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ slow_attr->split_count = 0;
+ slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
if (!IS_ERR(rule))
@@ -887,6 +920,9 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
struct mlx5_esw_flow_attr *slow_attr)
{
memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ slow_attr->split_count = 0;
+ slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
flow->flags &= ~MLX5E_TC_FLOW_SLOW;
}
@@ -906,12 +942,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
int err = 0, encap_err = 0;
+ int out_index;
- /* if prios are not supported, keep the old behaviour of using same prio
- * for all offloaded rules.
- */
- if (!mlx5_eswitch_prios_supported(esw))
- attr->prio = 1;
+ if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) {
+ NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
+ return -EOPNOTSUPP;
+ }
if (attr->chain > max_chain) {
NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
@@ -925,20 +961,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
goto err_max_prio_chain;
}
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ int mirred_ifindex;
+
+ if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ mirred_ifindex = attr->parse_attr->mirred_ifindex[out_index];
out_dev = __dev_get_by_index(dev_net(priv->netdev),
- attr->parse_attr->mirred_ifindex);
- encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
- out_dev, &encap_dev, flow,
- extack);
- if (encap_err && encap_err != -EAGAIN) {
- err = encap_err;
+ mirred_ifindex);
+ err = mlx5e_attach_encap(priv,
+ &parse_attr->tun_info[out_index],
+ out_dev, &encap_dev, flow,
+ extack, out_index);
+ if (err && err != -EAGAIN)
goto err_attach_encap;
- }
+ if (err == -EAGAIN)
+ encap_err = err;
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
- attr->out_rep[attr->out_count] = rpriv->rep;
- attr->out_mdev[attr->out_count++] = out_priv->mdev;
+ attr->dests[out_index].rep = rpriv->rep;
+ attr->dests[out_index].mdev = out_priv->mdev;
}
err = mlx5_eswitch_add_vlan_action(esw, attr);
@@ -953,7 +996,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(esw->dev, true);
+ counter = mlx5_fc_create(attr->counter_dev, true);
if (IS_ERR(counter)) {
err = PTR_ERR(counter);
goto err_create_counter;
@@ -982,15 +1025,16 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
return 0;
err_add_rule:
- mlx5_fc_destroy(esw->dev, counter);
+ mlx5_fc_destroy(attr->counter_dev, counter);
err_create_counter:
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
err_mod_hdr:
mlx5_eswitch_del_vlan_action(esw, attr);
err_add_vlan:
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
- mlx5e_detach_encap(priv, flow);
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+ if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
+ mlx5e_detach_encap(priv, flow, out_index);
err_attach_encap:
err_max_prio_chain:
return err;
@@ -1002,6 +1046,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5_esw_flow_attr slow_attr;
+ int out_index;
if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
if (flow->flags & MLX5E_TC_FLOW_SLOW)
@@ -1012,16 +1057,16 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5_eswitch_del_vlan_action(esw, attr);
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
- mlx5e_detach_encap(priv, flow);
- kvfree(attr->parse_attr);
- }
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+ if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
+ mlx5e_detach_encap(priv, flow, out_index);
+ kvfree(attr->parse_attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- mlx5_fc_destroy(esw->dev, attr->counter);
+ mlx5_fc_destroy(attr->counter_dev, attr->counter);
}
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
@@ -1031,10 +1076,12 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
struct mlx5_esw_flow_attr slow_attr, *esw_attr;
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
+ struct encap_flow_item *efi;
struct mlx5e_tc_flow *flow;
int err;
- err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+ err = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
e->encap_size, e->encap_header,
MLX5_FLOW_NAMESPACE_FDB,
&e->encap_id);
@@ -1046,11 +1093,31 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(priv);
- list_for_each_entry(flow, &e->flows, encap) {
+ list_for_each_entry(efi, &e->flows, list) {
+ bool all_flow_encaps_valid = true;
+ int i;
+
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
esw_attr = flow->esw_attr;
- esw_attr->encap_id = e->encap_id;
spec = &esw_attr->parse_attr->spec;
+ esw_attr->dests[efi->index].encap_id = e->encap_id;
+ esw_attr->dests[efi->index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ /* Flow can be associated with multiple encap entries.
+ * Before offloading the flow verify that all of them have
+ * a valid neighbour.
+ */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
+ all_flow_encaps_valid = false;
+ break;
+ }
+ }
+ /* Do not offload flows with unresolved neighbors */
+ if (!all_flow_encaps_valid)
+ continue;
/* update from slow path rule to encap rule */
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
if (IS_ERR(rule)) {
@@ -1073,14 +1140,18 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
struct mlx5_esw_flow_attr slow_attr;
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
+ struct encap_flow_item *efi;
struct mlx5e_tc_flow *flow;
int err;
- list_for_each_entry(flow, &e->flows, encap) {
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
spec = &flow->esw_attr->parse_attr->spec;
/* update from encap rule to slow path rule */
rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
+ /* mark the flow's encap dest as non-valid */
+ flow->esw_attr->dests[efi->index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -1094,10 +1165,9 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
flow->rule[0] = rule;
}
- if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
- e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
- mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
- }
+ /* we know that the encap is valid */
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
}
static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
@@ -1129,9 +1199,12 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
return;
list_for_each_entry(e, &nhe->encap_list, encap_list) {
+ struct encap_flow_item *efi;
if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
continue;
- list_for_each_entry(flow, &e->flows, encap) {
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow,
+ encaps[efi->index]);
if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
counter = mlx5e_tc_get_counter(flow);
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
@@ -1161,11 +1234,11 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
+ struct mlx5e_tc_flow *flow, int out_index)
{
- struct list_head *next = flow->encap.next;
+ struct list_head *next = flow->encaps[out_index].list.next;
- list_del(&flow->encap);
+ list_del(&flow->encaps[out_index].list);
if (list_empty(next)) {
struct mlx5e_encap_entry *e;
@@ -1181,49 +1254,55 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
}
}
-static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
+static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
- mlx5e_tc_del_fdb_flow(priv, flow);
- else
- mlx5e_tc_del_nic_flow(priv, flow);
+ struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+
+ if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
+ !(flow->flags & MLX5E_TC_FLOW_DUP))
+ return;
+
+ mutex_lock(&esw->offloads.peer_mutex);
+ list_del(&flow->peer);
+ mutex_unlock(&esw->offloads.peer_mutex);
+
+ flow->flags &= ~MLX5E_TC_FLOW_DUP;
+
+ mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
+ kvfree(flow->peer_flow);
+ flow->peer_flow = NULL;
}
-static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f)
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
- void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- outer_headers);
- void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- outer_headers);
- void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
+ struct mlx5_core_dev *dev = flow->priv->mdev;
+ struct mlx5_devcom *devcom = dev->priv.devcom;
+ struct mlx5_eswitch *peer_esw;
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return;
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
- struct flow_dissector_key_keyid *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_KEYID,
- f->key);
- struct flow_dissector_key_keyid *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_KEYID,
- f->mask);
- MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
- be32_to_cpu(mask->keyid));
- MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
- be32_to_cpu(key->keyid));
+ __mlx5e_tc_del_fdb_peer_flow(flow);
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
+
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+ mlx5e_tc_del_fdb_peer_flow(flow);
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ } else {
+ mlx5e_tc_del_nic_flow(priv, flow);
}
}
+
static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f)
+ struct tc_cls_flower_offload *f,
+ struct net_device *filter_dev)
{
struct netlink_ext_ack *extack = f->common.extack;
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -1235,48 +1314,14 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
skb_flow_dissector_target(f->dissector,
FLOW_DISSECTOR_KEY_ENC_CONTROL,
f->key);
+ int err = 0;
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
- struct flow_dissector_key_ports *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_PORTS,
- f->key);
- struct flow_dissector_key_ports *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_PORTS,
- f->mask);
-
- /* Full udp dst port must be given */
- if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
- goto vxlan_match_offload_err;
-
- if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) &&
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
- parse_vxlan_attr(spec, f);
- else {
- NL_SET_ERR_MSG_MOD(extack,
- "port isn't an offloaded vxlan udp dport");
- netdev_warn(priv->netdev,
- "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
- return -EOPNOTSUPP;
- }
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- udp_dport, ntohs(mask->dst));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- udp_dport, ntohs(key->dst));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- udp_sport, ntohs(mask->src));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- udp_sport, ntohs(key->src));
- } else { /* udp dst port must be given */
-vxlan_match_offload_err:
+ err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
+ headers_c, headers_v);
+ if (err) {
NL_SET_ERR_MSG_MOD(extack,
- "IP tunnel decap offload supported only for vxlan, must set UDP dport");
- netdev_warn(priv->netdev,
- "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
- return -EOPNOTSUPP;
+ "failed to parse tunnel attributes");
+ return err;
}
if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
@@ -1380,6 +1425,7 @@ vxlan_match_offload_err:
static int __parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct tc_cls_flower_offload *f,
+ struct net_device *filter_dev,
u8 *match_level)
{
struct netlink_ext_ack *extack = f->common.extack;
@@ -1431,7 +1477,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
switch (key->addr_type) {
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
- if (parse_tunnel_attr(priv, spec, f))
+ if (parse_tunnel_attr(priv, spec, f, filter_dev))
return -EOPNOTSUPP;
break;
default:
@@ -1773,7 +1819,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
static int parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f)
+ struct tc_cls_flower_offload *f,
+ struct net_device *filter_dev)
{
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5_core_dev *dev = priv->mdev;
@@ -1783,7 +1830,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
u8 match_level;
int err;
- err = __parse_cls_flower(priv, spec, f, &match_level);
+ err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level);
if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
rep = rpriv->rep;
@@ -2136,7 +2183,6 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
{
const struct tc_action *a;
bool modify_ip_header;
- LIST_HEAD(actions);
u8 htype, ip_proto;
void *headers_v;
u16 ethertype;
@@ -2225,7 +2271,6 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
{
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
const struct tc_action *a;
- LIST_HEAD(actions);
u32 action = 0;
int err, i;
@@ -2268,7 +2313,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
same_hw_devs(priv, netdev_priv(peer_dev))) {
- parse_attr->mirred_ifindex = peer_dev->ifindex;
+ parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
@@ -2317,45 +2362,6 @@ static inline int hash_encap_info(struct ip_tunnel_key *key)
return jhash(key, sizeof(*key), 0);
}
-static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct flowi4 *fl4,
- struct neighbour **out_n,
- u8 *out_ttl)
-{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_rep_priv *uplink_rpriv;
- struct rtable *rt;
- struct neighbour *n = NULL;
-
-#if IS_ENABLED(CONFIG_INET)
- int ret;
-
- rt = ip_route_output_key(dev_net(mirred_dev), fl4);
- ret = PTR_ERR_OR_ZERO(rt);
- if (ret)
- return ret;
-#else
- return -EOPNOTSUPP;
-#endif
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- /* if the egress device isn't on the same HW e-switch, we use the uplink */
- if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
- *out_dev = uplink_rpriv->netdev;
- else
- *out_dev = rt->dst.dev;
-
- if (!(*out_ttl))
- *out_ttl = ip4_dst_hoplimit(&rt->dst);
- n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
- ip_rt_put(rt);
- if (!n)
- return -ENOMEM;
-
- *out_n = n;
- return 0;
-}
static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
struct net_device *peer_netdev)
@@ -2371,377 +2377,24 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
(peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS));
}
-static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct flowi6 *fl6,
- struct neighbour **out_n,
- u8 *out_ttl)
-{
- struct neighbour *n = NULL;
- struct dst_entry *dst;
-
-#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
- struct mlx5e_rep_priv *uplink_rpriv;
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- int ret;
-
- ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
- fl6);
- if (ret < 0)
- return ret;
-
- if (!(*out_ttl))
- *out_ttl = ip6_dst_hoplimit(dst);
-
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- /* if the egress device isn't on the same HW e-switch, we use the uplink */
- if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
- *out_dev = uplink_rpriv->netdev;
- else
- *out_dev = dst->dev;
-#else
- return -EOPNOTSUPP;
-#endif
-
- n = dst_neigh_lookup(dst, &fl6->daddr);
- dst_release(dst);
- if (!n)
- return -ENOMEM;
-
- *out_n = n;
- return 0;
-}
-
-static void gen_vxlan_header_ipv4(struct net_device *out_dev,
- char buf[], int encap_size,
- unsigned char h_dest[ETH_ALEN],
- u8 tos, u8 ttl,
- __be32 daddr,
- __be32 saddr,
- __be16 udp_dst_port,
- __be32 vx_vni)
-{
- struct ethhdr *eth = (struct ethhdr *)buf;
- struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
- struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
- struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
-
- memset(buf, 0, encap_size);
-
- ether_addr_copy(eth->h_dest, h_dest);
- ether_addr_copy(eth->h_source, out_dev->dev_addr);
- eth->h_proto = htons(ETH_P_IP);
-
- ip->daddr = daddr;
- ip->saddr = saddr;
-
- ip->tos = tos;
- ip->ttl = ttl;
- ip->protocol = IPPROTO_UDP;
- ip->version = 0x4;
- ip->ihl = 0x5;
-
- udp->dest = udp_dst_port;
- vxh->vx_flags = VXLAN_HF_VNI;
- vxh->vx_vni = vxlan_vni_field(vx_vni);
-}
-
-static void gen_vxlan_header_ipv6(struct net_device *out_dev,
- char buf[], int encap_size,
- unsigned char h_dest[ETH_ALEN],
- u8 tos, u8 ttl,
- struct in6_addr *daddr,
- struct in6_addr *saddr,
- __be16 udp_dst_port,
- __be32 vx_vni)
-{
- struct ethhdr *eth = (struct ethhdr *)buf;
- struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
- struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
- struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
-
- memset(buf, 0, encap_size);
-
- ether_addr_copy(eth->h_dest, h_dest);
- ether_addr_copy(eth->h_source, out_dev->dev_addr);
- eth->h_proto = htons(ETH_P_IPV6);
-
- ip6_flow_hdr(ip6h, tos, 0);
- /* the HW fills up ipv6 payload len */
- ip6h->nexthdr = IPPROTO_UDP;
- ip6h->hop_limit = ttl;
- ip6h->daddr = *daddr;
- ip6h->saddr = *saddr;
-
- udp->dest = udp_dst_port;
- vxh->vx_flags = VXLAN_HF_VNI;
- vxh->vx_vni = vxlan_vni_field(vx_vni);
-}
-
-static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct mlx5e_encap_entry *e)
-{
- int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
- int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
- struct net_device *out_dev;
- struct neighbour *n = NULL;
- struct flowi4 fl4 = {};
- u8 nud_state, tos, ttl;
- char *encap_header;
- int err;
-
- if (max_encap_size < ipv4_encap_size) {
- mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
- ipv4_encap_size, max_encap_size);
- return -EOPNOTSUPP;
- }
-
- encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
- if (!encap_header)
- return -ENOMEM;
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- fl4.flowi4_proto = IPPROTO_UDP;
- fl4.fl4_dport = tun_key->tp_dst;
- break;
- default:
- err = -EOPNOTSUPP;
- goto free_encap;
- }
-
- tos = tun_key->tos;
- ttl = tun_key->ttl;
-
- fl4.flowi4_tos = tun_key->tos;
- fl4.daddr = tun_key->u.ipv4.dst;
- fl4.saddr = tun_key->u.ipv4.src;
-
- err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
- &fl4, &n, &ttl);
- if (err)
- goto free_encap;
-
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
-
- /* It's importent to add the neigh to the hash table before checking
- * the neigh validity state. So if we'll get a notification, in case the
- * neigh changes it's validity state, we would find the relevant neigh
- * in the hash.
- */
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
- if (err)
- goto free_encap;
-
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- gen_vxlan_header_ipv4(out_dev, encap_header,
- ipv4_encap_size, e->h_dest, tos, ttl,
- fl4.daddr,
- fl4.saddr, tun_key->tp_dst,
- tunnel_id_to_key32(tun_key->tun_id));
- break;
- default:
- err = -EOPNOTSUPP;
- goto destroy_neigh_entry;
- }
- e->encap_size = ipv4_encap_size;
- e->encap_header = encap_header;
-
- if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
- err = -EAGAIN;
- goto out;
- }
-
- err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
- ipv4_encap_size, encap_header,
- MLX5_FLOW_NAMESPACE_FDB,
- &e->encap_id);
- if (err)
- goto destroy_neigh_entry;
-
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- neigh_release(n);
- return err;
-
-destroy_neigh_entry:
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-free_encap:
- kfree(encap_header);
-out:
- if (n)
- neigh_release(n);
- return err;
-}
-
-static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct mlx5e_encap_entry *e)
-{
- int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
- int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
- struct net_device *out_dev;
- struct neighbour *n = NULL;
- struct flowi6 fl6 = {};
- u8 nud_state, tos, ttl;
- char *encap_header;
- int err;
-
- if (max_encap_size < ipv6_encap_size) {
- mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
- ipv6_encap_size, max_encap_size);
- return -EOPNOTSUPP;
- }
-
- encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
- if (!encap_header)
- return -ENOMEM;
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- fl6.flowi6_proto = IPPROTO_UDP;
- fl6.fl6_dport = tun_key->tp_dst;
- break;
- default:
- err = -EOPNOTSUPP;
- goto free_encap;
- }
-
- tos = tun_key->tos;
- ttl = tun_key->ttl;
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
- fl6.daddr = tun_key->u.ipv6.dst;
- fl6.saddr = tun_key->u.ipv6.src;
-
- err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
- &fl6, &n, &ttl);
- if (err)
- goto free_encap;
-
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
-
- /* It's importent to add the neigh to the hash table before checking
- * the neigh validity state. So if we'll get a notification, in case the
- * neigh changes it's validity state, we would find the relevant neigh
- * in the hash.
- */
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
- if (err)
- goto free_encap;
-
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- gen_vxlan_header_ipv6(out_dev, encap_header,
- ipv6_encap_size, e->h_dest, tos, ttl,
- &fl6.daddr,
- &fl6.saddr, tun_key->tp_dst,
- tunnel_id_to_key32(tun_key->tun_id));
- break;
- default:
- err = -EOPNOTSUPP;
- goto destroy_neigh_entry;
- }
-
- e->encap_size = ipv6_encap_size;
- e->encap_header = encap_header;
-
- if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
- err = -EAGAIN;
- goto out;
- }
-
- err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
- ipv6_encap_size, encap_header,
- MLX5_FLOW_NAMESPACE_FDB,
- &e->encap_id);
- if (err)
- goto destroy_neigh_entry;
-
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- neigh_release(n);
- return err;
-
-destroy_neigh_entry:
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-free_encap:
- kfree(encap_header);
-out:
- if (n)
- neigh_release(n);
- return err;
-}
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct ip_tunnel_info *tun_info,
struct net_device *mirred_dev,
struct net_device **encap_dev,
struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ int out_index)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
unsigned short family = ip_tunnel_info_af(tun_info);
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct ip_tunnel_key *key = &tun_info->key;
struct mlx5e_encap_entry *e;
- int tunnel_type, err = 0;
uintptr_t hash_key;
bool found = false;
-
- /* udp dst port must be set */
- if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
- goto vxlan_encap_offload_err;
-
- /* setting udp src port isn't supported */
- if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
-vxlan_encap_offload_err:
- NL_SET_ERR_MSG_MOD(extack,
- "must set udp dst port and not set udp src port");
- netdev_warn(priv->netdev,
- "must set udp dst port and not set udp src port\n");
- return -EOPNOTSUPP;
- }
-
- if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
- tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
- } else {
- NL_SET_ERR_MSG_MOD(extack,
- "port isn't an offloaded vxlan udp dport");
- netdev_warn(priv->netdev,
- "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
- return -EOPNOTSUPP;
- }
+ int err = 0;
hash_key = hash_encap_info(key);
@@ -2762,13 +2415,16 @@ vxlan_encap_offload_err:
return -ENOMEM;
e->tun_info = *tun_info;
- e->tunnel_type = tunnel_type;
+ err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
+ if (err)
+ goto out_err;
+
INIT_LIST_HEAD(&e->flows);
if (family == AF_INET)
- err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e);
+ err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
else if (family == AF_INET6)
- err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e);
+ err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
if (err && err != -EAGAIN)
goto out_err;
@@ -2776,12 +2432,15 @@ vxlan_encap_offload_err:
hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
attach_flow:
- list_add(&flow->encap, &e->flows);
+ list_add(&flow->encaps[out_index].list, &e->flows);
+ flow->encaps[out_index].index = out_index;
*encap_dev = e->out_dev;
- if (e->flags & MLX5_ENCAP_ENTRY_VALID)
- attr->encap_id = e->encap_id;
- else
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ attr->dests[out_index].encap_id = e->encap_id;
+ attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ } else {
err = -EAGAIN;
+ }
return err;
@@ -2850,7 +2509,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct ip_tunnel_info *info = NULL;
const struct tc_action *a;
- LIST_HEAD(actions);
bool encap = false;
u32 action = 0;
int err, i;
@@ -2875,7 +2533,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
return err;
action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- attr->mirror_count = attr->out_count;
+ attr->split_count = attr->out_count;
continue;
}
@@ -2893,6 +2551,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
struct net_device *out_dev;
out_dev = tcf_mirred_dev(a);
+ if (!out_dev) {
+ /* out_dev is NULL when filters with
+ * non-existing mirred device are replayed to
+ * the driver.
+ */
+ return -EINVAL;
+ }
if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
NL_SET_ERR_MSG_MOD(extack,
@@ -2902,23 +2567,47 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
return -EOPNOTSUPP;
}
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
if (switchdev_port_same_parent_id(priv->netdev,
out_dev) ||
is_merged_eswitch_dev(priv, out_dev)) {
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev);
+
+ if (uplink_upper &&
+ netif_is_lag_master(uplink_upper) &&
+ uplink_upper == out_dev)
+ out_dev = uplink_dev;
+
+ if (!mlx5e_eswitch_rep(out_dev))
+ return -EOPNOTSUPP;
+
out_priv = netdev_priv(out_dev);
rpriv = out_priv->ppriv;
- attr->out_rep[attr->out_count] = rpriv->rep;
- attr->out_mdev[attr->out_count++] = out_priv->mdev;
+ attr->dests[attr->out_count].rep = rpriv->rep;
+ attr->dests[attr->out_count].mdev = out_priv->mdev;
+ attr->out_count++;
} else if (encap) {
- parse_attr->mirred_ifindex = out_dev->ifindex;
- parse_attr->tun_info = *info;
+ parse_attr->mirred_ifindex[attr->out_count] =
+ out_dev->ifindex;
+ parse_attr->tun_info[attr->out_count] = *info;
+ encap = false;
attr->parse_attr = parse_attr;
- action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- /* attr->out_rep is resolved when we handle encap */
+ attr->dests[attr->out_count].flags |=
+ MLX5_ESW_DEST_ENCAP;
+ attr->out_count++;
+ /* attr->dests[].rep is resolved when we
+ * handle encap
+ */
+ } else if (parse_attr->filter_dev != priv->netdev) {
+ /* All mlx5 devices are called to configure
+ * high level device filters. Therefore, the
+ * *attempt* to install a filter on invalid
+ * eswitch should not trigger an explicit error
+ */
+ return -EINVAL;
} else {
NL_SET_ERR_MSG_MOD(extack,
"devices are not on same switch HW, can't offload forwarding");
@@ -2935,7 +2624,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
encap = true;
else
return -EOPNOTSUPP;
- attr->mirror_count = attr->out_count;
continue;
}
@@ -2945,7 +2633,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
if (err)
return err;
- attr->mirror_count = attr->out_count;
+ attr->split_count = attr->out_count;
continue;
}
@@ -2966,8 +2654,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range");
return -EOPNOTSUPP;
}
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
attr->dest_chain = dest_chain;
continue;
@@ -2980,7 +2667,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
if (!actions_match_supported(priv, exts, parse_attr, flow, extack))
return -EOPNOTSUPP;
- if (attr->mirror_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+ if (attr->dest_chain) {
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
+ return -EOPNOTSUPP;
+ }
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ }
+
+ if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
NL_SET_ERR_MSG_MOD(extack,
"current firmware doesn't support split rule for port mirroring");
netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
@@ -2999,6 +2694,11 @@ static void get_flags(int flags, u16 *flow_flags)
if (flags & MLX5E_TC_EGRESS)
__flow_flags |= MLX5E_TC_FLOW_EGRESS;
+ if (flags & MLX5E_TC_ESW_OFFLOAD)
+ __flow_flags |= MLX5E_TC_FLOW_ESWITCH;
+ if (flags & MLX5E_TC_NIC_OFFLOAD)
+ __flow_flags |= MLX5E_TC_FLOW_NIC;
+
*flow_flags = __flow_flags;
}
@@ -3009,18 +2709,32 @@ static const struct rhashtable_params tc_ht_params = {
.automatic_shrinking = true,
};
-static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
+static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *uplink_rpriv;
- if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) {
+ if (flags & MLX5E_TC_ESW_OFFLOAD) {
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- return &uplink_rpriv->tc_ht;
- } else
+ return &uplink_rpriv->uplink_priv.tc_ht;
+ } else /* NIC offload */
return &priv->fs.tc.ht;
}
+static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ bool is_rep_ingress = attr->in_rep->vport != FDB_UPLINK_VPORT &&
+ flow->flags & MLX5E_TC_FLOW_INGRESS;
+ bool act_is_encap = !!(attr->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
+ bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS);
+
+ return esw_paired && mlx5_lag_is_sriov(attr->in_mdev) &&
+ (is_rep_ingress || act_is_encap);
+}
+
static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
struct tc_cls_flower_offload *f, u16 flow_flags,
@@ -3042,10 +2756,6 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
flow->flags = flow_flags;
flow->priv = priv;
- err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
- if (err)
- goto err_free;
-
*__flow = flow;
*__parse_attr = parse_attr;
@@ -3058,12 +2768,16 @@ err_free:
}
static int
-mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f,
- u16 flow_flags,
- struct mlx5e_tc_flow **__flow)
+__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct net_device *filter_dev,
+ struct mlx5_eswitch_rep *in_rep,
+ struct mlx5_core_dev *in_mdev,
+ struct mlx5e_tc_flow **__flow)
{
struct netlink_ext_ack *extack = f->common.extack;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_tc_flow *flow;
int attr_size, err;
@@ -3074,6 +2788,12 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
&parse_attr, &flow);
if (err)
goto out;
+ parse_attr->filter_dev = filter_dev;
+ flow->esw_attr->parse_attr = parse_attr;
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
+ f, filter_dev);
+ if (err)
+ goto err_free;
flow->esw_attr->chain = f->common.chain_index;
flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
@@ -3081,14 +2801,19 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
+ flow->esw_attr->in_rep = in_rep;
+ flow->esw_attr->in_mdev = in_mdev;
+
+ if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
+ MLX5_COUNTER_SOURCE_ESWITCH)
+ flow->esw_attr->counter_dev = in_mdev;
+ else
+ flow->esw_attr->counter_dev = priv->mdev;
+
err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack);
if (err)
goto err_free;
- if (!(flow->esw_attr->action &
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT))
- kvfree(parse_attr);
-
*__flow = flow;
return 0;
@@ -3100,10 +2825,92 @@ out:
return err;
}
+static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_priv *priv = flow->priv, *peer_priv;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_rep_priv *peer_urpriv;
+ struct mlx5e_tc_flow *peer_flow;
+ struct mlx5_core_dev *in_mdev;
+ int err = 0;
+
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return -ENODEV;
+
+ peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
+ peer_priv = netdev_priv(peer_urpriv->netdev);
+
+ /* in_mdev is assigned of which the packet originated from.
+ * So packets redirected to uplink use the same mdev of the
+ * original flow and packets redirected from uplink use the
+ * peer mdev.
+ */
+ if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT)
+ in_mdev = peer_priv->mdev;
+ else
+ in_mdev = priv->mdev;
+
+ parse_attr = flow->esw_attr->parse_attr;
+ err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags,
+ parse_attr->filter_dev,
+ flow->esw_attr->in_rep, in_mdev, &peer_flow);
+ if (err)
+ goto out;
+
+ flow->peer_flow = peer_flow;
+ flow->flags |= MLX5E_TC_FLOW_DUP;
+ mutex_lock(&esw->offloads.peer_mutex);
+ list_add_tail(&flow->peer, &esw->offloads.peer_flows);
+ mutex_unlock(&esw->offloads.peer_mutex);
+
+out:
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ return err;
+}
+
+static int
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct net_device *filter_dev,
+ struct mlx5e_tc_flow **__flow)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *in_rep = rpriv->rep;
+ struct mlx5_core_dev *in_mdev = priv->mdev;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
+ in_mdev, &flow);
+ if (err)
+ goto out;
+
+ if (is_peer_flow_needed(flow)) {
+ err = mlx5e_tc_add_fdb_peer_flow(f, flow);
+ if (err) {
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ goto out;
+ }
+ }
+
+ *__flow = flow;
+
+ return 0;
+
+out:
+ return err;
+}
+
static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
u16 flow_flags,
+ struct net_device *filter_dev,
struct mlx5e_tc_flow **__flow)
{
struct netlink_ext_ack *extack = f->common.extack;
@@ -3122,6 +2929,12 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
if (err)
goto out;
+ parse_attr->filter_dev = filter_dev;
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
+ f, filter_dev);
+ if (err)
+ goto err_free;
+
err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack);
if (err)
goto err_free;
@@ -3147,6 +2960,7 @@ static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
int flags,
+ struct net_device *filter_dev,
struct mlx5e_tc_flow **flow)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -3159,18 +2973,20 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
if (esw && esw->mode == SRIOV_OFFLOADS)
- err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow);
+ err = mlx5e_add_fdb_flow(priv, f, flow_flags,
+ filter_dev, flow);
else
- err = mlx5e_add_nic_flow(priv, f, flow_flags, flow);
+ err = mlx5e_add_nic_flow(priv, f, flow_flags,
+ filter_dev, flow);
return err;
}
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags)
{
struct netlink_ext_ack *extack = f->common.extack;
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
struct mlx5e_tc_flow *flow;
int err = 0;
@@ -3184,7 +3000,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
goto out;
}
- err = mlx5e_tc_add_flow(priv, f, flags, &flow);
+ err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
if (err)
goto out;
@@ -3212,10 +3028,10 @@ static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
return false;
}
-int mlx5e_delete_flower(struct mlx5e_priv *priv,
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags)
{
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
struct mlx5e_tc_flow *flow;
flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
@@ -3231,10 +3047,12 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv,
return 0;
}
-int mlx5e_stats_flower(struct mlx5e_priv *priv,
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags)
{
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+ struct mlx5_eswitch *peer_esw;
struct mlx5e_tc_flow *flow;
struct mlx5_fc *counter;
u64 bytes;
@@ -3254,6 +3072,27 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ goto out;
+
+ if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
+ (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
+ u64 bytes2;
+ u64 packets2;
+ u64 lastuse2;
+
+ counter = mlx5e_tc_get_counter(flow->peer_flow);
+ mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
+
+ bytes += bytes2;
+ packets += packets2;
+ lastuse = max_t(u64, lastuse, lastuse2);
+ }
+
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
+out:
tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
return 0;
@@ -3342,7 +3181,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
if (tc->netdevice_nb.notifier_call)
unregister_netdevice_notifier(&tc->netdevice_nb);
- rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
+ rhashtable_destroy(&tc->ht);
if (!IS_ERR_OR_NULL(tc->t)) {
mlx5_destroy_flow_table(tc->t);
@@ -3360,9 +3199,17 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags)
{
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
return atomic_read(&tc_ht->nelems);
}
+
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
+{
+ struct mlx5e_tc_flow *flow, *tmp;
+
+ list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
+ __mlx5e_tc_del_fdb_peer_flow(flow);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 49436bf3b80a..d2d87f978c06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -42,7 +42,9 @@
enum {
MLX5E_TC_INGRESS = BIT(0),
MLX5E_TC_EGRESS = BIT(1),
- MLX5E_TC_LAST_EXPORTED_BIT = 1,
+ MLX5E_TC_NIC_OFFLOAD = BIT(2),
+ MLX5E_TC_ESW_OFFLOAD = BIT(3),
+ MLX5E_TC_LAST_EXPORTED_BIT = 3,
};
int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
@@ -51,12 +53,12 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
-int mlx5e_delete_flower(struct mlx5e_priv *priv,
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
-int mlx5e_stats_flower(struct mlx5e_priv *priv,
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
struct mlx5e_encap_entry;
@@ -68,12 +70,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
struct mlx5e_neigh_hash_entry;
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv);
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags);
+
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
-static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; }
+static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) { return 0; }
#endif
#endif /* __MLX5_EN_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 6dacaeba2fbf..598ad7e4d5c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -127,7 +127,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
else
#endif
if (skb_vlan_tag_present(skb))
- up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+ up = skb_vlan_tag_get_prio(skb);
/* channel_ix can be larger than num_channels since
* dev->num_real_tx_queues = num_channels * num_tc
@@ -459,9 +459,10 @@ static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq);
netdev_err(sq->channel->netdev,
- "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
- sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome,
- err_cqe->vendor_err_synd);
+ "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+ sq->cq.mcq.cqn, ci, sq->sqn,
+ get_cqe_opcode((struct mlx5_cqe64 *)err_cqe),
+ err_cqe->syndrome, err_cqe->vendor_err_synd);
mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
}
@@ -507,7 +508,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
- if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
+ if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
&sq->state)) {
mlx5e_dump_error_cqe(sq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 85d517360157..b4af5e19f6ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -76,6 +76,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
napi);
struct mlx5e_ch_stats *ch_stats = c->stats;
+ struct mlx5e_rq *rq = &c->rq;
bool busy = false;
int work_done = 0;
int i;
@@ -85,17 +86,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
for (i = 0; i < c->num_tc; i++)
busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
- busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
+ busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL);
if (c->xdp)
- busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq);
+ busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq);
if (likely(budget)) { /* budget=0 means: don't poll rx rings */
- work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
+ work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
busy |= work_done == budget;
}
- busy |= c->rq.post_wqes(&c->rq);
+ busy |= c->rq.post_wqes(rq);
if (busy) {
if (likely(mlx5e_channel_no_affinity_change(c)))
@@ -115,9 +116,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_cq_arm(&c->sq[i].cq);
}
- mlx5e_handle_rx_dim(&c->rq);
+ mlx5e_handle_rx_dim(rq);
- mlx5e_cq_arm(&c->rq.cq);
+ mlx5e_cq_arm(&rq->cq);
mlx5e_cq_arm(&c->icosq.cq);
mlx5e_cq_arm(&c->xdpsq.cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c1e1a16a9b07..ee04aab65a9f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -31,20 +31,22 @@
*/
#include <linux/interrupt.h>
+#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
#include <linux/mlx5/cmd.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "fpga/core.h"
#include "eswitch.h"
#include "lib/clock.h"
#include "diag/fw_tracer.h"
enum {
- MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
MLX5_EQE_OWNER_INIT_VAL = 0x1,
};
@@ -55,14 +57,32 @@ enum {
};
enum {
- MLX5_NUM_SPARE_EQE = 0x80,
- MLX5_NUM_ASYNC_EQE = 0x1000,
- MLX5_NUM_CMD_EQE = 32,
- MLX5_NUM_PF_DRAIN = 64,
+ MLX5_EQ_DOORBEL_OFFSET = 0x40,
};
-enum {
- MLX5_EQ_DOORBEL_OFFSET = 0x40,
+struct mlx5_irq_info {
+ cpumask_var_t mask;
+ char name[MLX5_MAX_IRQ_NAME];
+ void *context; /* dev_id provided to request_irq */
+};
+
+struct mlx5_eq_table {
+ struct list_head comp_eqs_list;
+ struct mlx5_eq pages_eq;
+ struct mlx5_eq cmd_eq;
+ struct mlx5_eq async_eq;
+
+ struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
+
+ /* Since CQ DB is stored in async_eq */
+ struct mlx5_nb cq_err_nb;
+
+ struct mutex lock; /* sync async eqs creations */
+ int num_comp_vectors;
+ struct mlx5_irq_info *irq_info;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -78,17 +98,6 @@ enum {
(1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \
(1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
-struct map_eq_in {
- u64 mask;
- u32 reserved;
- u32 unmap_eqn;
-};
-
-struct cre_des_eq {
- u8 reserved[15];
- u8 eqn;
-};
-
static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
{
u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
@@ -99,213 +108,56 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
-{
- return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
-}
-
-static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
-{
- struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
-
- return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
-}
-
-static const char *eqe_type_str(u8 type)
-{
- switch (type) {
- case MLX5_EVENT_TYPE_COMP:
- return "MLX5_EVENT_TYPE_COMP";
- case MLX5_EVENT_TYPE_PATH_MIG:
- return "MLX5_EVENT_TYPE_PATH_MIG";
- case MLX5_EVENT_TYPE_COMM_EST:
- return "MLX5_EVENT_TYPE_COMM_EST";
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- return "MLX5_EVENT_TYPE_SQ_DRAINED";
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
- case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
- return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
- case MLX5_EVENT_TYPE_CQ_ERROR:
- return "MLX5_EVENT_TYPE_CQ_ERROR";
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
- case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
- return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
- case MLX5_EVENT_TYPE_INTERNAL_ERROR:
- return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
- case MLX5_EVENT_TYPE_PORT_CHANGE:
- return "MLX5_EVENT_TYPE_PORT_CHANGE";
- case MLX5_EVENT_TYPE_GPIO_EVENT:
- return "MLX5_EVENT_TYPE_GPIO_EVENT";
- case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
- return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
- case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
- return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
- case MLX5_EVENT_TYPE_REMOTE_CONFIG:
- return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
- case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
- return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
- case MLX5_EVENT_TYPE_STALL_EVENT:
- return "MLX5_EVENT_TYPE_STALL_EVENT";
- case MLX5_EVENT_TYPE_CMD:
- return "MLX5_EVENT_TYPE_CMD";
- case MLX5_EVENT_TYPE_PAGE_REQUEST:
- return "MLX5_EVENT_TYPE_PAGE_REQUEST";
- case MLX5_EVENT_TYPE_PAGE_FAULT:
- return "MLX5_EVENT_TYPE_PAGE_FAULT";
- case MLX5_EVENT_TYPE_PPS_EVENT:
- return "MLX5_EVENT_TYPE_PPS_EVENT";
- case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
- return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
- case MLX5_EVENT_TYPE_FPGA_ERROR:
- return "MLX5_EVENT_TYPE_FPGA_ERROR";
- case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
- return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
- case MLX5_EVENT_TYPE_GENERAL_EVENT:
- return "MLX5_EVENT_TYPE_GENERAL_EVENT";
- case MLX5_EVENT_TYPE_DEVICE_TRACER:
- return "MLX5_EVENT_TYPE_DEVICE_TRACER";
- default:
- return "Unrecognized event";
- }
-}
-
-static enum mlx5_dev_event port_subtype_event(u8 subtype)
-{
- switch (subtype) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- return MLX5_DEV_EVENT_PORT_DOWN;
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- return MLX5_DEV_EVENT_PORT_UP;
- case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
- return MLX5_DEV_EVENT_PORT_INITIALIZED;
- case MLX5_PORT_CHANGE_SUBTYPE_LID:
- return MLX5_DEV_EVENT_LID_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
- return MLX5_DEV_EVENT_PKEY_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_GUID:
- return MLX5_DEV_EVENT_GUID_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
- return MLX5_DEV_EVENT_CLIENT_REREG;
- }
- return -1;
-}
-
-static void eq_update_ci(struct mlx5_eq *eq, int arm)
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
{
- __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
- u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
-
- __raw_writel((__force u32)cpu_to_be32(val), addr);
- /* We still want ordering, just not swabbing, so add a barrier */
- mb();
-}
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *cq = NULL;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static void eqe_pf_action(struct work_struct *work)
-{
- struct mlx5_pagefault *pfault = container_of(work,
- struct mlx5_pagefault,
- work);
- struct mlx5_eq *eq = pfault->eq;
+ spin_lock(&table->lock);
+ cq = radix_tree_lookup(&table->tree, cqn);
+ if (likely(cq))
+ mlx5_cq_hold(cq);
+ spin_unlock(&table->lock);
- mlx5_core_page_fault(eq->dev, pfault);
- mempool_free(pfault, eq->pf_ctx.pool);
+ return cq;
}
-static void eq_pf_process(struct mlx5_eq *eq)
+static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
{
- struct mlx5_core_dev *dev = eq->dev;
- struct mlx5_eqe_page_fault *pf_eqe;
- struct mlx5_pagefault *pfault;
+ struct mlx5_eq_comp *eq_comp = eq_ptr;
+ struct mlx5_eq *eq = eq_ptr;
struct mlx5_eqe *eqe;
int set_ci = 0;
+ u32 cqn = -1;
while ((eqe = next_eqe_sw(eq))) {
- pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
- if (!pfault) {
- schedule_work(&eq->pf_ctx.work);
- break;
- }
-
+ struct mlx5_core_cq *cq;
+ /* Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
dma_rmb();
- pf_eqe = &eqe->data.page_fault;
- pfault->event_subtype = eqe->sub_type;
- pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
-
- mlx5_core_dbg(dev,
- "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
- eqe->sub_type, pfault->bytes_committed);
-
- switch (eqe->sub_type) {
- case MLX5_PFAULT_SUBTYPE_RDMA:
- /* RDMA based event */
- pfault->type =
- be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
- pfault->token =
- be32_to_cpu(pf_eqe->rdma.pftype_token) &
- MLX5_24BIT_MASK;
- pfault->rdma.r_key =
- be32_to_cpu(pf_eqe->rdma.r_key);
- pfault->rdma.packet_size =
- be16_to_cpu(pf_eqe->rdma.packet_length);
- pfault->rdma.rdma_op_len =
- be32_to_cpu(pf_eqe->rdma.rdma_op_len);
- pfault->rdma.rdma_va =
- be64_to_cpu(pf_eqe->rdma.rdma_va);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
- pfault->type, pfault->token,
- pfault->rdma.r_key);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
- pfault->rdma.rdma_op_len,
- pfault->rdma.rdma_va);
- break;
-
- case MLX5_PFAULT_SUBTYPE_WQE:
- /* WQE based event */
- pfault->type =
- (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
- pfault->token =
- be32_to_cpu(pf_eqe->wqe.token);
- pfault->wqe.wq_num =
- be32_to_cpu(pf_eqe->wqe.pftype_wq) &
- MLX5_24BIT_MASK;
- pfault->wqe.wqe_index =
- be16_to_cpu(pf_eqe->wqe.wqe_index);
- pfault->wqe.packet_size =
- be16_to_cpu(pf_eqe->wqe.packet_length);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
- pfault->type, pfault->token,
- pfault->wqe.wq_num,
- pfault->wqe.wqe_index);
- break;
-
- default:
- mlx5_core_warn(dev,
- "Unsupported page fault event sub-type: 0x%02hhx\n",
- eqe->sub_type);
- /* Unsupported page faults should still be
- * resolved by the page fault handler
- */
+ /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
+ cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+
+ cq = mlx5_eq_cq_get(eq, cqn);
+ if (likely(cq)) {
+ ++cq->arm_sn;
+ cq->comp(cq);
+ mlx5_cq_put(cq);
+ } else {
+ mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
}
- pfault->eq = eq;
- INIT_WORK(&pfault->work, eqe_pf_action);
- queue_work(eq->pf_ctx.wq, &pfault->work);
-
++eq->cons_index;
++set_ci;
+ /* The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events. We
+ * create our EQs with MLX5_NUM_SPARE_EQE extra
+ * entries, so we must update our consumer index at
+ * least that often.
+ */
if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
eq_update_ci(eq, 0);
set_ci = 0;
@@ -313,165 +165,41 @@ static void eq_pf_process(struct mlx5_eq *eq)
}
eq_update_ci(eq, 1);
-}
-static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
-{
- struct mlx5_eq *eq = eq_ptr;
- unsigned long flags;
-
- if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
- eq_pf_process(eq);
- spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
- } else {
- schedule_work(&eq->pf_ctx.work);
- }
+ if (cqn != -1)
+ tasklet_schedule(&eq_comp->tasklet_ctx.task);
return IRQ_HANDLED;
}
-/* mempool_refill() was proposed but unfortunately wasn't accepted
- * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
- * Chip workaround.
+/* Some architectures don't latch interrupts when they are disabled, so using
+ * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
+ * avoid losing them. It is not recommended to use it, unless this is the last
+ * resort.
*/
-static void mempool_refill(mempool_t *pool)
-{
- while (pool->curr_nr < pool->min_nr)
- mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
-}
-
-static void eq_pf_action(struct work_struct *work)
-{
- struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
-
- mempool_refill(eq->pf_ctx.pool);
-
- spin_lock_irq(&eq->pf_ctx.lock);
- eq_pf_process(eq);
- spin_unlock_irq(&eq->pf_ctx.lock);
-}
-
-static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
-{
- spin_lock_init(&pf_ctx->lock);
- INIT_WORK(&pf_ctx->work, eq_pf_action);
-
- pf_ctx->wq = alloc_ordered_workqueue(name,
- WQ_MEM_RECLAIM);
- if (!pf_ctx->wq)
- return -ENOMEM;
-
- pf_ctx->pool = mempool_create_kmalloc_pool
- (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
- if (!pf_ctx->pool)
- goto err_wq;
-
- return 0;
-err_wq:
- destroy_workqueue(pf_ctx->wq);
- return -ENOMEM;
-}
-
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
- u32 wq_num, u8 type, int error)
-{
- u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
-
- MLX5_SET(page_fault_resume_in, in, opcode,
- MLX5_CMD_OP_PAGE_FAULT_RESUME);
- MLX5_SET(page_fault_resume_in, in, error, !!error);
- MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
- MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
- MLX5_SET(page_fault_resume_in, in, token, token);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
-#endif
-
-static void general_event_handler(struct mlx5_core_dev *dev,
- struct mlx5_eqe *eqe)
-{
- switch (eqe->sub_type) {
- case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
- if (dev->event)
- dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
- break;
- default:
- mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
- eqe->sub_type);
- }
-}
-
-static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
- struct mlx5_eqe *eqe)
-{
- u64 value_lsb;
- u64 value_msb;
-
- value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
- value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
-
- mlx5_core_warn(dev,
- "High temperature on sensors with bit set %llx %llx",
- value_msb, value_lsb);
-}
-
-/* caller must eventually call mlx5_cq_put on the returned cq */
-static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
-{
- struct mlx5_cq_table *table = &eq->cq_table;
- struct mlx5_core_cq *cq = NULL;
-
- spin_lock(&table->lock);
- cq = radix_tree_lookup(&table->tree, cqn);
- if (likely(cq))
- mlx5_cq_hold(cq);
- spin_unlock(&table->lock);
-
- return cq;
-}
-
-static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
-{
- struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
- if (unlikely(!cq)) {
- mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
- return;
- }
-
- ++cq->arm_sn;
-
- cq->comp(cq);
-
- mlx5_cq_put(cq);
-}
-
-static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
{
- struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
- if (unlikely(!cq)) {
- mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
- return;
- }
+ u32 count_eqe;
- cq->event(cq, event_type);
+ disable_irq(eq->core.irqn);
+ count_eqe = eq->core.cons_index;
+ mlx5_eq_comp_int(eq->core.irqn, eq);
+ count_eqe = eq->core.cons_index - count_eqe;
+ enable_irq(eq->core.irqn);
- mlx5_cq_put(cq);
+ return count_eqe;
}
-static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
+static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
{
struct mlx5_eq *eq = eq_ptr;
- struct mlx5_core_dev *dev = eq->dev;
+ struct mlx5_eq_table *eqt;
+ struct mlx5_core_dev *dev;
struct mlx5_eqe *eqe;
int set_ci = 0;
- u32 cqn = -1;
- u32 rsn;
- u8 port;
+
+ dev = eq->dev;
+ eqt = dev->priv.eq_table;
while ((eqe = next_eqe_sw(eq))) {
/*
@@ -480,116 +208,12 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
*/
dma_rmb();
- mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
- eq->eqn, eqe_type_str(eqe->type));
- switch (eqe->type) {
- case MLX5_EVENT_TYPE_COMP:
- cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
- mlx5_eq_cq_completion(eq, cqn);
- break;
- case MLX5_EVENT_TYPE_DCT_DRAINED:
- rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
- rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
- mlx5_rsc_event(dev, rsn, eqe->type);
- break;
- case MLX5_EVENT_TYPE_PATH_MIG:
- case MLX5_EVENT_TYPE_COMM_EST:
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
- rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
- mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
- eqe_type_str(eqe->type), eqe->type, rsn);
- mlx5_rsc_event(dev, rsn, eqe->type);
- break;
-
- case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
- case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
- rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
- mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
- eqe_type_str(eqe->type), eqe->type, rsn);
- mlx5_srq_event(dev, rsn, eqe->type);
- break;
-
- case MLX5_EVENT_TYPE_CMD:
- mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
- break;
+ if (likely(eqe->type < MLX5_EVENT_TYPE_MAX))
+ atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
+ else
+ mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type);
- case MLX5_EVENT_TYPE_PORT_CHANGE:
- port = (eqe->data.port.port >> 4) & 0xf;
- switch (eqe->sub_type) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- case MLX5_PORT_CHANGE_SUBTYPE_LID:
- case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
- case MLX5_PORT_CHANGE_SUBTYPE_GUID:
- case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
- case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
- if (dev->event)
- dev->event(dev, port_subtype_event(eqe->sub_type),
- (unsigned long)port);
- break;
- default:
- mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
- port, eqe->sub_type);
- }
- break;
- case MLX5_EVENT_TYPE_CQ_ERROR:
- cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
- mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
- cqn, eqe->data.cq_err.syndrome);
- mlx5_eq_cq_event(eq, cqn, eqe->type);
- break;
-
- case MLX5_EVENT_TYPE_PAGE_REQUEST:
- {
- u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
- s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
-
- mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
- func_id, npages);
- mlx5_core_req_pages_handler(dev, func_id, npages);
- }
- break;
-
- case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
- mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
- break;
-
- case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
- mlx5_port_module_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_PPS_EVENT:
- mlx5_pps_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_FPGA_ERROR:
- case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
- mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
- break;
-
- case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
- mlx5_temp_warning_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_GENERAL_EVENT:
- general_event_handler(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_DEVICE_TRACER:
- mlx5_fw_tracer_event(dev, eqe);
- break;
-
- default:
- mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
- eqe->type, eq->eqn);
- break;
- }
+ atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
++eq->cons_index;
++set_ci;
@@ -608,30 +232,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
eq_update_ci(eq, 1);
- if (cqn != -1)
- tasklet_schedule(&eq->tasklet_ctx.task);
-
return IRQ_HANDLED;
}
-/* Some architectures don't latch interrupts when they are disabled, so using
- * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
- * avoid losing them. It is not recommended to use it, unless this is the last
- * resort.
- */
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
-{
- u32 count_eqe;
-
- disable_irq(eq->irqn);
- count_eqe = eq->cons_index;
- mlx5_eq_int(eq->irqn, eq);
- count_eqe = eq->cons_index - count_eqe;
- enable_irq(eq->irqn);
-
- return count_eqe;
-}
-
static void init_eq_buf(struct mlx5_eq *eq)
{
struct mlx5_eqe *eqe;
@@ -643,39 +246,35 @@ static void init_eq_buf(struct mlx5_eq *eq)
}
}
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type)
+static int
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
+ struct mlx5_eq_param *param)
{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
- irq_handler_t handler;
+ u8 vecidx = param->index;
__be64 *pas;
void *eqc;
int inlen;
u32 *in;
int err;
+ if (eq_table->irq_info[vecidx].context)
+ return -EEXIST;
+
/* Init CQ table */
memset(cq_table, 0, sizeof(*cq_table));
spin_lock_init(&cq_table->lock);
INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
- eq->type = type;
- eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+ eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
if (err)
return err;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (type == MLX5_EQ_TYPE_PF)
- handler = mlx5_eq_pf_int;
- else
-#endif
- handler = mlx5_eq_int;
-
init_eq_buf(eq);
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
@@ -691,7 +290,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
mlx5_fill_page_array(&eq->buf, pas);
MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
- MLX5_SET64(create_eq_in, in, event_bitmask, mask);
+ MLX5_SET64(create_eq_in, in, event_bitmask, param->mask);
eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
@@ -704,15 +303,17 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
if (err)
goto err_in;
- snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
+ snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
name, pci_name(dev->pdev));
+ eq_table->irq_info[vecidx].context = param->context;
+ eq->vecidx = vecidx;
eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
eq->irqn = pci_irq_vector(dev->pdev, vecidx);
eq->dev = dev;
eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
- err = request_irq(eq->irqn, handler, 0,
- priv->irq_info[vecidx].name, eq);
+ err = request_irq(eq->irqn, param->handler, 0,
+ eq_table->irq_info[vecidx].name, param->context);
if (err)
goto err_eq;
@@ -720,21 +321,6 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
if (err)
goto err_irq;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (type == MLX5_EQ_TYPE_PF) {
- err = init_pf_ctx(&eq->pf_ctx, name);
- if (err)
- goto err_irq;
- } else
-#endif
- {
- INIT_LIST_HEAD(&eq->tasklet_ctx.list);
- INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
- spin_lock_init(&eq->tasklet_ctx.lock);
- tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
- (unsigned long)&eq->tasklet_ctx);
- }
-
/* EQs are created in ARMED state
*/
eq_update_ci(eq, 1);
@@ -756,27 +342,25 @@ err_buf:
return err;
}
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ struct mlx5_irq_info *irq_info;
int err;
+ irq_info = &eq_table->irq_info[eq->vecidx];
+
mlx5_debug_eq_remove(dev, eq);
- free_irq(eq->irqn, eq);
+
+ free_irq(eq->irqn, irq_info->context);
+ irq_info->context = NULL;
+
err = mlx5_cmd_destroy_eq(dev, eq->eqn);
if (err)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
synchronize_irq(eq->irqn);
- if (eq->type == MLX5_EQ_TYPE_COMP) {
- tasklet_disable(&eq->tasklet_ctx.task);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- } else if (eq->type == MLX5_EQ_TYPE_PF) {
- cancel_work_sync(&eq->pf_ctx.work);
- destroy_workqueue(eq->pf_ctx.wq);
- mempool_destroy(eq->pf_ctx.pool);
-#endif
- }
mlx5_buf_free(dev, &eq->buf);
return err;
@@ -816,28 +400,106 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
return 0;
}
-int mlx5_eq_init(struct mlx5_core_dev *dev)
+int mlx5_eq_table_init(struct mlx5_core_dev *dev)
{
- int err;
+ struct mlx5_eq_table *eq_table;
+ int i, err;
- spin_lock_init(&dev->priv.eq_table.lock);
+ eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
+ if (!eq_table)
+ return -ENOMEM;
+
+ dev->priv.eq_table = eq_table;
err = mlx5_eq_debugfs_init(dev);
+ if (err)
+ goto kvfree_eq_table;
+
+ mutex_init(&eq_table->lock);
+ for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
+ ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
+ return 0;
+
+kvfree_eq_table:
+ kvfree(eq_table);
+ dev->priv.eq_table = NULL;
return err;
}
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
{
mlx5_eq_debugfs_cleanup(dev);
+ kvfree(dev->priv.eq_table);
}
-int mlx5_start_eqs(struct mlx5_core_dev *dev)
+/* Async EQs */
+
+static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
+ struct mlx5_eq *eq, struct mlx5_eq_param *param)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ int err;
+
+ mutex_lock(&eq_table->lock);
+ if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
+ err = -ENOSPC;
+ goto unlock;
+ }
+
+ err = create_map_eq(dev, eq, name, param);
+unlock:
+ mutex_unlock(&eq_table->lock);
+ return err;
+}
+
+static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
+ mutex_lock(&eq_table->lock);
+ err = destroy_unmap_eq(dev, eq);
+ mutex_unlock(&eq_table->lock);
+ return err;
+}
+
+static int cq_err_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_eq_table *eqt;
+ struct mlx5_core_cq *cq;
+ struct mlx5_eqe *eqe;
+ struct mlx5_eq *eq;
+ u32 cqn;
+
+ /* type == MLX5_EVENT_TYPE_CQ_ERROR */
+
+ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
+ eq = &eqt->async_eq;
+ eqe = data;
+
+ cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
+ cqn, eqe->data.cq_err.syndrome);
+
+ cq = mlx5_eq_cq_get(eq, cqn);
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+ return NOTIFY_OK;
+ }
+
+ cq->event(cq, type);
+
+ mlx5_cq_put(cq);
+
+ return NOTIFY_OK;
+}
+
+static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
+{
+ u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+
if (MLX5_VPORT_MANAGER(dev))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
@@ -865,127 +527,521 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
- err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
- MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
- "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
+ if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
+
+ return async_event_mask;
+}
+
+static int create_async_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_param param = {};
+ int err;
+
+ MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
+ mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_CMD_IDX,
+ .mask = 1ull << MLX5_EVENT_TYPE_CMD,
+ .nent = MLX5_NUM_CMD_EQE,
+ .context = &table->cmd_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
- return err;
+ goto err0;
}
mlx5_cmd_use_events(dev);
- err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
- MLX5_NUM_ASYNC_EQE, async_event_mask,
- "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_ASYNC_IDX,
+ .mask = gather_async_events_mask(dev),
+ .nent = MLX5_NUM_ASYNC_EQE,
+ .context = &table->async_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
goto err1;
}
- err = mlx5_create_map_eq(dev, &table->pages_eq,
- MLX5_EQ_VEC_PAGES,
- /* TODO: sriov max_vf + */ 1,
- 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
- MLX5_EQ_TYPE_ASYNC);
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_PAGEREQ_IDX,
+ .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
+ .nent = /* TODO: sriov max_vf + */ 1,
+ .context = &table->pages_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
goto err2;
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg)) {
- err = mlx5_create_map_eq(dev, &table->pfault_eq,
- MLX5_EQ_VEC_PFAULT,
- MLX5_NUM_ASYNC_EQE,
- 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
- "mlx5_page_fault_eq",
- MLX5_EQ_TYPE_PF);
- if (err) {
- mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
- err);
- goto err3;
- }
- }
-
- return err;
-err3:
- mlx5_destroy_unmap_eq(dev, &table->pages_eq);
-#else
return err;
-#endif
err2:
- mlx5_destroy_unmap_eq(dev, &table->async_eq);
+ destroy_async_eq(dev, &table->async_eq);
err1:
mlx5_cmd_use_polling(dev);
- mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ destroy_async_eq(dev, &table->cmd_eq);
+err0:
+ mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
return err;
}
-void mlx5_stop_eqs(struct mlx5_core_dev *dev)
+static void destroy_async_eqs(struct mlx5_core_dev *dev)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq_table *table = dev->priv.eq_table;
int err;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg)) {
- err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
- if (err)
- mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
- err);
- }
-#endif
-
- err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+ err = destroy_async_eq(dev, &table->pages_eq);
if (err)
mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
err);
- err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+ err = destroy_async_eq(dev, &table->async_eq);
if (err)
mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
err);
+
mlx5_cmd_use_polling(dev);
- err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ err = destroy_async_eq(dev, &table->cmd_eq);
if (err)
mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
err);
+
+ mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
}
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen)
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
{
- u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
+ return &dev->priv.eq_table->async_eq;
+}
- MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
- MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
+{
+ synchronize_irq(dev->priv.eq_table->async_eq.irqn);
+}
+
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
+{
+ synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
+}
+
+/* Generic EQ API for mlx5_core consumers
+ * Needed For RDMA ODP EQ for now
+ */
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+ struct mlx5_eq_param *param)
+{
+ struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
+ int err;
+
+ if (!eq)
+ return ERR_PTR(-ENOMEM);
+
+ err = create_async_eq(dev, name, eq, param);
+ if (err) {
+ kvfree(eq);
+ eq = ERR_PTR(err);
+ }
+
+ return eq;
+}
+EXPORT_SYMBOL(mlx5_eq_create_generic);
+
+int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ if (IS_ERR(eq))
+ return -EINVAL;
+
+ err = destroy_async_eq(dev, eq);
+ if (err)
+ goto out;
+
+ kvfree(eq);
+out:
+ return err;
+}
+EXPORT_SYMBOL(mlx5_eq_destroy_generic);
+
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
+{
+ u32 ci = eq->cons_index + cc;
+ struct mlx5_eqe *eqe;
+
+ eqe = get_eqe(eq, ci & (eq->nent - 1));
+ eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
+ /* Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ if (eqe)
+ dma_rmb();
+
+ return eqe;
+}
+EXPORT_SYMBOL(mlx5_eq_get_eqe);
+
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val;
+
+ eq->cons_index += cc;
+ val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+EXPORT_SYMBOL(mlx5_eq_update_ci);
+
+/* Completion EQs */
+
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ struct mlx5_priv *priv = &mdev->priv;
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+ struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+ if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
+ mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+ return -ENOMEM;
+ }
+
+ cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+ irq_info->mask);
+
+ if (IS_ENABLED(CONFIG_SMP) &&
+ irq_set_affinity_hint(irq, irq_info->mask))
+ mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+ return 0;
+}
+
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ struct mlx5_priv *priv = &mdev->priv;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+ struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+ irq_set_affinity_hint(irq, NULL);
+ free_cpumask_var(irq_info->mask);
+}
+
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
+ err = set_comp_irq_affinity_hint(mdev, i);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ for (i--; i >= 0; i--)
+ clear_comp_irq_affinity_hint(mdev, i);
+
+ return err;
+}
+
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
+ clear_comp_irq_affinity_hint(mdev, i);
+}
+
+static void destroy_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+
+ clear_comp_irqs_affinity_hints(dev);
+
+#ifdef CONFIG_RFS_ACCEL
+ if (table->rmap) {
+ free_irq_cpu_rmap(table->rmap);
+ table->rmap = NULL;
+ }
+#endif
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ list_del(&eq->list);
+ if (destroy_unmap_eq(dev, &eq->core))
+ mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
+ eq->core.eqn);
+ tasklet_disable(&eq->tasklet_ctx.task);
+ kfree(eq);
+ }
+}
+
+static int create_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ char name[MLX5_MAX_IRQ_NAME];
+ struct mlx5_eq_comp *eq;
+ int ncomp_vec;
+ int nent;
+ int err;
+ int i;
+
+ INIT_LIST_HEAD(&table->comp_eqs_list);
+ ncomp_vec = table->num_comp_vectors;
+ nent = MLX5_COMP_EQ_SIZE;
+#ifdef CONFIG_RFS_ACCEL
+ table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
+ if (!table->rmap)
+ return -ENOMEM;
+#endif
+ for (i = 0; i < ncomp_vec; i++) {
+ int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
+ struct mlx5_eq_param param = {};
+
+ eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+ if (!eq) {
+ err = -ENOMEM;
+ goto clean;
+ }
+
+ INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+ INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+ spin_lock_init(&eq->tasklet_ctx.lock);
+ tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
+ (unsigned long)&eq->tasklet_ctx);
+
+#ifdef CONFIG_RFS_ACCEL
+ irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
+#endif
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+ param = (struct mlx5_eq_param) {
+ .index = vecidx,
+ .mask = 0,
+ .nent = nent,
+ .context = &eq->core,
+ .handler = mlx5_eq_comp_int
+ };
+ err = create_map_eq(dev, &eq->core, name, &param);
+ if (err) {
+ kfree(eq);
+ goto clean;
+ }
+ mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
+ /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
+ list_add_tail(&eq->list, &table->comp_eqs_list);
+ }
+
+ err = set_comp_irq_affinity_hints(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+ goto clean;
+ }
+
+ return 0;
+
+clean:
+ destroy_comp_eqs(dev);
+ return err;
+}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+ unsigned int *irqn)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+ int err = -ENOENT;
+ int i = 0;
+
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ if (i++ == vector) {
+ *eqn = eq->core.eqn;
+ *irqn = eq->core.irqn;
+ err = 0;
+ break;
+ }
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
+
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
+{
+ return dev->priv.eq_table->num_comp_vectors;
+}
+EXPORT_SYMBOL(mlx5_comp_vectors_count);
+
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
+{
+ /* TODO: consider irq_get_affinity_mask(irq) */
+ return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
+}
+EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
+
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ return dev->priv.eq_table->rmap;
+#else
+ return NULL;
+#endif
+}
+
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq;
+
+ list_for_each_entry(eq, &table->comp_eqs_list, list) {
+ if (eq->core.eqn == eqn)
+ return eq;
+ }
+
+ return ERR_PTR(-ENOENT);
}
/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq;
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int i, max_eqs;
+
+ clear_comp_irqs_affinity_hints(dev);
#ifdef CONFIG_RFS_ACCEL
- if (dev->rmap) {
- free_irq_cpu_rmap(dev->rmap);
- dev->rmap = NULL;
+ if (table->rmap) {
+ free_irq_cpu_rmap(table->rmap);
+ table->rmap = NULL;
}
#endif
- list_for_each_entry(eq, &table->comp_eqs_list, list)
- free_irq(eq->irqn, eq);
-
- free_irq(table->pages_eq.irqn, &table->pages_eq);
- free_irq(table->async_eq.irqn, &table->async_eq);
- free_irq(table->cmd_eq.irqn, &table->cmd_eq);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg))
- free_irq(table->pfault_eq.irqn, &table->pfault_eq);
-#endif
+
+ mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+ max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
+ for (i = max_eqs - 1; i >= 0; i--) {
+ if (!table->irq_info[i].context)
+ continue;
+ free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
+ table->irq_info[i].context = NULL;
+ }
+ mutex_unlock(&table->lock);
+ pci_free_irq_vectors(dev->pdev);
+}
+
+static int alloc_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_eq_table *table = priv->eq_table;
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int nvec;
+ int err;
+
+ nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+ MLX5_EQ_VEC_COMP_BASE;
+ nvec = min_t(int, nvec, num_eqs);
+ if (nvec <= MLX5_EQ_VEC_COMP_BASE)
+ return -ENOMEM;
+
+ table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
+ if (!table->irq_info)
+ return -ENOMEM;
+
+ nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
+ nvec, PCI_IRQ_MSIX);
+ if (nvec < 0) {
+ err = nvec;
+ goto err_free_irq_info;
+ }
+
+ table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
+
+ return 0;
+
+err_free_irq_info:
+ kfree(table->irq_info);
+ return err;
+}
+
+static void free_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+
pci_free_irq_vectors(dev->pdev);
+ kfree(priv->eq_table->irq_info);
+}
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ err = alloc_irq_vectors(dev);
+ if (err) {
+ mlx5_core_err(dev, "alloc irq vectors failed\n");
+ return err;
+ }
+
+ err = create_async_eqs(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to create async EQs\n");
+ goto err_async_eqs;
+ }
+
+ err = create_comp_eqs(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to create completion EQs\n");
+ goto err_comp_eqs;
+ }
+
+ return 0;
+err_comp_eqs:
+ destroy_async_eqs(dev);
+err_async_eqs:
+ free_irq_vectors(dev);
+ return err;
+}
+
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
+{
+ destroy_comp_eqs(dev);
+ destroy_async_eqs(dev);
+ free_irq_vectors(dev);
+}
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+ struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+ if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
+}
+
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+ struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+ if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index d004957328f9..a44ea7b85614 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -36,6 +36,7 @@
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "eswitch.h"
#include "fs_core.h"
@@ -1567,7 +1568,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
/* Mark this vport as disabled to discard new events */
vport->enabled = false;
- synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC));
/* Wait for current already scheduled events to complete */
flush_workqueue(esw->work_queue);
/* Disable events from this vport */
@@ -1593,10 +1593,25 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
mutex_unlock(&esw->state_lock);
}
+static int eswitch_vport_event(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb);
+ struct mlx5_eqe *eqe = data;
+ struct mlx5_vport *vport;
+ u16 vport_num;
+
+ vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);
+ vport = &esw->vports[vport_num];
+ if (vport->enabled)
+ queue_work(esw->work_queue, &vport->vport_change_handler);
+
+ return NOTIFY_OK;
+}
+
/* Public E-Switch API */
#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
-
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
{
int err;
@@ -1615,13 +1630,16 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n");
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
+
esw->mode = mode;
+ mlx5_lag_update(esw->dev);
+
if (mode == SRIOV_LEGACY) {
err = esw_create_legacy_fdb_table(esw);
} else {
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-
err = esw_offloads_init(esw, nvfs + 1);
}
@@ -1640,6 +1658,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
for (i = 0; i <= nvfs; i++)
esw_enable_vport(esw, i, enabled_events);
+ if (mode == SRIOV_LEGACY) {
+ MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+ mlx5_eq_notifier_register(esw->dev, &esw->nb);
+ }
+
esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
esw->enabled_vports);
return 0;
@@ -1647,8 +1670,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
abort:
esw->mode = SRIOV_NONE;
- if (mode == SRIOV_OFFLOADS)
+ if (mode == SRIOV_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
+ }
return err;
}
@@ -1669,6 +1694,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
mc_promisc = &esw->mc_promisc;
nvports = esw->enabled_vports;
+ if (esw->mode == SRIOV_LEGACY)
+ mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+
for (i = 0; i < esw->total_vports; i++)
esw_disable_vport(esw, i);
@@ -1685,8 +1713,12 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
old_mode = esw->mode;
esw->mode = SRIOV_NONE;
- if (old_mode == SRIOV_OFFLOADS)
+ mlx5_lag_update(esw->dev);
+
+ if (old_mode == SRIOV_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
+ }
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@@ -1777,23 +1809,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
kfree(esw);
}
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
-{
- struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change;
- u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
- struct mlx5_vport *vport;
-
- if (!esw) {
- pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
- vport_num);
- return;
- }
-
- vport = &esw->vports[vport_num];
- if (vport->enabled)
- queue_work(esw->work_queue, &vport->vport_change_handler);
-}
-
/* Vport Administration */
#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
@@ -2219,3 +2234,14 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
+
+bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
+{
+ if ((dev0->priv.eswitch->mode == SRIOV_NONE &&
+ dev1->priv.eswitch->mode == SRIOV_NONE) ||
+ (dev0->priv.eswitch->mode == SRIOV_OFFLOADS &&
+ dev1->priv.eswitch->mode == SRIOV_OFFLOADS))
+ return true;
+
+ return false;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index aaafc9f17115..9c89eea9b2c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -143,6 +143,8 @@ struct mlx5_eswitch_fdb {
struct offloads_fdb {
struct mlx5_flow_table *slow_fdb;
struct mlx5_flow_group *send_to_vport_grp;
+ struct mlx5_flow_group *peer_miss_grp;
+ struct mlx5_flow_handle **peer_miss_rules;
struct mlx5_flow_group *miss_grp;
struct mlx5_flow_handle *miss_rule_uni;
struct mlx5_flow_handle *miss_rule_multi;
@@ -165,6 +167,8 @@ struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
struct mlx5_eswitch_rep *vport_reps;
+ struct list_head peer_flows;
+ struct mutex peer_mutex;
DECLARE_HASHTABLE(encap_tbl, 8);
DECLARE_HASHTABLE(mod_hdr_tbl, 8);
u8 inline_mode;
@@ -181,6 +185,7 @@ struct esw_mc_addr { /* SRIOV only */
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
struct mlx5_eswitch_fdb fdb_table;
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
struct workqueue_struct *work_queue;
@@ -211,7 +216,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
@@ -281,13 +285,17 @@ enum mlx5_flow_match_level {
/* current maximum for flow based vport multicasting */
#define MLX5_MAX_FLOW_FWD_VPORTS 2
+enum {
+ MLX5_ESW_DEST_ENCAP = BIT(0),
+ MLX5_ESW_DEST_ENCAP_VALID = BIT(1),
+};
+
struct mlx5_esw_flow_attr {
struct mlx5_eswitch_rep *in_rep;
- struct mlx5_eswitch_rep *out_rep[MLX5_MAX_FLOW_FWD_VPORTS];
- struct mlx5_core_dev *out_mdev[MLX5_MAX_FLOW_FWD_VPORTS];
struct mlx5_core_dev *in_mdev;
+ struct mlx5_core_dev *counter_dev;
- int mirror_count;
+ int split_count;
int out_count;
int action;
@@ -296,7 +304,12 @@ struct mlx5_esw_flow_attr {
u8 vlan_prio[MLX5_FS_VLAN_DEPTH];
u8 total_vlan;
bool vlan_handled;
- u32 encap_id;
+ struct {
+ u32 flags;
+ struct mlx5_eswitch_rep *rep;
+ struct mlx5_core_dev *mdev;
+ u32 encap_id;
+ } dests[MLX5_MAX_FLOW_FWD_VPORTS];
u32 mod_hdr_id;
u8 match_level;
struct mlx5_fc *counter;
@@ -338,6 +351,9 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
}
+bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1);
+
#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
#define esw_info(dev, format, ...) \
@@ -352,9 +368,9 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
-static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
#define FDB_MAX_CHAIN 1
#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 9eac137790f5..53065b6ae593 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -39,6 +39,7 @@
#include "eswitch.h"
#include "en.h"
#include "fs_core.h"
+#include "lib/devcom.h"
enum {
FDB_FAST_PATH = 0,
@@ -81,7 +82,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
{
struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
- bool mirror = !!(attr->mirror_count);
+ bool split = !!(attr->split_count);
struct mlx5_flow_handle *rule;
struct mlx5_flow_table *fdb;
int j, i = 0;
@@ -120,13 +121,21 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
dest[i].ft = ft;
i++;
} else {
- for (j = attr->mirror_count; j < attr->out_count; j++) {
+ for (j = attr->split_count; j < attr->out_count; j++) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->out_rep[j]->vport;
+ dest[i].vport.num = attr->dests[j].rep->vport;
dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
- dest[i].vport.vhca_id_valid =
- !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ dest[i].vport.flags |=
+ MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act.reformat_id = attr->dests[j].encap_id;
+ dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+ dest[i].vport.reformat_id =
+ attr->dests[j].encap_id;
+ }
i++;
}
}
@@ -163,10 +172,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_id = attr->mod_hdr_id;
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
- flow_act.reformat_id = attr->encap_id;
-
- fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split);
if (IS_ERR(fdb)) {
rule = ERR_CAST(fdb);
goto err_esw_get;
@@ -181,7 +187,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
return rule;
err_add_rule:
- esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
err_esw_get:
if (attr->dest_chain)
esw_put_prio_table(esw, attr->dest_chain, 1, 0);
@@ -215,12 +221,17 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- for (i = 0; i < attr->mirror_count; i++) {
+ for (i = 0; i < attr->split_count; i++) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->out_rep[i]->vport;
+ dest[i].vport.num = attr->dests[i].rep->vport;
dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->out_mdev[i], vhca_id);
- dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
+ dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+ dest[i].vport.reformat_id = attr->dests[i].encap_id;
+ }
}
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[i].ft = fwd_fdb,
@@ -268,7 +279,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
struct mlx5_esw_flow_attr *attr,
bool fwd_rule)
{
- bool mirror = (attr->mirror_count > 0);
+ bool split = (attr->split_count > 0);
mlx5_del_flow_rules(rule);
esw->offloads.num_flows--;
@@ -277,7 +288,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
esw_put_prio_table(esw, attr->chain, attr->prio, 1);
esw_put_prio_table(esw, attr->chain, attr->prio, 0);
} else {
- esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
if (attr->dest_chain)
esw_put_prio_table(esw, attr->dest_chain, 1, 0);
}
@@ -325,7 +336,7 @@ esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop)
struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
in_rep = attr->in_rep;
- out_rep = attr->out_rep[0];
+ out_rep = attr->dests[0].rep;
if (push)
vport = in_rep;
@@ -346,7 +357,7 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
goto out_notsupp;
in_rep = attr->in_rep;
- out_rep = attr->out_rep[0];
+ out_rep = attr->dests[0].rep;
if (push && in_rep->vport == FDB_UPLINK_VPORT)
goto out_notsupp;
@@ -398,7 +409,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
if (!push && !pop && fwd) {
/* tracks VF --> wire rules without vlan push action */
- if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) {
+ if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) {
vport->vlan_refcount++;
attr->vlan_handled = true;
}
@@ -458,7 +469,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
if (!push && !pop && fwd) {
/* tracks VF --> wire rules without vlan push action */
- if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT)
+ if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT)
vport->vlan_refcount--;
return 0;
@@ -531,6 +542,98 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
+static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_destination *dest)
+{
+ void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(peer_dev, vhca_id));
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest->vport.num = 0;
+ dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
+ dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+}
+
+static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
+ struct mlx5_core_dev *peer_dev)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_handle **flows;
+ struct mlx5_flow_handle *flow;
+ struct mlx5_flow_spec *spec;
+ /* total vports is the same for both e-switches */
+ int nvports = esw->total_vports;
+ void *misc;
+ int err, i;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ peer_miss_rules_setup(peer_dev, spec, &dest);
+
+ flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
+ if (!flows) {
+ err = -ENOMEM;
+ goto alloc_flows_err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ for (i = 1; i < nvports; i++) {
+ MLX5_SET(fte_match_set_misc, misc, source_port, i);
+ flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
+ goto add_flow_err;
+ }
+ flows[i] = flow;
+ }
+
+ esw->fdb_table.offloads.peer_miss_rules = flows;
+
+ kvfree(spec);
+ return 0;
+
+add_flow_err:
+ for (i--; i > 0; i--)
+ mlx5_del_flow_rules(flows[i]);
+ kvfree(flows);
+alloc_flows_err:
+ kvfree(spec);
+ return err;
+}
+
+static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_handle **flows;
+ int i;
+
+ flows = esw->fdb_table.offloads.peer_miss_rules;
+
+ for (i = 1; i < esw->total_vports; i++)
+ mlx5_del_flow_rules(flows[i]);
+
+ kvfree(flows);
+}
+
static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
{
struct mlx5_flow_act flow_act = {0};
@@ -801,7 +904,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
esw->fdb_table.offloads.fdb_left[i] =
ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
- table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
+ table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2 +
+ esw->total_vports;
/* create the slow path fdb with encap set, so further table instances
* can be created at run time while VFs are probed if the FW allows that.
@@ -856,6 +960,34 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
}
esw->fdb_table.offloads.send_to_vport_grp = g;
+ /* create peer esw miss group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ ix + esw->total_vports - 1);
+ ix += esw->total_vports;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
+ goto peer_miss_err;
+ }
+ esw->fdb_table.offloads.peer_miss_grp = g;
+
/* create miss group */
memset(flow_group_in, 0, inlen);
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -888,6 +1020,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
miss_rule_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
miss_err:
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+peer_miss_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
send_vport_err:
esw_destroy_offloads_fast_fdb_tables(esw);
@@ -907,6 +1041,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
@@ -1163,6 +1298,105 @@ err_reps:
return err;
}
+#define ESW_OFFLOADS_DEVCOM_PAIR (0)
+#define ESW_OFFLOADS_DEVCOM_UNPAIR (1)
+
+static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw)
+{
+ int err;
+
+ err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
+
+static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
+{
+ mlx5e_tc_clean_fdb_peer_flows(esw);
+ esw_del_fdb_peer_miss_rules(esw);
+}
+
+static int mlx5_esw_offloads_devcom_event(int event,
+ void *my_data,
+ void *event_data)
+{
+ struct mlx5_eswitch *esw = my_data;
+ struct mlx5_eswitch *peer_esw = event_data;
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+ int err;
+
+ switch (event) {
+ case ESW_OFFLOADS_DEVCOM_PAIR:
+ err = mlx5_esw_offloads_pair(esw, peer_esw);
+ if (err)
+ goto err_out;
+
+ err = mlx5_esw_offloads_pair(peer_esw, esw);
+ if (err)
+ goto err_pair;
+
+ mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
+ break;
+
+ case ESW_OFFLOADS_DEVCOM_UNPAIR:
+ if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+ break;
+
+ mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
+ mlx5_esw_offloads_unpair(peer_esw);
+ mlx5_esw_offloads_unpair(esw);
+ break;
+ }
+
+ return 0;
+
+err_pair:
+ mlx5_esw_offloads_unpair(esw);
+
+err_out:
+ mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d",
+ event, err);
+ return err;
+}
+
+static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+
+ INIT_LIST_HEAD(&esw->offloads.peer_flows);
+ mutex_init(&esw->offloads.peer_mutex);
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return;
+
+ mlx5_devcom_register_component(devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS,
+ mlx5_esw_offloads_devcom_event,
+ esw);
+
+ mlx5_devcom_send_event(devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS,
+ ESW_OFFLOADS_DEVCOM_PAIR, esw);
+}
+
+static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return;
+
+ mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+ ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
+
+ mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
+
int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
{
int err;
@@ -1185,6 +1419,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
if (err)
goto err_reps;
+ esw_offloads_devcom_init(esw);
return 0;
err_reps:
@@ -1215,14 +1450,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
}
}
- /* enable back PF RoCE */
- mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-
return err;
}
void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
{
+ esw_offloads_devcom_cleanup(esw);
esw_offloads_unload_reps(esw, nvports);
esw_destroy_vport_rx_group(esw);
esw_destroy_offloads_table(esw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
new file mode 100644
index 000000000000..fbc42b7252a9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+
+struct mlx5_event_nb {
+ struct mlx5_nb nb;
+ void *ctx;
+};
+
+/* General events handlers for the low level mlx5_core driver
+ *
+ * Other Major feature specific events such as
+ * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with
+ * separate notifiers callbacks, specifically by those mlx5 components.
+ */
+static int any_notifier(struct notifier_block *, unsigned long, void *);
+static int temp_warn(struct notifier_block *, unsigned long, void *);
+static int port_module(struct notifier_block *, unsigned long, void *);
+
+/* handler which forwards the event to events->nh, driver notifiers */
+static int forward_event(struct notifier_block *, unsigned long, void *);
+
+static struct mlx5_nb events_nbs_ref[] = {
+ /* Events to be proccessed by mlx5_core */
+ {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
+ {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
+ {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
+
+ /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+ /* QP/WQ resource events to forward */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
+ /* SRQ events */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
+};
+
+struct mlx5_events {
+ struct mlx5_core_dev *dev;
+ struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
+ /* driver notifier chain */
+ struct atomic_notifier_head nh;
+ /* port module events stats */
+ struct mlx5_pme_stats pme_stats;
+};
+
+static const char *eqe_type_str(u8 type)
+{
+ switch (type) {
+ case MLX5_EVENT_TYPE_COMP:
+ return "MLX5_EVENT_TYPE_COMP";
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ return "MLX5_EVENT_TYPE_PATH_MIG";
+ case MLX5_EVENT_TYPE_COMM_EST:
+ return "MLX5_EVENT_TYPE_COMM_EST";
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ return "MLX5_EVENT_TYPE_SQ_DRAINED";
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ return "MLX5_EVENT_TYPE_CQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_INTERNAL_ERROR:
+ return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return "MLX5_EVENT_TYPE_PORT_CHANGE";
+ case MLX5_EVENT_TYPE_GPIO_EVENT:
+ return "MLX5_EVENT_TYPE_GPIO_EVENT";
+ case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+ return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+ case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+ return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
+ case MLX5_EVENT_TYPE_REMOTE_CONFIG:
+ return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
+ case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
+ return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
+ case MLX5_EVENT_TYPE_STALL_EVENT:
+ return "MLX5_EVENT_TYPE_STALL_EVENT";
+ case MLX5_EVENT_TYPE_CMD:
+ return "MLX5_EVENT_TYPE_CMD";
+ case MLX5_EVENT_TYPE_PAGE_REQUEST:
+ return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+ case MLX5_EVENT_TYPE_PAGE_FAULT:
+ return "MLX5_EVENT_TYPE_PAGE_FAULT";
+ case MLX5_EVENT_TYPE_PPS_EVENT:
+ return "MLX5_EVENT_TYPE_PPS_EVENT";
+ case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+ return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_ERROR";
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+ case MLX5_EVENT_TYPE_MONITOR_COUNTER:
+ return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
+ case MLX5_EVENT_TYPE_DEVICE_TRACER:
+ return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+ default:
+ return "Unrecognized event";
+ }
+}
+
+/* handles all FW events, type == eqe->type */
+static int any_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
+ eqe_type_str(eqe->type), eqe->sub_type);
+ return NOTIFY_OK;
+}
+
+/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
+static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+ u64 value_lsb;
+ u64 value_msb;
+
+ value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+ value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+ mlx5_core_warn(events->dev,
+ "High temperature on sensors with bit set %llx %llx",
+ value_msb, value_lsb);
+
+ return NOTIFY_OK;
+}
+
+/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
+{
+ switch (status) {
+ case MLX5_MODULE_STATUS_PLUGGED:
+ return "Cable plugged";
+ case MLX5_MODULE_STATUS_UNPLUGGED:
+ return "Cable unplugged";
+ case MLX5_MODULE_STATUS_ERROR:
+ return "Cable error";
+ case MLX5_MODULE_STATUS_DISABLED:
+ return "Cable disabled";
+ default:
+ return "Unknown status";
+ }
+}
+
+static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
+{
+ switch (error) {
+ case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
+ return "Power budget exceeded";
+ case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
+ return "Long Range for non MLNX cable";
+ case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
+ return "Bus stuck (I2C or data shorted)";
+ case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
+ return "No EEPROM/retry timeout";
+ case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
+ return "Enforce part number list";
+ case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
+ return "Unknown identifier";
+ case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
+ return "High Temperature";
+ case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
+ return "Bad or shorted cable/module";
+ case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
+ return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
+ default:
+ return "Unknown error";
+ }
+}
+
+/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static int port_module(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ enum port_module_event_status_type module_status;
+ enum port_module_event_error_type error_type;
+ struct mlx5_eqe_port_module *module_event_eqe;
+ const char *status_str, *error_str;
+ u8 module_num;
+
+ module_event_eqe = &eqe->data.port_module;
+ module_num = module_event_eqe->module;
+ module_status = module_event_eqe->module_status &
+ PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+ error_type = module_event_eqe->error_type &
+ PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+
+ if (module_status < MLX5_MODULE_STATUS_NUM)
+ events->pme_stats.status_counters[module_status]++;
+ status_str = mlx5_pme_status_to_string(module_status);
+
+ if (module_status == MLX5_MODULE_STATUS_ERROR) {
+ if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
+ events->pme_stats.error_counters[error_type]++;
+ error_str = mlx5_pme_error_to_string(error_type);
+ }
+
+ if (!printk_ratelimit())
+ return NOTIFY_OK;
+
+ if (module_status == MLX5_MODULE_STATUS_ERROR)
+ mlx5_core_err(events->dev,
+ "Port module event[error]: module %u, %s, %s\n",
+ module_num, status_str, error_str);
+ else
+ mlx5_core_info(events->dev,
+ "Port module event: module %u, %s\n",
+ module_num, status_str);
+
+ return NOTIFY_OK;
+}
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
+{
+ *stats = dev->priv.events->pme_stats;
+}
+
+/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
+static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
+ eqe_type_str(eqe->type), eqe->sub_type);
+ atomic_notifier_call_chain(&events->nh, event, data);
+ return NOTIFY_OK;
+}
+
+int mlx5_events_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
+
+ if (!events)
+ return -ENOMEM;
+
+ ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
+ events->dev = dev;
+ dev->priv.events = events;
+ return 0;
+}
+
+void mlx5_events_cleanup(struct mlx5_core_dev *dev)
+{
+ kvfree(dev->priv.events);
+}
+
+void mlx5_events_start(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = dev->priv.events;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
+ events->notifiers[i].nb = events_nbs_ref[i];
+ events->notifiers[i].ctx = events;
+ mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
+ }
+}
+
+void mlx5_events_stop(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = dev->priv.events;
+ int i;
+
+ for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
+ mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
+}
+
+int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return atomic_notifier_chain_register(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_register);
+
+int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return atomic_notifier_chain_unregister(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_unregister);
+
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
+{
+ return atomic_notifier_call_chain(&events->nh, event, data);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 8ca1d1949d93..873541ef4c1b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -334,7 +334,7 @@ static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
{
u8 opcode, status = 0;
- opcode = cqe->op_own >> 4;
+ opcode = get_cqe_opcode(cqe);
switch (opcode) {
case MLX5_CQE_REQ_ERR:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 436a8136f26f..27c5f6c7d36a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -36,6 +36,7 @@
#include "mlx5_core.h"
#include "lib/mlx5.h"
+#include "lib/eq.h"
#include "fpga/core.h"
#include "fpga/conn.h"
@@ -145,6 +146,22 @@ static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
return 0;
}
+static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
+
+static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+ struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
+
+ return mlx5_fpga_event(fdev, event, eqe);
+}
+
+static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+ struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
+
+ return mlx5_fpga_event(fdev, event, eqe);
+}
+
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -185,6 +202,11 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
if (err)
goto out;
+ MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
+ MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
+ mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
+
err = mlx5_fpga_conn_device_init(fdev);
if (err)
goto err_rsvd_gid;
@@ -201,6 +223,8 @@ err_conn_init:
mlx5_fpga_conn_device_cleanup(fdev);
err_rsvd_gid:
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
mlx5_core_unreserve_gids(mdev, max_num_qps);
out:
spin_lock_irqsave(&fdev->state_lock, flags);
@@ -256,6 +280,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
}
mlx5_fpga_conn_device_cleanup(fdev);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
+
max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
mlx5_core_unreserve_gids(mdev, max_num_qps);
}
@@ -283,9 +310,10 @@ static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
return "Unknown";
}
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
+static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
+ unsigned long event, void *eqe)
{
- struct mlx5_fpga_device *fdev = mdev->fpga;
+ void *data = ((struct mlx5_eqe *)eqe)->data.raw;
const char *event_name;
bool teardown = false;
unsigned long flags;
@@ -303,9 +331,7 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
break;
default:
- mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
- event);
- return;
+ return NOTIFY_DONE;
}
spin_lock_irqsave(&fdev->state_lock, flags);
@@ -326,4 +352,6 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
*/
if (teardown)
mlx5_trigger_health_work(fdev->mdev);
+
+ return NOTIFY_OK;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 3e2355c8df3f..7e2e871dbf83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -35,11 +35,16 @@
#ifdef CONFIG_MLX5_FPGA
+#include <linux/mlx5/eq.h>
+
+#include "lib/eq.h"
#include "fpga/cmd.h"
/* Represents an Innova device */
struct mlx5_fpga_device {
struct mlx5_core_dev *mdev;
+ struct mlx5_nb fpga_err_nb;
+ struct mlx5_nb fpga_qp_err_nb;
spinlock_t state_lock; /* Protects state transitions */
enum mlx5_fpga_status state;
enum mlx5_fpga_image last_admin_image;
@@ -82,7 +87,6 @@ int mlx5_fpga_init(struct mlx5_core_dev *mdev);
void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
#else
@@ -104,11 +108,6 @@ static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
{
}
-static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
- void *data)
-{
-}
-
#endif
#endif /* __MLX5_FPGA_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 08a891f9aade..c44ccb67c4a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -308,22 +308,68 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
+static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
+ struct fs_fte *fte, bool *extended_dest)
+{
+ int fw_log_max_fdb_encap_uplink =
+ MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink);
+ int num_fwd_destinations = 0;
+ struct mlx5_flow_rule *dst;
+ int num_encap = 0;
+
+ *extended_dest = false;
+ if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return 0;
+
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+ if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
+ num_encap++;
+ num_fwd_destinations++;
+ }
+ if (num_fwd_destinations > 1 && num_encap > 0)
+ *extended_dest = true;
+
+ if (*extended_dest && !fw_log_max_fdb_encap_uplink) {
+ mlx5_core_warn(dev, "FW does not support extended destination");
+ return -EOPNOTSUPP;
+ }
+ if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) {
+ mlx5_core_warn(dev, "FW does not support more than %d encaps",
+ 1 << fw_log_max_fdb_encap_uplink);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
int opmod, int modify_mask,
struct mlx5_flow_table *ft,
unsigned group_id,
struct fs_fte *fte)
{
- unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
- fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
+ bool extended_dest = false;
struct mlx5_flow_rule *dst;
void *in_flow_context, *vlan;
void *in_match_value;
+ unsigned int inlen;
+ int dst_cnt_size;
void *in_dests;
u32 *in;
int err;
+ if (mlx5_set_extended_dest(dev, fte, &extended_dest))
+ return -EOPNOTSUPP;
+
+ if (!extended_dest)
+ dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct);
+ else
+ dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format);
+
+ inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -343,9 +389,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
- MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
- MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
- fte->action.reformat_id);
+ MLX5_SET(flow_context, in_flow_context, extended_destination,
+ extended_dest);
+ if (extended_dest) {
+ u32 action;
+
+ action = fte->action.action &
+ ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ MLX5_SET(flow_context, in_flow_context, action, action);
+ } else {
+ MLX5_SET(flow_context, in_flow_context, action,
+ fte->action.action);
+ MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+ fte->action.reformat_id);
+ }
MLX5_SET(flow_context, in_flow_context, modify_header_id,
fte->action.modify_id);
@@ -387,10 +444,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
id = dst->dest_attr.vport.num;
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id_valid,
- dst->dest_attr.vport.vhca_id_valid);
+ !!(dst->dest_attr.vport.flags &
+ MLX5_FLOW_DEST_VPORT_VHCA_ID));
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id,
dst->dest_attr.vport.vhca_id);
+ if (extended_dest) {
+ MLX5_SET(dest_format_struct, in_dests,
+ packet_reformat,
+ !!(dst->dest_attr.vport.flags &
+ MLX5_FLOW_DEST_VPORT_REFORMAT_ID));
+ MLX5_SET(extended_dest_format, in_dests,
+ packet_reformat_id,
+ dst->dest_attr.vport.reformat_id);
+ }
break;
default:
id = dst->dest_attr.tir_num;
@@ -399,7 +466,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(dest_format_struct, in_dests, destination_type,
type);
MLX5_SET(dest_format_struct, in_dests, destination_id, id);
- in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ in_dests += dst_cnt_size;
list_size++;
}
@@ -420,7 +487,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
dst->dest_attr.counter_id);
- in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ in_dests += dst_cnt_size;
list_size++;
}
if (list_size > max_list_size) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 9d73eb955f75..79f122b45def 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -452,7 +452,7 @@ static void del_sw_hw_rule(struct fs_node *node)
if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
--fte->dests_size) {
- modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST),
+ modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
update_fte = true;
}
out:
@@ -1373,7 +1373,10 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
{
if (d1->type == d2->type) {
if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
- d1->vport.num == d2->vport.num) ||
+ d1->vport.num == d2->vport.num &&
+ d1->vport.flags == d2->vport.flags &&
+ ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
+ (d1->vport.reformat_id == d2->vport.reformat_id) : true)) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
d1->ft == d2->ft) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index b51ad217da32..2dc86347af58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -145,29 +145,6 @@ struct mlx5_flow_table {
struct rhltable fgs_hash;
};
-struct mlx5_fc_cache {
- u64 packets;
- u64 bytes;
- u64 lastuse;
-};
-
-struct mlx5_fc {
- struct list_head list;
- struct llist_node addlist;
- struct llist_node dellist;
-
- /* last{packets,bytes} members are used when calculating the delta since
- * last reading
- */
- u64 lastpackets;
- u64 lastbytes;
-
- u32 id;
- bool aging;
-
- struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
-};
-
struct mlx5_ft_underlay_qp {
struct list_head list;
u32 qpn;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 32accd6b041b..c6c28f56aa29 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -41,6 +41,29 @@
/* Max number of counters to query in bulk read is 32K */
#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
+struct mlx5_fc_cache {
+ u64 packets;
+ u64 bytes;
+ u64 lastuse;
+};
+
+struct mlx5_fc {
+ struct list_head list;
+ struct llist_node addlist;
+ struct llist_node dellist;
+
+ /* last{packets,bytes} members are used when calculating the delta since
+ * last reading
+ */
+ u64 lastpackets;
+ u64 lastbytes;
+
+ u32 id;
+ bool aging;
+
+ struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
/* locking scheme:
*
* It is the responsibility of the user to prevent concurrent calls or bad
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 43118de8ee99..196c07383082 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -38,6 +38,8 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
enum {
MLX5_HEALTH_POLL_INTERVAL = 2 * HZ,
@@ -78,29 +80,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
&dev->iseg->cmdq_addr_l_sz);
}
-static void trigger_cmd_completions(struct mlx5_core_dev *dev)
-{
- unsigned long flags;
- u64 vector;
-
- /* wait for pending handlers to complete */
- synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD));
- spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
- vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
- if (!vector)
- goto no_trig;
-
- vector |= MLX5_TRIGGERED_CMD_COMP;
- spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-
- mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
- mlx5_cmd_comp_handler(dev, vector, true);
- return;
-
-no_trig:
- spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-}
-
static int in_fatal(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
@@ -124,10 +103,10 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
mlx5_core_err(dev, "start\n");
if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
- trigger_cmd_completions(dev);
+ mlx5_cmd_trigger_completions(dev);
}
- mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
+ mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
mlx5_core_err(dev, "end\n");
unlock:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 11dabd62e2c7..bfc0f6581729 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev,
mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
netdev->mtu = max_mtu;
- mlx5e_build_nic_params(mdev, &priv->channels.params,
+ mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params,
mlx5e_get_netdev_max_channels(netdev),
netdev->mtu);
mlx5i_build_nic_params(mdev, &priv->channels.params);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 582b2f18010a..3a6baed722d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -34,11 +34,15 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
+#include "eswitch.h"
enum {
- MLX5_LAG_FLAG_BONDED = 1 << 0,
+ MLX5_LAG_FLAG_ROCE = 1 << 0,
+ MLX5_LAG_FLAG_SRIOV = 1 << 1,
};
+#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV)
+
struct lag_func {
struct mlx5_core_dev *dev;
struct net_device *netdev;
@@ -61,11 +65,6 @@ struct mlx5_lag {
struct lag_tracker tracker;
struct delayed_work bond_work;
struct notifier_block nb;
-
- /* Admin state. Allow lag only if allowed is true
- * even if network conditions for lag were met
- */
- bool allowed;
};
/* General purpose, use for short periods of time.
@@ -165,9 +164,19 @@ static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
return -1;
}
-static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
+static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
+{
+ return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
+}
+
+static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
+{
+ return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
+}
+
+static bool __mlx5_lag_is_active(struct mlx5_lag *ldev)
{
- return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+ return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
}
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
@@ -186,36 +195,131 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
*port2 = 1;
}
-static void mlx5_activate_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker)
+static void mlx5_modify_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
{
struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ u8 v2p_port1, v2p_port2;
int err;
- ldev->flags |= MLX5_LAG_FLAG_BONDED;
+ mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
+ &v2p_port2);
+
+ if (v2p_port1 != ldev->v2p_map[0] ||
+ v2p_port2 != ldev->v2p_map[1]) {
+ ldev->v2p_map[0] = v2p_port1;
+ ldev->v2p_map[1] = v2p_port2;
+
+ mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
+ ldev->v2p_map[0], ldev->v2p_map[1]);
+
+ err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+ if (err)
+ mlx5_core_err(dev0,
+ "Failed to modify LAG (%d)\n",
+ err);
+ }
+}
+
+static int mlx5_create_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ int err;
mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
&ldev->v2p_map[1]);
+ mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
+ ldev->v2p_map[0], ldev->v2p_map[1]);
+
err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
if (err)
mlx5_core_err(dev0,
"Failed to create LAG (%d)\n",
err);
+ return err;
+}
+
+static int mlx5_activate_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ u8 flags)
+{
+ bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ int err;
+
+ err = mlx5_create_lag(ldev, tracker);
+ if (err) {
+ if (roce_lag) {
+ mlx5_core_err(dev0,
+ "Failed to activate RoCE LAG\n");
+ } else {
+ mlx5_core_err(dev0,
+ "Failed to activate VF LAG\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ }
+ return err;
+ }
+
+ ldev->flags |= flags;
+ return 0;
}
-static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
+static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ bool roce_lag = __mlx5_lag_is_roce(ldev);
int err;
- ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+ ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
err = mlx5_cmd_destroy_lag(dev0);
- if (err)
- mlx5_core_err(dev0,
- "Failed to destroy LAG (%d)\n",
- err);
+ if (err) {
+ if (roce_lag) {
+ mlx5_core_err(dev0,
+ "Failed to deactivate RoCE LAG; driver restart required\n");
+ } else {
+ mlx5_core_err(dev0,
+ "Failed to deactivate VF LAG; driver restart required\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ }
+ }
+
+ return err;
+}
+
+static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+{
+ if (!ldev->pf[0].dev || !ldev->pf[1].dev)
+ return false;
+
+#ifdef CONFIG_MLX5_ESWITCH
+ return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
+#else
+ return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) &&
+ !mlx5_sriov_is_enabled(ldev->pf[1].dev));
+#endif
+}
+
+static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ mlx5_add_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
+}
+
+static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
}
static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -223,9 +327,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
struct lag_tracker tracker;
- u8 v2p_port1, v2p_port2;
- int i, err;
- bool do_bond;
+ bool do_bond, roce_lag;
+ int err;
if (!dev0 || !dev1)
return;
@@ -234,42 +337,45 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
tracker = ldev->tracker;
mutex_unlock(&lag_mutex);
- do_bond = tracker.is_bonded && ldev->allowed;
+ do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
- if (do_bond && !mlx5_lag_is_bonded(ldev)) {
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
- MLX5_INTERFACE_PROTOCOL_IB);
+ if (do_bond && !__mlx5_lag_is_active(ldev)) {
+ roce_lag = !mlx5_sriov_is_enabled(dev0) &&
+ !mlx5_sriov_is_enabled(dev1);
- mlx5_activate_lag(ldev, &tracker);
+ if (roce_lag)
+ mlx5_lag_remove_ib_devices(ldev);
- mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_nic_vport_enable_roce(dev1);
- } else if (do_bond && mlx5_lag_is_bonded(ldev)) {
- mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1,
- &v2p_port2);
+ err = mlx5_activate_lag(ldev, &tracker,
+ roce_lag ? MLX5_LAG_FLAG_ROCE :
+ MLX5_LAG_FLAG_SRIOV);
+ if (err) {
+ if (roce_lag)
+ mlx5_lag_add_ib_devices(ldev);
- if ((v2p_port1 != ldev->v2p_map[0]) ||
- (v2p_port2 != ldev->v2p_map[1])) {
- ldev->v2p_map[0] = v2p_port1;
- ldev->v2p_map[1] = v2p_port2;
+ return;
+ }
- err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
- if (err)
- mlx5_core_err(dev0,
- "Failed to modify LAG (%d)\n",
- err);
+ if (roce_lag) {
+ mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_enable_roce(dev1);
+ }
+ } else if (do_bond && __mlx5_lag_is_active(ldev)) {
+ mlx5_modify_lag(ldev, &tracker);
+ } else if (!do_bond && __mlx5_lag_is_active(ldev)) {
+ roce_lag = __mlx5_lag_is_roce(ldev);
+
+ if (roce_lag) {
+ mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_disable_roce(dev1);
}
- } else if (!do_bond && mlx5_lag_is_bonded(ldev)) {
- mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_nic_vport_disable_roce(dev1);
- mlx5_deactivate_lag(ldev);
+ err = mlx5_deactivate_lag(ldev);
+ if (err)
+ return;
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (ldev->pf[i].dev)
- mlx5_add_dev_by_protocol(ldev->pf[i].dev,
- MLX5_INTERFACE_PROTOCOL_IB);
+ if (roce_lag)
+ mlx5_lag_add_ib_devices(ldev);
}
}
@@ -419,15 +525,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
-{
- if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) ||
- (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev)))
- return false;
- else
- return true;
-}
-
static struct mlx5_lag *mlx5_lag_dev_alloc(void)
{
struct mlx5_lag *ldev;
@@ -437,7 +534,6 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void)
return NULL;
INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
- ldev->allowed = mlx5_lag_check_prereq(ldev);
return ldev;
}
@@ -462,7 +558,6 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
ldev->tracker.netdev_state[fn].link_up = 0;
ldev->tracker.netdev_state[fn].tx_enabled = 0;
- ldev->allowed = mlx5_lag_check_prereq(ldev);
dev->priv.lag = ldev;
mutex_unlock(&lag_mutex);
@@ -484,7 +579,6 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
dev->priv.lag = NULL;
- ldev->allowed = mlx5_lag_check_prereq(ldev);
mutex_unlock(&lag_mutex);
}
@@ -532,7 +626,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
if (!ldev)
return;
- if (mlx5_lag_is_bonded(ldev))
+ if (__mlx5_lag_is_active(ldev))
mlx5_deactivate_lag(ldev);
mlx5_lag_dev_remove_pf(ldev, dev);
@@ -549,56 +643,61 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
}
}
-bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
bool res;
mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- res = ldev && mlx5_lag_is_bonded(ldev);
+ res = ldev && __mlx5_lag_is_roce(ldev);
mutex_unlock(&lag_mutex);
return res;
}
-EXPORT_SYMBOL(mlx5_lag_is_active);
+EXPORT_SYMBOL(mlx5_lag_is_roce);
-static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow)
+bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
- int ret = 0;
- bool lag_active;
-
- mlx5_dev_list_lock();
+ bool res;
+ mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- if (!ldev) {
- ret = -ENODEV;
- goto unlock;
- }
- lag_active = mlx5_lag_is_bonded(ldev);
- if (!mlx5_lag_check_prereq(ldev) && allow) {
- ret = -EINVAL;
- goto unlock;
- }
- if (ldev->allowed == allow)
- goto unlock;
- ldev->allowed = allow;
- if ((lag_active && !allow) || allow)
- mlx5_do_bond(ldev);
-unlock:
- mlx5_dev_list_unlock();
- return ret;
+ res = ldev && __mlx5_lag_is_active(ldev);
+ mutex_unlock(&lag_mutex);
+
+ return res;
}
+EXPORT_SYMBOL(mlx5_lag_is_active);
-int mlx5_lag_forbid(struct mlx5_core_dev *dev)
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
- return mlx5_lag_set_state(dev, false);
+ struct mlx5_lag *ldev;
+ bool res;
+
+ mutex_lock(&lag_mutex);
+ ldev = mlx5_lag_dev_get(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev);
+ mutex_unlock(&lag_mutex);
+
+ return res;
}
+EXPORT_SYMBOL(mlx5_lag_is_sriov);
-int mlx5_lag_allow(struct mlx5_core_dev *dev)
+void mlx5_lag_update(struct mlx5_core_dev *dev)
{
- return mlx5_lag_set_state(dev, true);
+ struct mlx5_lag *ldev;
+
+ mlx5_dev_list_lock();
+ ldev = mlx5_lag_dev_get(dev);
+ if (!ldev)
+ goto unlock;
+
+ mlx5_do_bond(ldev);
+
+unlock:
+ mlx5_dev_list_unlock();
}
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -609,7 +708,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- if (!(ldev && mlx5_lag_is_bonded(ldev)))
+ if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
@@ -638,7 +737,7 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
return true;
ldev = mlx5_lag_dev_get(dev);
- if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev)
+ if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev)
return true;
/* If bonded, we do not add an IB device for PF1. */
@@ -665,7 +764,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- if (ldev && mlx5_lag_is_bonded(ldev)) {
+ if (ldev && __mlx5_lag_is_roce(ldev)) {
num_ports = MLX5_MAX_PORTS;
mdev[0] = ldev->pf[0].dev;
mdev[1] = ldev->pf[1].dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 0d90b1b4a3d3..ca0ee9916e9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -33,6 +33,7 @@
#include <linux/clocksource.h>
#include <linux/highmem.h>
#include <rdma/mlx5-abi.h>
+#include "lib/eq.h"
#include "en.h"
#include "clock.h"
@@ -71,7 +72,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
clock);
- return mlx5_read_internal_timer(mdev) & cc->mask;
+ return mlx5_read_internal_timer(mdev, NULL) & cc->mask;
}
static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
@@ -155,15 +156,19 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
return 0;
}
-static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
ptp_info);
- u64 ns;
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
unsigned long flags;
+ u64 cycles, ns;
write_seqlock_irqsave(&clock->lock, flags);
- ns = timecounter_read(&clock->tc);
+ cycles = mlx5_read_internal_timer(mdev, sts);
+ ns = timecounter_cyc2time(&clock->tc, cycles);
write_sequnlock_irqrestore(&clock->lock, flags);
*ts = ns_to_timespec64(ns);
@@ -306,7 +311,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
ts.tv_sec = rq->perout.start.sec;
ts.tv_nsec = rq->perout.start.nsec;
ns = timespec64_to_ns(&ts);
- cycles_now = mlx5_read_internal_timer(mdev);
+ cycles_now = mlx5_read_internal_timer(mdev, NULL);
write_seqlock_irqsave(&clock->lock, flags);
nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
nsec_delta = ns - nsec_now;
@@ -383,7 +388,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = {
.pps = 0,
.adjfreq = mlx5_ptp_adjfreq,
.adjtime = mlx5_ptp_adjtime,
- .gettime64 = mlx5_ptp_gettime,
+ .gettimex64 = mlx5_ptp_gettimex,
.settime64 = mlx5_ptp_settime,
.enable = NULL,
.verify = NULL,
@@ -439,16 +444,17 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
}
-void mlx5_pps_event(struct mlx5_core_dev *mdev,
- struct mlx5_eqe *eqe)
+static int mlx5_pps_event(struct notifier_block *nb,
+ unsigned long type, void *data)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
+ struct mlx5_core_dev *mdev = clock->mdev;
struct ptp_clock_event ptp_event;
- struct timespec64 ts;
- u64 nsec_now, nsec_delta;
u64 cycles_now, cycles_delta;
+ u64 nsec_now, nsec_delta, ns;
+ struct mlx5_eqe *eqe = data;
int pin = eqe->data.pps.pin;
- s64 ns;
+ struct timespec64 ts;
unsigned long flags;
switch (clock->ptp_info.pin_config[pin].func) {
@@ -463,11 +469,12 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
} else {
ptp_event.type = PTP_CLOCK_EXTTS;
}
+ /* TODOL clock->ptp can be NULL if ptp_clock_register failes */
ptp_clock_event(clock->ptp, &ptp_event);
break;
case PTP_PF_PEROUT:
- mlx5_ptp_gettime(&clock->ptp_info, &ts);
- cycles_now = mlx5_read_internal_timer(mdev);
+ mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL);
+ cycles_now = mlx5_read_internal_timer(mdev, NULL);
ts.tv_sec += 1;
ts.tv_nsec = 0;
ns = timespec64_to_ns(&ts);
@@ -481,8 +488,11 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
write_sequnlock_irqrestore(&clock->lock, flags);
break;
default:
- mlx5_core_err(mdev, " Unhandled event\n");
+ mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
+ clock->ptp_info.pin_config[pin].func);
}
+
+ return NOTIFY_OK;
}
void mlx5_init_clock(struct mlx5_core_dev *mdev)
@@ -511,14 +521,14 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
ktime_to_ns(ktime_get_real()));
/* Calculate period in seconds to call the overflow watchdog - to make
- * sure counter is checked at least once every wrap around.
+ * sure counter is checked at least twice every wrap around.
* The period is calculated as the minimum between max HW cycles count
* (The clock source mask) and max amount of cycles that can be
* multiplied by clock multiplier where the result doesn't exceed
* 64bits.
*/
overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult);
- overflow_cycles = min(overflow_cycles, clock->cycles.mask >> 1);
+ overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3));
ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles,
frac, &frac);
@@ -567,6 +577,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
PTR_ERR(clock->ptp));
clock->ptp = NULL;
}
+
+ MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
+ mlx5_eq_notifier_register(mdev, &clock->pps_nb);
}
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
@@ -576,6 +589,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
return;
+ mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
if (clock->ptp) {
ptp_clock_unregister(clock->ptp);
clock->ptp = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index 263cb6e2aeee..31600924bdc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -36,7 +36,6 @@
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
void mlx5_init_clock(struct mlx5_core_dev *mdev);
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
-void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
@@ -60,8 +59,6 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
#else
static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
-static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {}
-
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
return -1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
new file mode 100644
index 000000000000..bced2efe9bef
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
+
+static LIST_HEAD(devcom_list);
+
+#define devcom_for_each_component(priv, comp, iter) \
+ for (iter = 0; \
+ comp = &(priv)->components[iter], iter < MLX5_DEVCOM_NUM_COMPONENTS; \
+ iter++)
+
+struct mlx5_devcom_component {
+ struct {
+ void *data;
+ } device[MLX5_MAX_PORTS];
+
+ mlx5_devcom_event_handler_t handler;
+ struct rw_semaphore sem;
+ bool paired;
+};
+
+struct mlx5_devcom_list {
+ struct list_head list;
+
+ struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS];
+ struct mlx5_core_dev *devs[MLX5_MAX_PORTS];
+};
+
+struct mlx5_devcom {
+ struct mlx5_devcom_list *priv;
+ int idx;
+};
+
+static struct mlx5_devcom_list *mlx5_devcom_list_alloc(void)
+{
+ struct mlx5_devcom_component *comp;
+ struct mlx5_devcom_list *priv;
+ int i;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ devcom_for_each_component(priv, comp, i)
+ init_rwsem(&comp->sem);
+
+ return priv;
+}
+
+static struct mlx5_devcom *mlx5_devcom_alloc(struct mlx5_devcom_list *priv,
+ u8 idx)
+{
+ struct mlx5_devcom *devcom;
+
+ devcom = kzalloc(sizeof(*devcom), GFP_KERNEL);
+ if (!devcom)
+ return NULL;
+
+ devcom->priv = priv;
+ devcom->idx = idx;
+ return devcom;
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_devcom_list *priv = NULL, *iter;
+ struct mlx5_devcom *devcom = NULL;
+ bool new_priv = false;
+ u64 sguid0, sguid1;
+ int idx, i;
+
+ if (!mlx5_core_is_pf(dev))
+ return NULL;
+
+ sguid0 = mlx5_query_nic_system_image_guid(dev);
+ list_for_each_entry(iter, &devcom_list, list) {
+ struct mlx5_core_dev *tmp_dev = NULL;
+
+ idx = -1;
+ for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ if (iter->devs[i])
+ tmp_dev = iter->devs[i];
+ else
+ idx = i;
+ }
+
+ if (idx == -1)
+ continue;
+
+ sguid1 = mlx5_query_nic_system_image_guid(tmp_dev);
+ if (sguid0 != sguid1)
+ continue;
+
+ priv = iter;
+ break;
+ }
+
+ if (!priv) {
+ priv = mlx5_devcom_list_alloc();
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+
+ idx = 0;
+ new_priv = true;
+ }
+
+ priv->devs[idx] = dev;
+ devcom = mlx5_devcom_alloc(priv, idx);
+ if (!devcom) {
+ kfree(priv);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ if (new_priv)
+ list_add(&priv->list, &devcom_list);
+
+ return devcom;
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
+{
+ struct mlx5_devcom_list *priv;
+ int i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ priv = devcom->priv;
+ priv->devs[devcom->idx] = NULL;
+
+ kfree(devcom);
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (priv->devs[i])
+ break;
+
+ if (i != MLX5_MAX_PORTS)
+ return;
+
+ list_del(&priv->list);
+ kfree(priv);
+}
+
+void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ mlx5_devcom_event_handler_t handler,
+ void *data)
+{
+ struct mlx5_devcom_component *comp;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ WARN_ON(!data);
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ comp->handler = handler;
+ comp->device[devcom->idx].data = data;
+ up_write(&comp->sem);
+}
+
+void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ comp->device[devcom->idx].data = NULL;
+ up_write(&comp->sem);
+}
+
+int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ int event,
+ void *event_data)
+{
+ struct mlx5_devcom_component *comp;
+ int err = -ENODEV, i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return err;
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (i != devcom->idx && comp->device[i].data) {
+ err = comp->handler(event, comp->device[i].data,
+ event_data);
+ break;
+ }
+
+ up_write(&comp->sem);
+ return err;
+}
+
+void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ bool paired)
+{
+ struct mlx5_devcom_component *comp;
+
+ comp = &devcom->priv->components[id];
+ WARN_ON(!rwsem_is_locked(&comp->sem));
+
+ comp->paired = paired;
+}
+
+bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ if (IS_ERR_OR_NULL(devcom))
+ return false;
+
+ return devcom->priv->components[id].paired;
+}
+
+void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp;
+ int i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return NULL;
+
+ comp = &devcom->priv->components[id];
+ down_read(&comp->sem);
+ if (!comp->paired) {
+ up_read(&comp->sem);
+ return NULL;
+ }
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (i != devcom->idx)
+ break;
+
+ return comp->device[i].data;
+}
+
+void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp = &devcom->priv->components[id];
+
+ up_read(&comp->sem);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
new file mode 100644
index 000000000000..939d5bf1581b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_DEVCOM_H__
+#define __LIB_MLX5_DEVCOM_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_devcom_components {
+ MLX5_DEVCOM_ESW_OFFLOADS,
+
+ MLX5_DEVCOM_NUM_COMPONENTS,
+};
+
+typedef int (*mlx5_devcom_event_handler_t)(int event,
+ void *my_data,
+ void *event_data);
+
+struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev);
+void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom);
+
+void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ mlx5_devcom_event_handler_t handler,
+ void *data);
+void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ int event,
+ void *event_data);
+
+void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ bool paired);
+bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+#endif
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
new file mode 100644
index 000000000000..c0fb6d72b695
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_EQ_H__
+#define __LIB_MLX5_EQ_H__
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
+#include <linux/mlx5/cq.h>
+
+#define MLX5_MAX_IRQ_NAME (32)
+#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe))
+
+struct mlx5_eq_tasklet {
+ struct list_head list;
+ struct list_head process_list;
+ struct tasklet_struct task;
+ spinlock_t lock; /* lock completion tasklet list */
+};
+
+struct mlx5_cq_table {
+ spinlock_t lock; /* protect radix tree */
+ struct radix_tree_root tree;
+};
+
+struct mlx5_eq {
+ struct mlx5_core_dev *dev;
+ struct mlx5_cq_table cq_table;
+ __be32 __iomem *doorbell;
+ u32 cons_index;
+ struct mlx5_frag_buf buf;
+ int size;
+ unsigned int vecidx;
+ unsigned int irqn;
+ u8 eqn;
+ int nent;
+ struct mlx5_rsc_debug *dbg;
+};
+
+struct mlx5_eq_comp {
+ struct mlx5_eq core; /* Must be first */
+ struct mlx5_eq_tasklet tasklet_ctx;
+ struct list_head list;
+};
+
+static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+ return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+}
+
+static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+ struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+
+ return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+}
+
+static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+
+int mlx5_eq_table_init(struct mlx5_core_dev *dev);
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_eq_table_create(struct mlx5_core_dev *dev);
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
+void mlx5_cq_tasklet_cb(unsigned long data);
+struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
+
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
+#endif
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 7550b1cc8c6a..397a2847867a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -33,6 +33,8 @@
#ifndef __LIB_MLX5_H__
#define __LIB_MLX5_H__
+#include "mlx5_core.h"
+
void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
@@ -40,4 +42,38 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+/* TODO move to lib/events.h */
+
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF
+
+enum port_module_event_status_type {
+ MLX5_MODULE_STATUS_PLUGGED = 0x1,
+ MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
+ MLX5_MODULE_STATUS_ERROR = 0x3,
+ MLX5_MODULE_STATUS_DISABLED = 0x4,
+ MLX5_MODULE_STATUS_NUM,
+};
+
+enum port_module_event_error_type {
+ MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0,
+ MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1,
+ MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2,
+ MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3,
+ MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4,
+ MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5,
+ MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6,
+ MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7,
+ MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc,
+ MLX5_MODULE_EVENT_ERROR_NUM,
+};
+
+struct mlx5_pme_stats {
+ u64 status_counters[MLX5_MODULE_STATUS_NUM];
+ u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats);
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 28132c7dc05f..77896c11f6f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -43,7 +43,6 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
#include <linux/debugfs.h>
#include <linux/kmod.h>
#include <linux/mlx5/mlx5_ifc.h>
@@ -53,6 +52,7 @@
#endif
#include <net/devlink.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "fs_core.h"
#include "lib/mpfs.h"
#include "eswitch.h"
@@ -63,6 +63,7 @@
#include "accel/tls.h"
#include "lib/clock.h"
#include "lib/vxlan.h"
+#include "lib/devcom.h"
#include "diag/fw_tracer.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
@@ -319,51 +320,6 @@ static void release_bar(struct pci_dev *pdev)
pci_release_regions(pdev);
}
-static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_eq_table *table = &priv->eq_table;
- int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
- MLX5_CAP_GEN(dev, max_num_eqs) :
- 1 << MLX5_CAP_GEN(dev, log_max_eq);
- int nvec;
- int err;
-
- nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
- MLX5_EQ_VEC_COMP_BASE;
- nvec = min_t(int, nvec, num_eqs);
- if (nvec <= MLX5_EQ_VEC_COMP_BASE)
- return -ENOMEM;
-
- priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
- if (!priv->irq_info)
- return -ENOMEM;
-
- nvec = pci_alloc_irq_vectors(dev->pdev,
- MLX5_EQ_VEC_COMP_BASE + 1, nvec,
- PCI_IRQ_MSIX);
- if (nvec < 0) {
- err = nvec;
- goto err_free_irq_info;
- }
-
- table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
- return 0;
-
-err_free_irq_info:
- kfree(priv->irq_info);
- return err;
-}
-
-static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
-
- pci_free_irq_vectors(dev->pdev);
- kfree(priv->irq_info);
-}
-
struct mlx5_reg_host_endianness {
u8 he;
u8 rsvd[15];
@@ -624,188 +580,24 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
+ struct ptp_system_timestamp *sts)
{
u32 timer_h, timer_h1, timer_l;
timer_h = ioread32be(&dev->iseg->internal_timer_h);
+ ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
+ ptp_read_system_postts(sts);
timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
- if (timer_h != timer_h1) /* wrap around */
+ if (timer_h != timer_h1) {
+ /* wrap around */
+ ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
-
- return (u64)timer_l | (u64)timer_h1 << 32;
-}
-
-static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- struct mlx5_priv *priv = &mdev->priv;
- int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
- if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
- mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
- return -ENOMEM;
+ ptp_read_system_postts(sts);
}
- cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
- priv->irq_info[i].mask);
-
- if (IS_ENABLED(CONFIG_SMP) &&
- irq_set_affinity_hint(irq, priv->irq_info[i].mask))
- mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
- return 0;
-}
-
-static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- struct mlx5_priv *priv = &mdev->priv;
- int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
- irq_set_affinity_hint(irq, NULL);
- free_cpumask_var(priv->irq_info[i].mask);
-}
-
-static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int err;
- int i;
-
- for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
- err = mlx5_irq_set_affinity_hint(mdev, i);
- if (err)
- goto err_out;
- }
-
- return 0;
-
-err_out:
- for (i--; i >= 0; i--)
- mlx5_irq_clear_affinity_hint(mdev, i);
-
- return err;
-}
-
-static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int i;
-
- for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
- mlx5_irq_clear_affinity_hint(mdev, i);
-}
-
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
- unsigned int *irqn)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq, *n;
- int err = -ENOENT;
-
- spin_lock(&table->lock);
- list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
- if (eq->index == vector) {
- *eqn = eq->eqn;
- *irqn = eq->irqn;
- err = 0;
- break;
- }
- }
- spin_unlock(&table->lock);
-
- return err;
-}
-EXPORT_SYMBOL(mlx5_vector2eqn);
-
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq;
-
- spin_lock(&table->lock);
- list_for_each_entry(eq, &table->comp_eqs_list, list)
- if (eq->eqn == eqn) {
- spin_unlock(&table->lock);
- return eq;
- }
-
- spin_unlock(&table->lock);
-
- return ERR_PTR(-ENOENT);
-}
-
-static void free_comp_eqs(struct mlx5_core_dev *dev)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq, *n;
-
-#ifdef CONFIG_RFS_ACCEL
- if (dev->rmap) {
- free_irq_cpu_rmap(dev->rmap);
- dev->rmap = NULL;
- }
-#endif
- spin_lock(&table->lock);
- list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
- list_del(&eq->list);
- spin_unlock(&table->lock);
- if (mlx5_destroy_unmap_eq(dev, eq))
- mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
- eq->eqn);
- kfree(eq);
- spin_lock(&table->lock);
- }
- spin_unlock(&table->lock);
-}
-
-static int alloc_comp_eqs(struct mlx5_core_dev *dev)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- char name[MLX5_MAX_IRQ_NAME];
- struct mlx5_eq *eq;
- int ncomp_vec;
- int nent;
- int err;
- int i;
-
- INIT_LIST_HEAD(&table->comp_eqs_list);
- ncomp_vec = table->num_comp_vectors;
- nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
- dev->rmap = alloc_irq_cpu_rmap(ncomp_vec);
- if (!dev->rmap)
- return -ENOMEM;
-#endif
- for (i = 0; i < ncomp_vec; i++) {
- eq = kzalloc(sizeof(*eq), GFP_KERNEL);
- if (!eq) {
- err = -ENOMEM;
- goto clean;
- }
-
-#ifdef CONFIG_RFS_ACCEL
- irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev,
- MLX5_EQ_VEC_COMP_BASE + i));
-#endif
- snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
- err = mlx5_create_map_eq(dev, eq,
- i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
- name, MLX5_EQ_TYPE_COMP);
- if (err) {
- kfree(eq);
- goto clean;
- }
- mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
- eq->index = i;
- spin_lock(&table->lock);
- list_add_tail(&eq->list, &table->comp_eqs_list);
- spin_unlock(&table->lock);
- }
-
- return 0;
-
-clean:
- free_comp_eqs(dev);
- return err;
+ return (u64)timer_l | (u64)timer_h1 << 32;
}
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
@@ -938,28 +730,37 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
struct pci_dev *pdev = dev->pdev;
int err;
+ priv->devcom = mlx5_devcom_register_device(dev);
+ if (IS_ERR(priv->devcom))
+ dev_err(&pdev->dev, "failed to register with devcom (0x%p)\n",
+ priv->devcom);
+
err = mlx5_query_board_id(dev);
if (err) {
dev_err(&pdev->dev, "query board id failed\n");
- goto out;
+ goto err_devcom;
}
- err = mlx5_eq_init(dev);
+ err = mlx5_eq_table_init(dev);
if (err) {
dev_err(&pdev->dev, "failed to initialize eq\n");
- goto out;
+ goto err_devcom;
+ }
+
+ err = mlx5_events_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to initialize events\n");
+ goto err_eq_cleanup;
}
err = mlx5_cq_debugfs_init(dev);
if (err) {
dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
- goto err_eq_cleanup;
+ goto err_events_cleanup;
}
mlx5_init_qp_table(dev);
- mlx5_init_srq_table(dev);
-
mlx5_init_mkey_table(dev);
mlx5_init_reserved_gids(dev);
@@ -1013,14 +814,15 @@ err_rl_cleanup:
err_tables_cleanup:
mlx5_vxlan_destroy(dev->vxlan);
mlx5_cleanup_mkey_table(dev);
- mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cq_debugfs_cleanup(dev);
-
+err_events_cleanup:
+ mlx5_events_cleanup(dev);
err_eq_cleanup:
- mlx5_eq_cleanup(dev);
+ mlx5_eq_table_cleanup(dev);
+err_devcom:
+ mlx5_devcom_unregister_device(dev->priv.devcom);
-out:
return err;
}
@@ -1036,10 +838,11 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_cleanup_clock(dev);
mlx5_cleanup_reserved_gids(dev);
mlx5_cleanup_mkey_table(dev);
- mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cq_debugfs_cleanup(dev);
- mlx5_eq_cleanup(dev);
+ mlx5_events_cleanup(dev);
+ mlx5_eq_table_cleanup(dev);
+ mlx5_devcom_unregister_device(dev->priv.devcom);
}
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
@@ -1131,16 +934,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto reclaim_boot_pages;
}
- err = mlx5_pagealloc_start(dev);
- if (err) {
- dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
- goto reclaim_boot_pages;
- }
-
err = mlx5_cmd_init_hca(dev, sw_owner_id);
if (err) {
dev_err(&pdev->dev, "init hca failed\n");
- goto err_pagealloc_stop;
+ goto reclaim_boot_pages;
}
mlx5_set_driver_version(dev);
@@ -1161,23 +958,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
}
}
- err = mlx5_alloc_irq_vectors(dev);
- if (err) {
- dev_err(&pdev->dev, "alloc irq vectors failed\n");
- goto err_cleanup_once;
- }
-
dev->priv.uar = mlx5_get_uars_page(dev);
if (IS_ERR(dev->priv.uar)) {
dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
err = PTR_ERR(dev->priv.uar);
- goto err_disable_msix;
+ goto err_get_uars;
}
- err = mlx5_start_eqs(dev);
+ mlx5_events_start(dev);
+ mlx5_pagealloc_start(dev);
+
+ err = mlx5_eq_table_create(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
- goto err_put_uars;
+ dev_err(&pdev->dev, "Failed to create EQs\n");
+ goto err_eq_table;
}
err = mlx5_fw_tracer_init(dev->tracer);
@@ -1186,18 +980,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_fw_tracer;
}
- err = alloc_comp_eqs(dev);
- if (err) {
- dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
- goto err_comp_eqs;
- }
-
- err = mlx5_irq_set_affinity_hints(dev);
- if (err) {
- dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
- goto err_affinity_hints;
- }
-
err = mlx5_fpga_device_start(dev);
if (err) {
dev_err(&pdev->dev, "fpga device start failed %d\n", err);
@@ -1266,24 +1048,17 @@ err_ipsec_start:
mlx5_fpga_device_stop(dev);
err_fpga_start:
- mlx5_irq_clear_affinity_hints(dev);
-
-err_affinity_hints:
- free_comp_eqs(dev);
-
-err_comp_eqs:
mlx5_fw_tracer_cleanup(dev->tracer);
err_fw_tracer:
- mlx5_stop_eqs(dev);
+ mlx5_eq_table_destroy(dev);
-err_put_uars:
+err_eq_table:
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
mlx5_put_uars_page(dev, priv->uar);
-err_disable_msix:
- mlx5_free_irq_vectors(dev);
-
-err_cleanup_once:
+err_get_uars:
if (boot)
mlx5_cleanup_once(dev);
@@ -1294,9 +1069,6 @@ err_stop_poll:
goto out_err;
}
-err_pagealloc_stop:
- mlx5_pagealloc_stop(dev);
-
reclaim_boot_pages:
mlx5_reclaim_startup_pages(dev);
@@ -1340,21 +1112,20 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_accel_ipsec_cleanup(dev);
mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
- mlx5_irq_clear_affinity_hints(dev);
- free_comp_eqs(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
- mlx5_stop_eqs(dev);
+ mlx5_eq_table_destroy(dev);
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
mlx5_put_uars_page(dev, priv->uar);
- mlx5_free_irq_vectors(dev);
if (cleanup)
mlx5_cleanup_once(dev);
mlx5_stop_health_poll(dev, cleanup);
+
err = mlx5_cmd_teardown_hca(dev);
if (err) {
dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
goto out;
}
- mlx5_pagealloc_stop(dev);
mlx5_reclaim_startup_pages(dev);
mlx5_core_disable_hca(dev, 0);
mlx5_cmd_cleanup(dev);
@@ -1364,12 +1135,6 @@ out:
return err;
}
-struct mlx5_core_event_handler {
- void (*event)(struct mlx5_core_dev *dev,
- enum mlx5_dev_event event,
- void *data);
-};
-
static const struct devlink_ops mlx5_devlink_ops = {
#ifdef CONFIG_MLX5_ESWITCH
.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
@@ -1403,7 +1168,6 @@ static int init_one(struct pci_dev *pdev,
pci_set_drvdata(pdev, dev);
dev->pdev = pdev;
- dev->event = mlx5_core_event;
dev->profile = &profile[prof_sel];
INIT_LIST_HEAD(&priv->ctx_list);
@@ -1411,17 +1175,6 @@ static int init_one(struct pci_dev *pdev,
mutex_init(&dev->pci_status_mutex);
mutex_init(&dev->intf_state_mutex);
- INIT_LIST_HEAD(&priv->waiting_events_list);
- priv->is_accum_events = false;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- err = init_srcu_struct(&priv->pfault_srcu);
- if (err) {
- dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
- err);
- goto clean_dev;
- }
-#endif
mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
@@ -1430,7 +1183,7 @@ static int init_one(struct pci_dev *pdev,
err = mlx5_pci_init(dev, priv);
if (err) {
dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
- goto clean_srcu;
+ goto clean_dev;
}
err = mlx5_health_init(dev);
@@ -1439,12 +1192,14 @@ static int init_one(struct pci_dev *pdev,
goto close_pci;
}
- mlx5_pagealloc_init(dev);
+ err = mlx5_pagealloc_init(dev);
+ if (err)
+ goto err_pagealloc_init;
err = mlx5_load_one(dev, priv, true);
if (err) {
dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
- goto clean_health;
+ goto err_load_one;
}
request_module_nowait(MLX5_IB_MOD);
@@ -1458,16 +1213,13 @@ static int init_one(struct pci_dev *pdev,
clean_load:
mlx5_unload_one(dev, priv, true);
-clean_health:
+err_load_one:
mlx5_pagealloc_cleanup(dev);
+err_pagealloc_init:
mlx5_health_cleanup(dev);
close_pci:
mlx5_pci_close(dev, priv);
-clean_srcu:
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- cleanup_srcu_struct(&priv->pfault_srcu);
clean_dev:
-#endif
devlink_free(devlink);
return err;
@@ -1491,9 +1243,6 @@ static void remove_one(struct pci_dev *pdev)
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
mlx5_pci_close(dev, priv);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- cleanup_srcu_struct(&priv->pfault_srcu);
-#endif
devlink_free(devlink);
}
@@ -1637,7 +1386,6 @@ succeed:
* kexec. There is no need to cleanup the mlx5_core software
* contexts.
*/
- mlx5_irq_clear_affinity_hints(dev);
mlx5_core_eq_free_irqs(dev);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 0594d0961cb3..c68dcea5985b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -38,6 +38,7 @@
#include <linux/sched.h>
#include <linux/if_link.h>
#include <linux/firmware.h>
+#include <linux/ptp_clock_kernel.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/fs.h>
@@ -78,6 +79,11 @@ do { \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
+#define mlx5_core_warn_once(__dev, format, ...) \
+ dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
#define mlx5_core_info(__dev, format, ...) \
dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
@@ -97,12 +103,6 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
-
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
- unsigned long param);
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
- struct mlx5_pagefault *pfault);
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
void mlx5_disable_device(struct mlx5_core_dev *dev);
void mlx5_recover_device(struct mlx5_core_dev *dev);
@@ -122,30 +122,10 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 element_id);
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
-
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
-/* This function should only be called after mlx5_cmd_force_teardown_hca */
-void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
-void mlx5_cq_tasklet_cb(unsigned long data);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
+ struct ptp_system_timestamp *sts);
+
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
@@ -159,6 +139,11 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove(struct mlx5_core_dev *dev);
+int mlx5_events_init(struct mlx5_core_dev *dev);
+void mlx5_events_cleanup(struct mlx5_core_dev *dev);
+void mlx5_events_start(struct mlx5_core_dev *dev);
+void mlx5_events_stop(struct mlx5_core_dev *dev);
+
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_attach_device(struct mlx5_core_dev *dev);
@@ -202,10 +187,8 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
MLX5_CAP_GEN(dev, lag_master);
}
-int mlx5_lag_allow(struct mlx5_core_dev *dev);
-int mlx5_lag_forbid(struct mlx5_core_dev *dev);
-
void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
+void mlx5_lag_update(struct mlx5_core_dev *dev);
enum {
MLX5_NIC_IFC_FULL = 0,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index e36d3e3675f9..a83b517b0714 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -37,6 +37,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
MLX5_PAGES_CANT_GIVE = 0,
@@ -433,15 +434,28 @@ static void pages_work_handler(struct work_struct *work)
kfree(req);
}
-void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
- s32 npages)
+static int req_pages_handler(struct notifier_block *nb,
+ unsigned long type, void *data)
{
struct mlx5_pages_req *req;
-
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+ struct mlx5_eqe *eqe;
+ u16 func_id;
+ s32 npages;
+
+ priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+ eqe = data;
+
+ func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+ npages = be32_to_cpu(eqe->data.req_pages.num_pages);
+ mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
+ func_id, npages);
req = kzalloc(sizeof(*req), GFP_ATOMIC);
if (!req) {
mlx5_core_warn(dev, "failed to allocate pages request\n");
- return;
+ return NOTIFY_DONE;
}
req->dev = dev;
@@ -449,6 +463,7 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
req->npages = npages;
INIT_WORK(&req->work, pages_work_handler);
queue_work(dev->priv.pg_wq, &req->work);
+ return NOTIFY_OK;
}
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
@@ -524,29 +539,32 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
return 0;
}
-void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
{
dev->priv.page_root = RB_ROOT;
INIT_LIST_HEAD(&dev->priv.free_list);
+ dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
+ if (!dev->priv.pg_wq)
+ return -ENOMEM;
+
+ return 0;
}
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
{
- /* nothing */
+ destroy_workqueue(dev->priv.pg_wq);
}
-int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+void mlx5_pagealloc_start(struct mlx5_core_dev *dev)
{
- dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
- if (!dev->priv.pg_wq)
- return -ENOMEM;
-
- return 0;
+ MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST);
+ mlx5_eq_notifier_register(dev, &dev->priv.pg_nb);
}
void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
{
- destroy_workqueue(dev->priv.pg_wq);
+ mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb);
+ flush_workqueue(dev->priv.pg_wq);
}
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 31a9cbd85689..2b82f35f4c35 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -915,63 +915,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
*enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
}
-static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
- "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */
- "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */
- "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */
-};
-
-static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
- "Power budget exceeded",
- "Long Range for non MLNX cable",
- "Bus stuck(I2C or data shorted)",
- "No EEPROM/retry timeout",
- "Enforce part number list",
- "Unknown identifier",
- "High Temperature",
- "Bad or shorted cable/module",
- "Unknown status",
-};
-
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
-{
- enum port_module_event_status_type module_status;
- enum port_module_event_error_type error_type;
- struct mlx5_eqe_port_module *module_event_eqe;
- struct mlx5_priv *priv = &dev->priv;
- u8 module_num;
-
- module_event_eqe = &eqe->data.port_module;
- module_num = module_event_eqe->module;
- module_status = module_event_eqe->module_status &
- PORT_MODULE_EVENT_MODULE_STATUS_MASK;
- error_type = module_event_eqe->error_type &
- PORT_MODULE_EVENT_ERROR_TYPE_MASK;
-
- if (module_status < MLX5_MODULE_STATUS_ERROR) {
- priv->pme_stats.status_counters[module_status - 1]++;
- } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
- if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
- /* Unknown error type */
- error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
- priv->pme_stats.error_counters[error_type]++;
- }
-
- if (!printk_ratelimit())
- return;
-
- if (module_status < MLX5_MODULE_STATUS_ERROR)
- mlx5_core_info(dev,
- "Port module event: module %u, %s\n",
- module_num, mlx5_pme_status[module_status - 1]);
-
- else if (module_status == MLX5_MODULE_STATUS_ERROR)
- mlx5_core_info(dev,
- "Port module event[error]: module %u, %s, %s\n",
- module_num, mlx5_pme_status[module_status - 1],
- mlx5_pme_error[error_type]);
-}
-
int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
{
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 91b8139a388d..388f205a497f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -38,11 +38,11 @@
#include <linux/mlx5/transobj.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
-static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
- u32 rsn)
+static struct mlx5_core_rsc_common *
+mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
{
- struct mlx5_qp_table *table = &dev->priv.qp_table;
struct mlx5_core_rsc_common *common;
spin_lock(&table->lock);
@@ -53,11 +53,6 @@ static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
spin_unlock(&table->lock);
- if (!common) {
- mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n",
- rsn);
- return NULL;
- }
return common;
}
@@ -120,19 +115,57 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
}
}
-void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
+static int rsc_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
{
- struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+ struct mlx5_core_rsc_common *common;
+ struct mlx5_qp_table *table;
+ struct mlx5_core_dev *dev;
struct mlx5_core_dct *dct;
+ u8 event_type = (u8)type;
struct mlx5_core_qp *qp;
+ struct mlx5_priv *priv;
+ struct mlx5_eqe *eqe;
+ u32 rsn;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ eqe = data;
+ rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+ break;
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ eqe = data;
+ rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ table = container_of(nb, struct mlx5_qp_table, nb);
+ priv = container_of(table, struct mlx5_priv, qp_table);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn);
- if (!common)
- return;
+ common = mlx5_get_rsc(table, rsn);
+ if (!common) {
+ mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn);
+ return NOTIFY_OK;
+ }
if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
event_type, rsn);
- return;
+ goto out;
}
switch (common->res) {
@@ -150,8 +183,10 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
default:
mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
}
-
+out:
mlx5_core_put_rsc(common);
+
+ return NOTIFY_OK;
}
static int create_resource_common(struct mlx5_core_dev *dev,
@@ -487,10 +522,16 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev)
spin_lock_init(&table->lock);
INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
mlx5_qp_debugfs_init(dev);
+
+ table->nb.notifier_call = rsc_event_notifier;
+ mlx5_notifier_register(dev, &table->nb);
}
void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+ mlx5_notifier_unregister(dev, &table->nb);
mlx5_qp_debugfs_cleanup(dev);
}
@@ -670,3 +711,20 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}
EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+ int res_num,
+ enum mlx5_res_type res_type)
+{
+ u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+ return mlx5_get_rsc(table, rsn);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_hold);
+
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
+{
+ mlx5_core_put_rsc(res);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_put);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index a0674962f02c..6e178030d8fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -216,20 +216,10 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (!mlx5_core_is_pf(dev))
return -EPERM;
- if (num_vfs) {
- int ret;
-
- ret = mlx5_lag_forbid(dev);
- if (ret && (ret != -ENODEV))
- return ret;
- }
-
- if (num_vfs) {
+ if (num_vfs)
err = mlx5_sriov_enable(pdev, num_vfs);
- } else {
+ else
mlx5_sriov_disable(pdev);
- mlx5_lag_allow(dev);
- }
return err ? err : num_vfs;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
deleted file mode 100644
index 6a6fc9be01e6..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ /dev/null
@@ -1,716 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
-#include <linux/mlx5/srq.h>
-#include <rdma/ib_verbs.h>
-#include "mlx5_core.h"
-#include <linux/mlx5/transobj.h>
-
-void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
- struct mlx5_core_srq *srq;
-
- spin_lock(&table->lock);
-
- srq = radix_tree_lookup(&table->tree, srqn);
- if (srq)
- atomic_inc(&srq->refcount);
-
- spin_unlock(&table->lock);
-
- if (!srq) {
- mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
- return;
- }
-
- srq->event(srq, event_type);
-
- if (atomic_dec_and_test(&srq->refcount))
- complete(&srq->free);
-}
-
-static int get_pas_size(struct mlx5_srq_attr *in)
-{
- u32 log_page_size = in->log_page_size + 12;
- u32 log_srq_size = in->log_size;
- u32 log_rq_stride = in->wqe_shift;
- u32 page_offset = in->page_offset;
- u32 po_quanta = 1 << (log_page_size - 6);
- u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride);
- u32 page_size = 1 << log_page_size;
- u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
- u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size);
-
- return rq_num_pas * sizeof(u64);
-}
-
-static void set_wq(void *wq, struct mlx5_srq_attr *in)
-{
- MLX5_SET(wq, wq, wq_signature, !!(in->flags
- & MLX5_SRQ_FLAG_WQ_SIG));
- MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size);
- MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4);
- MLX5_SET(wq, wq, log_wq_sz, in->log_size);
- MLX5_SET(wq, wq, page_offset, in->page_offset);
- MLX5_SET(wq, wq, lwm, in->lwm);
- MLX5_SET(wq, wq, pd, in->pd);
- MLX5_SET64(wq, wq, dbr_addr, in->db_record);
-}
-
-static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
-{
- MLX5_SET(srqc, srqc, wq_signature, !!(in->flags
- & MLX5_SRQ_FLAG_WQ_SIG));
- MLX5_SET(srqc, srqc, log_page_size, in->log_page_size);
- MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift);
- MLX5_SET(srqc, srqc, log_srq_size, in->log_size);
- MLX5_SET(srqc, srqc, page_offset, in->page_offset);
- MLX5_SET(srqc, srqc, lwm, in->lwm);
- MLX5_SET(srqc, srqc, pd, in->pd);
- MLX5_SET64(srqc, srqc, dbr_addr, in->db_record);
- MLX5_SET(srqc, srqc, xrcd, in->xrcd);
- MLX5_SET(srqc, srqc, cqn, in->cqn);
-}
-
-static void get_wq(void *wq, struct mlx5_srq_attr *in)
-{
- if (MLX5_GET(wq, wq, wq_signature))
- in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
- in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz);
- in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4;
- in->log_size = MLX5_GET(wq, wq, log_wq_sz);
- in->page_offset = MLX5_GET(wq, wq, page_offset);
- in->lwm = MLX5_GET(wq, wq, lwm);
- in->pd = MLX5_GET(wq, wq, pd);
- in->db_record = MLX5_GET64(wq, wq, dbr_addr);
-}
-
-static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
-{
- if (MLX5_GET(srqc, srqc, wq_signature))
- in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
- in->log_page_size = MLX5_GET(srqc, srqc, log_page_size);
- in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride);
- in->log_size = MLX5_GET(srqc, srqc, log_srq_size);
- in->page_offset = MLX5_GET(srqc, srqc, page_offset);
- in->lwm = MLX5_GET(srqc, srqc, lwm);
- in->pd = MLX5_GET(srqc, srqc, pd);
- in->db_record = MLX5_GET64(srqc, srqc, dbr_addr);
-}
-
-struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
- struct mlx5_core_srq *srq;
-
- spin_lock(&table->lock);
-
- srq = radix_tree_lookup(&table->tree, srqn);
- if (srq)
- atomic_inc(&srq->refcount);
-
- spin_unlock(&table->lock);
-
- return srq;
-}
-EXPORT_SYMBOL(mlx5_core_get_srq);
-
-static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
- void *create_in;
- void *srqc;
- void *pas;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- MLX5_SET(create_srq_in, create_in, uid, in->uid);
- srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
- pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
-
- set_srqc(srqc, in);
- memcpy(pas, in->pas, pas_size);
-
- MLX5_SET(create_srq_in, create_in, opcode,
- MLX5_CMD_OP_CREATE_SRQ);
-
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
- sizeof(create_out));
- kvfree(create_in);
- if (!err) {
- srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
- srq->uid = in->uid;
- }
-
- return err;
-}
-
-static int destroy_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
- u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
-
- MLX5_SET(destroy_srq_in, srq_in, opcode,
- MLX5_CMD_OP_DESTROY_SRQ);
- MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
- MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, sizeof(srq_out));
-}
-
-static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- u16 lwm, int is_srq)
-{
- u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
- u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
-
- MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ);
- MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
- MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
- MLX5_SET(arm_rq_in, srq_in, lwm, lwm);
- MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, sizeof(srq_out));
-}
-
-static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
- u32 *srq_out;
- void *srqc;
- int err;
-
- srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
- if (!srq_out)
- return -ENOMEM;
-
- MLX5_SET(query_srq_in, srq_in, opcode,
- MLX5_CMD_OP_QUERY_SRQ);
- MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
- err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
- if (err)
- goto out;
-
- srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
- get_srqc(srqc, out);
- if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
- out->flags |= MLX5_SRQ_FLAG_ERR;
-out:
- kvfree(srq_out);
- return err;
-}
-
-static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
- void *create_in;
- void *xrc_srqc;
- void *pas;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid);
- xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
- xrc_srq_context_entry);
- pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
-
- set_srqc(xrc_srqc, in);
- MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
- memcpy(pas, in->pas, pas_size);
- MLX5_SET(create_xrc_srq_in, create_in, opcode,
- MLX5_CMD_OP_CREATE_XRC_SRQ);
-
- memset(create_out, 0, sizeof(create_out));
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
- sizeof(create_out));
- if (err)
- goto out;
-
- srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
- srq->uid = in->uid;
-out:
- kvfree(create_in);
- return err;
-}
-
-static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
- u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
- MLX5_CMD_OP_DESTROY_XRC_SRQ);
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
- xrcsrq_out, sizeof(xrcsrq_out));
-}
-
-static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq, u16 lwm)
-{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
- u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
- xrcsrq_out, sizeof(xrcsrq_out));
-}
-
-static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
- u32 *xrcsrq_out;
- void *xrc_srqc;
- int err;
-
- xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
- if (!xrcsrq_out)
- return -ENOMEM;
- memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
-
- MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
- MLX5_CMD_OP_QUERY_XRC_SRQ);
- MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
-
- err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
- MLX5_ST_SZ_BYTES(query_xrc_srq_out));
- if (err)
- goto out;
-
- xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
- xrc_srq_context_entry);
- get_srqc(xrc_srqc, out);
- if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
- out->flags |= MLX5_SRQ_FLAG_ERR;
-
-out:
- kvfree(xrcsrq_out);
- return err;
-}
-
-static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- void *create_in;
- void *rmpc;
- void *wq;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
- wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
- MLX5_SET(create_rmp_in, create_in, uid, in->uid);
- set_wq(wq, in);
- memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
-
- err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
- if (!err)
- srq->uid = in->uid;
-
- kvfree(create_in);
- return err;
-}
-
-static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
- u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
-
- MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
- MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
- MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int arm_rmp_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- u16 lwm)
-{
- void *in;
- void *rmpc;
- void *wq;
- void *bitmask;
- int err;
-
- in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
- bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
- wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
- MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
- MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn);
- MLX5_SET(modify_rmp_in, in, uid, srq->uid);
- MLX5_SET(wq, wq, lwm, lwm);
- MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
-
- err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
- kvfree(in);
- return err;
-}
-
-static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 *rmp_out;
- void *rmpc;
- int err;
-
- rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL);
- if (!rmp_out)
- return -ENOMEM;
-
- err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out);
- if (err)
- goto out;
-
- rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
- get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
- if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
- out->flags |= MLX5_SRQ_FLAG_ERR;
-
-out:
- kvfree(rmp_out);
- return err;
-}
-
-static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
- void *create_in;
- void *xrqc;
- void *wq;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
- wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
-
- set_wq(wq, in);
- memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size);
-
- if (in->type == IB_SRQT_TM) {
- MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
- if (in->flags & MLX5_SRQ_FLAG_RNDV)
- MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV);
- MLX5_SET(xrqc, xrqc,
- tag_matching_topology_context.log_matching_list_sz,
- in->tm_log_list_size);
- }
- MLX5_SET(xrqc, xrqc, user_index, in->user_index);
- MLX5_SET(xrqc, xrqc, cqn, in->cqn);
- MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
- MLX5_SET(create_xrq_in, create_in, uid, in->uid);
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
- sizeof(create_out));
- kvfree(create_in);
- if (!err) {
- srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
- srq->uid = in->uid;
- }
-
- return err;
-}
-
-static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
-
- MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
- MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
- MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int arm_xrq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- u16 lwm)
-{
- u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
-
- MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
- MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ);
- MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
- MLX5_SET(arm_rq_in, in, lwm, lwm);
- MLX5_SET(arm_rq_in, in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
- u32 *xrq_out;
- int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
- void *xrqc;
- int err;
-
- xrq_out = kvzalloc(outlen, GFP_KERNEL);
- if (!xrq_out)
- return -ENOMEM;
-
- MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
- MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
-
- err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen);
- if (err)
- goto out;
-
- xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context);
- get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out);
- if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD)
- out->flags |= MLX5_SRQ_FLAG_ERR;
- out->tm_next_tag =
- MLX5_GET(xrqc, xrqc,
- tag_matching_topology_context.append_next_index);
- out->tm_hw_phase_cnt =
- MLX5_GET(xrqc, xrqc,
- tag_matching_topology_context.hw_phase_cnt);
- out->tm_sw_phase_cnt =
- MLX5_GET(xrqc, xrqc,
- tag_matching_topology_context.sw_phase_cnt);
-
-out:
- kvfree(xrq_out);
- return err;
-}
-
-static int create_srq_split(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- if (!dev->issi)
- return create_srq_cmd(dev, srq, in);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return create_xrc_srq_cmd(dev, srq, in);
- case MLX5_RES_XRQ:
- return create_xrq_cmd(dev, srq, in);
- default:
- return create_rmp_cmd(dev, srq, in);
- }
-}
-
-static int destroy_srq_split(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- if (!dev->issi)
- return destroy_srq_cmd(dev, srq);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return destroy_xrc_srq_cmd(dev, srq);
- case MLX5_RES_XRQ:
- return destroy_xrq_cmd(dev, srq);
- default:
- return destroy_rmp_cmd(dev, srq);
- }
-}
-
-int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- int err;
- struct mlx5_srq_table *table = &dev->priv.srq_table;
-
- switch (in->type) {
- case IB_SRQT_XRC:
- srq->common.res = MLX5_RES_XSRQ;
- break;
- case IB_SRQT_TM:
- srq->common.res = MLX5_RES_XRQ;
- break;
- default:
- srq->common.res = MLX5_RES_SRQ;
- }
-
- err = create_srq_split(dev, srq, in);
- if (err)
- return err;
-
- atomic_set(&srq->refcount, 1);
- init_completion(&srq->free);
-
- spin_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, srq->srqn, srq);
- spin_unlock_irq(&table->lock);
- if (err) {
- mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
- goto err_destroy_srq_split;
- }
-
- return 0;
-
-err_destroy_srq_split:
- destroy_srq_split(dev, srq);
-
- return err;
-}
-EXPORT_SYMBOL(mlx5_core_create_srq);
-
-int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
- struct mlx5_core_srq *tmp;
- int err;
-
- spin_lock_irq(&table->lock);
- tmp = radix_tree_delete(&table->tree, srq->srqn);
- spin_unlock_irq(&table->lock);
- if (!tmp) {
- mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
- return -EINVAL;
- }
- if (tmp != srq) {
- mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
- return -EINVAL;
- }
-
- err = destroy_srq_split(dev, srq);
- if (err)
- return err;
-
- if (atomic_dec_and_test(&srq->refcount))
- complete(&srq->free);
- wait_for_completion(&srq->free);
-
- return 0;
-}
-EXPORT_SYMBOL(mlx5_core_destroy_srq);
-
-int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- if (!dev->issi)
- return query_srq_cmd(dev, srq, out);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return query_xrc_srq_cmd(dev, srq, out);
- case MLX5_RES_XRQ:
- return query_xrq_cmd(dev, srq, out);
- default:
- return query_rmp_cmd(dev, srq, out);
- }
-}
-EXPORT_SYMBOL(mlx5_core_query_srq);
-
-int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- u16 lwm, int is_srq)
-{
- if (!dev->issi)
- return arm_srq_cmd(dev, srq, lwm, is_srq);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return arm_xrc_srq_cmd(dev, srq, lwm);
- case MLX5_RES_XRQ:
- return arm_xrq_cmd(dev, srq, lwm);
- default:
- return arm_rmp_cmd(dev, srq, lwm);
- }
-}
-EXPORT_SYMBOL(mlx5_core_arm_srq);
-
-void mlx5_init_srq_table(struct mlx5_core_dev *dev)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
-
- memset(table, 0, sizeof(*table));
- spin_lock_init(&table->lock);
- INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
-}
-
-void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
-{
- /* nothing */
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index a1ee9a8a769e..c4d4b76096dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -258,115 +258,6 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
}
EXPORT_SYMBOL(mlx5_core_destroy_tis);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rmpn)
-{
- u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0};
- int err;
-
- MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- if (!err)
- *rmpn = MLX5_GET(create_rmp_out, out, rmpn);
-
- return err;
-}
-
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen)
-{
- u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0};
-
- MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
- return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-}
-
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0};
-
- MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
- MLX5_SET(destroy_rmp_in, in, rmpn, rmpn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out,
- sizeof(out));
-}
-
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out)
-{
- u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0};
- int outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
-
- MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP);
- MLX5_SET(query_rmp_in, in, rmpn, rmpn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
-{
- void *in;
- void *rmpc;
- void *wq;
- void *bitmask;
- int err;
-
- in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
- bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
- wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
- MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
- MLX5_SET(modify_rmp_in, in, rmpn, rmpn);
- MLX5_SET(wq, wq, lwm, lwm);
- MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
-
- err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
- kvfree(in);
-
- return err;
-}
-
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *xsrqn)
-{
- u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0};
- int err;
-
- MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- if (!err)
- *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
-
- return err;
-}
-
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
- MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ);
- MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
-{
- u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
- MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn);
- MLX5_SET(arm_xrc_srq_in, in, lwm, lwm);
- MLX5_SET(arm_xrc_srq_in, in, op_mod,
- MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rqtn)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index cfbea66b4879..9b150ce9d315 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1204,9 +1204,19 @@ EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
{
- if (!mdev->sys_image_guid)
- mlx5_query_nic_vport_system_image_guid(mdev, &mdev->sys_image_guid);
+ int port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ u64 tmp = 0;
- return mdev->sys_image_guid;
+ if (mdev->sys_image_guid)
+ return mdev->sys_image_guid;
+
+ if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH)
+ mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+ else
+ mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
+
+ mdev->sys_image_guid = tmp;
+
+ return tmp;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 2dcbf1ebfd6a..953cc8efba69 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -155,7 +155,8 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *cqc, struct mlx5_cqwq *wq,
struct mlx5_wq_ctrl *wq_ctrl)
{
- u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6;
+ /* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */
+ u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 6 : 7;
u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size);
int err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index b1293d153a58..ea934a48c90a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -177,9 +177,14 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
return mlx5_cqwq_ctr2ix(wq, wq->cc);
}
-static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
{
- return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+ struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+
+ /* For 128B CQEs the data is in the last 64B */
+ cqe += wq->fbc.log_stride == 7;
+
+ return cqe;
}
static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr)