Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core')
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 14
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 13
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 139
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 15
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 11
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 162
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 10
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h | 35
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 27
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 112
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/ecpf.h | 33
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en.h | 164
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/fs.h | 16
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c | 169
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h | 12
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 139
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/port.h | 16
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 39
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h | 15
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 314
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 648
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 44
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 248
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 75
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 19
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 13
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 4
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 568
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 112
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 845
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 893
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 48
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 292
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 45
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 6
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1931
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 16
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 34
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 13
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 1272
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 560
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 103
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 782
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/events.c | 328
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c | 40
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h | 11
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 53
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 83
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 172
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 24
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 23
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/health.c | 29
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 6
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 335
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag.h | 65
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 315
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h | 26
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 48
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c | 255
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h | 46
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 98
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 36
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 5
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c | 205
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h | 24
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/mad.c | 75
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 508
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 52
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 11
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 98
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/port.c | 176
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 163
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 16
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 716
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 109
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 12
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 34
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/wq.c | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/wq.h | 9
83 files changed, 9059 insertions, 5184 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 37a551436e4a..6debffb8336b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -4,7 +4,6 @@
config MLX5_CORE
tristate "Mellanox 5th generation network adapters (ConnectX series) core driver"
- depends on MAY_USE_DEVLINK
depends on PCI
imply PTP_1588_CLOCK
imply VXLAN
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index d324a3884462..1a16f6d73cbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -12,17 +12,17 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
# mlx5 core basic
#
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
- health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
- mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
- fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \
- diag/fs_tracepoint.o diag/fw_tracer.o
+ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
+ transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
+ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
+ lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o
#
# Netdev basic
#
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
- en_selftest.o en/port.o
+ en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o
#
# Netdev extra
@@ -30,12 +30,12 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o
#
# Core extra
#
-mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 456f30007ad6..9008e17126db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -63,8 +63,8 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
mutex_lock(&priv->alloc_mutex);
original_node = dev_to_node(&dev->pdev->dev);
set_dev_node(&dev->pdev->dev, node);
- cpu_handle = dma_zalloc_coherent(&dev->pdev->dev, size,
- dma_handle, GFP_KERNEL);
+ cpu_handle = dma_alloc_coherent(&dev->pdev->dev, size, dma_handle,
+ GFP_KERNEL);
set_dev_node(&dev->pdev->dev, original_node);
mutex_unlock(&priv->alloc_mutex);
return cpu_handle;
@@ -186,10 +186,7 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
if (!pgdir)
return NULL;
- pgdir->bitmap = kcalloc(BITS_TO_LONGS(db_per_page),
- sizeof(unsigned long),
- GFP_KERNEL);
-
+ pgdir->bitmap = bitmap_zalloc(db_per_page, GFP_KERNEL);
if (!pgdir->bitmap) {
kfree(pgdir);
return NULL;
@@ -200,7 +197,7 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE,
&pgdir->db_dma, node);
if (!pgdir->db_page) {
- kfree(pgdir->bitmap);
+ bitmap_free(pgdir->bitmap);
kfree(pgdir);
return NULL;
}
@@ -280,7 +277,7 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
db->u.pgdir->db_page, db->u.pgdir->db_dma);
list_del(&db->u.pgdir->list);
- kfree(db->u.pgdir->bitmap);
+ bitmap_free(db->u.pgdir->bitmap);
kfree(db->u.pgdir);
}
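
The alloc.c hunks above swap two open-coded patterns for dedicated helpers: dma_alloc_coherent() zeroes its buffer in this kernel generation (making dma_zalloc_coherent() redundant), and bitmap_zalloc()/bitmap_free() replace kcalloc(BITS_TO_LONGS(n), ...)/kfree(). A minimal sketch of the latter pairing, with hypothetical helper names not taken from the patch:

#include <linux/bitmap.h>
#include <linux/gfp.h>

/* bitmap_zalloc() sizes and zeroes a bitmap large enough for 'nbits'
 * bits and must be paired with bitmap_free(), not kfree(). */
static unsigned long *example_alloc_bitmap(unsigned int nbits)
{
	return bitmap_zalloc(nbits, GFP_KERNEL);
}

static void example_free_bitmap(unsigned long *bitmap)
{
	bitmap_free(bitmap);
}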
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a5a0823e5ada..be48c6440251 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -40,9 +40,11 @@
#include <linux/random.h>
#include <linux/io-mapping.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
#include <linux/debugfs.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
CMD_IF_REV = 5,
@@ -313,6 +315,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_FPGA_DESTROY_QP:
case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
case MLX5_CMD_OP_DEALLOC_MEMIC:
+ case MLX5_CMD_OP_PAGE_FAULT_RESUME:
+ case MLX5_CMD_OP_QUERY_HOST_PARAMS:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -326,7 +330,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_CREATE_MKEY:
case MLX5_CMD_OP_QUERY_MKEY:
case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
- case MLX5_CMD_OP_PAGE_FAULT_RESUME:
case MLX5_CMD_OP_CREATE_EQ:
case MLX5_CMD_OP_QUERY_EQ:
case MLX5_CMD_OP_GEN_EQE:
@@ -371,6 +374,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
case MLX5_CMD_OP_ALLOC_Q_COUNTER:
case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ case MLX5_CMD_OP_SET_MONITOR_COUNTER:
+ case MLX5_CMD_OP_ARM_MONITOR_COUNTER:
case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
case MLX5_CMD_OP_QUERY_RATE_LIMIT:
case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
@@ -520,6 +525,8 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER);
+ MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER);
MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
@@ -621,6 +628,7 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
MLX5_COMMAND_STR_CASE(ALLOC_MEMIC);
MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC);
+ MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS);
default: return "unknown command opcode";
}
}
@@ -805,6 +813,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
return MLX5_GET(mbox_in, in->first.data, opcode);
}
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+
static void cb_timeout_handler(struct work_struct *work)
{
struct delayed_work *dwork = container_of(work, struct delayed_work,
@@ -1412,14 +1422,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
up(&cmd->sem);
}
+static int cmd_comp_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_core_dev *dev;
+ struct mlx5_cmd *cmd;
+ struct mlx5_eqe *eqe;
+
+ cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb);
+ dev = container_of(cmd, struct mlx5_core_dev, cmd);
+ eqe = data;
+
+ mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
+
+ return NOTIFY_OK;
+}
void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
{
+ MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD);
+ mlx5_eq_notifier_register(dev, &dev->cmd.nb);
mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
}
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
{
mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
+ mlx5_eq_notifier_unregister(dev, &dev->cmd.nb);
}
static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
@@ -1435,7 +1463,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
}
}
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
{
struct mlx5_cmd *cmd = &dev->cmd;
struct mlx5_cmd_work_ent *ent;
@@ -1533,7 +1561,47 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
}
}
}
-EXPORT_SYMBOL(mlx5_cmd_comp_handler);
+
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
+{
+ unsigned long flags;
+ u64 vector;
+
+ /* wait for pending handlers to complete */
+ mlx5_eq_synchronize_cmd_irq(dev);
+ spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+ vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+ if (!vector)
+ goto no_trig;
+
+ vector |= MLX5_TRIGGERED_CMD_COMP;
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+ mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+ mlx5_cmd_comp_handler(dev, vector, true);
+ return;
+
+no_trig:
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
+
+void mlx5_cmd_flush(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int i;
+
+ for (i = 0; i < cmd->max_reg_cmds; i++)
+ while (down_trylock(&cmd->sem))
+ mlx5_cmd_trigger_completions(dev);
+
+ while (down_trylock(&cmd->pages_sem))
+ mlx5_cmd_trigger_completions(dev);
+
+ /* Unlock cmdif */
+ up(&cmd->pages_sem);
+ for (i = 0; i < cmd->max_reg_cmds; i++)
+ up(&cmd->sem);
+}
static int status_to_err(u8 status)
{
@@ -1663,12 +1731,57 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
}
EXPORT_SYMBOL(mlx5_cmd_exec);
-int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
- void *out, int out_size, mlx5_cmd_cbk_t callback,
- void *context)
+void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev,
+ struct mlx5_async_ctx *ctx)
+{
+ ctx->dev = dev;
+ /* Starts at 1 to avoid doing wake_up if we are not cleaning up */
+ atomic_set(&ctx->num_inflight, 1);
+ init_waitqueue_head(&ctx->wait);
+}
+EXPORT_SYMBOL(mlx5_cmd_init_async_ctx);
+
+/**
+ * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx
+ * @ctx: The ctx to clean
+ *
+ * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The
+ * caller must ensure that mlx5_cmd_exec_cb() is not called during or after
+ * the call to mlx5_cmd_cleanup_async_ctx().
+ */
+void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx)
+{
+ atomic_dec(&ctx->num_inflight);
+ wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0);
+}
+EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx);
+
+static void mlx5_cmd_exec_cb_handler(int status, void *_work)
{
- return cmd_exec(dev, in, in_size, out, out_size, callback, context,
- false);
+ struct mlx5_async_work *work = _work;
+ struct mlx5_async_ctx *ctx = work->ctx;
+
+ work->user_callback(status, work);
+ if (atomic_dec_and_test(&ctx->num_inflight))
+ wake_up(&ctx->wait);
+}
+
+int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
+ void *out, int out_size, mlx5_async_cbk_t callback,
+ struct mlx5_async_work *work)
+{
+ int ret;
+
+ work->ctx = ctx;
+ work->user_callback = callback;
+ if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
+ return -EIO;
+ ret = cmd_exec(ctx->dev, in, in_size, out, out_size,
+ mlx5_cmd_exec_cb_handler, work, false);
+ if (ret && atomic_dec_and_test(&ctx->num_inflight))
+ wake_up(&ctx->wait);
+
+ return ret;
}
EXPORT_SYMBOL(mlx5_cmd_exec_cb);
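
The async-context lifetime scheme added above is subtle: num_inflight starts at 1, every mlx5_cmd_exec_cb() takes a reference with atomic_inc_not_zero(), and mlx5_cmd_cleanup_async_ctx() drops the initial reference and sleeps until the count reaches zero, which guarantees all callbacks have finished. A standalone sketch of the same idiom (hypothetical names, not mlx5 code):

#include <linux/atomic.h>
#include <linux/wait.h>

struct example_ctx {
	atomic_t num_inflight;		/* starts at 1: cleanup's reference */
	wait_queue_head_t wait;
};

static void example_ctx_init(struct example_ctx *ctx)
{
	atomic_set(&ctx->num_inflight, 1);
	init_waitqueue_head(&ctx->wait);
}

static bool example_ctx_get(struct example_ctx *ctx)
{
	/* Fails once cleanup has run and the count already hit zero. */
	return atomic_inc_not_zero(&ctx->num_inflight);
}

static void example_ctx_put(struct example_ctx *ctx)
{
	if (atomic_dec_and_test(&ctx->num_inflight))
		wake_up(&ctx->wait);
}

static void example_ctx_cleanup(struct example_ctx *ctx)
{
	example_ctx_put(ctx);		/* drop the initial reference */
	wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0);
}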
@@ -1741,8 +1854,8 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
{
struct device *ddev = &dev->pdev->dev;
- cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE,
- &cmd->alloc_dma, GFP_KERNEL);
+ cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE,
+ &cmd->alloc_dma, GFP_KERNEL);
if (!cmd->cmd_alloc_buf)
return -ENOMEM;
@@ -1756,9 +1869,9 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf,
cmd->alloc_dma);
- cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev,
- 2 * MLX5_ADAPTER_PAGE_SIZE - 1,
- &cmd->alloc_dma, GFP_KERNEL);
+ cmd->cmd_alloc_buf = dma_alloc_coherent(ddev,
+ 2 * MLX5_ADAPTER_PAGE_SIZE - 1,
+ &cmd->alloc_dma, GFP_KERNEL);
if (!cmd->cmd_alloc_buf)
return -ENOMEM;
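
One line in mlx5_cmd_trigger_completions() above rewards a closer look: cmd->bitmask has a bit set for every free command slot, so inverting it and masking to the 2^log_sz valid slots yields the set of entries to force-complete. A worked sketch of that arithmetic (hypothetical helper, not part of the patch):

/* With log_sz = 3 there are 8 slots; free_bitmask = 0x05 (slots 0 and 2
 * free) gives busy = 0xFA, i.e. every other slot is in flight. */
static u64 example_busy_slots(u64 free_bitmask, u8 log_sz)
{
	u64 valid = (1ULL << (1 << log_sz)) - 1;

	return ~free_bitmask & valid;
}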
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 4b85abb5c9f7..713a17ee3751 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -38,6 +38,7 @@
#include <rdma/ib_verbs.h>
#include <linux/mlx5/cq.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#define TASKLET_MAX_TIME 2
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
@@ -92,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
- struct mlx5_eq *eq;
+ struct mlx5_eq_comp *eq;
int err;
- eq = mlx5_eqn2eq(dev, eqn);
+ eq = mlx5_eqn2comp_eq(dev, eqn);
if (IS_ERR(eq))
return PTR_ERR(eq);
@@ -119,12 +120,12 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
INIT_LIST_HEAD(&cq->tasklet_ctx.list);
/* Add to comp EQ CQ tree to recv comp events */
- err = mlx5_eq_add_cq(eq, cq);
+ err = mlx5_eq_add_cq(&eq->core, cq);
if (err)
goto err_cmd;
/* Add to async EQ CQ tree to recv async events */
- err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+ err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq);
if (err)
goto err_cq_add;
@@ -139,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
return 0;
err_cq_add:
- mlx5_eq_del_cq(eq, cq);
+ mlx5_eq_del_cq(&eq->core, cq);
err_cmd:
memset(din, 0, sizeof(din));
memset(dout, 0, sizeof(dout));
@@ -157,11 +158,11 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
int err;
- err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+ err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
if (err)
return err;
- err = mlx5_eq_del_cq(cq->eq, cq);
+ err = mlx5_eq_del_cq(&cq->eq->core, cq);
if (err)
return err;
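
The cq.c changes track a layering split made elsewhere in this series: the completion EQ now embeds the generic EQ, so shared helpers such as mlx5_eq_add_cq()/mlx5_eq_del_cq() take the inner object. Roughly, with everything beyond the 'core' member being an assumption:

struct mlx5_eq_comp_sketch {
	struct mlx5_eq core;	/* generic EQ: pass &eq->core to shared helpers */
	/* completion-only state (tasklet/IRQ bookkeeping) lives out here */
};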
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 90fabd612b6c..a11e22d0b0cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -36,6 +36,7 @@
#include <linux/mlx5/cq.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
QP_PID,
@@ -349,6 +350,16 @@ out:
return param;
}
+static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
+
+ MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+ MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
int index)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 37ba7c78859d..ebc046fa97d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -45,75 +45,11 @@ struct mlx5_device_context {
unsigned long state;
};
-struct mlx5_delayed_event {
- struct list_head list;
- struct mlx5_core_dev *dev;
- enum mlx5_dev_event event;
- unsigned long param;
-};
-
enum {
MLX5_INTERFACE_ADDED,
MLX5_INTERFACE_ATTACHED,
};
-static void add_delayed_event(struct mlx5_priv *priv,
- struct mlx5_core_dev *dev,
- enum mlx5_dev_event event,
- unsigned long param)
-{
- struct mlx5_delayed_event *delayed_event;
-
- delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC);
- if (!delayed_event) {
- mlx5_core_err(dev, "event %d is missed\n", event);
- return;
- }
-
- mlx5_core_dbg(dev, "Accumulating event %d\n", event);
- delayed_event->dev = dev;
- delayed_event->event = event;
- delayed_event->param = param;
- list_add_tail(&delayed_event->list, &priv->waiting_events_list);
-}
-
-static void delayed_event_release(struct mlx5_device_context *dev_ctx,
- struct mlx5_priv *priv)
-{
- struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
- struct mlx5_delayed_event *de;
- struct mlx5_delayed_event *n;
- struct list_head temp;
-
- INIT_LIST_HEAD(&temp);
-
- spin_lock_irq(&priv->ctx_lock);
-
- priv->is_accum_events = false;
- list_splice_init(&priv->waiting_events_list, &temp);
- if (!dev_ctx->context)
- goto out;
- list_for_each_entry_safe(de, n, &temp, list)
- dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
-
-out:
- spin_unlock_irq(&priv->ctx_lock);
-
- list_for_each_entry_safe(de, n, &temp, list) {
- list_del(&de->list);
- kfree(de);
- }
-}
-
-/* accumulating events that can come after mlx5_ib calls to
- * ib_register_device, till adding that interface to the events list.
- */
-static void delayed_event_start(struct mlx5_priv *priv)
-{
- spin_lock_irq(&priv->ctx_lock);
- priv->is_accum_events = true;
- spin_unlock_irq(&priv->ctx_lock);
-}
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
@@ -129,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
dev_ctx->intf = intf;
- delayed_event_start(priv);
-
dev_ctx->context = intf->add(dev);
if (dev_ctx->context) {
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
@@ -139,22 +73,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (dev_ctx->intf->pfault) {
- if (priv->pfault) {
- mlx5_core_err(dev, "multiple page fault handlers not supported");
- } else {
- priv->pfault_ctx = dev_ctx->context;
- priv->pfault = dev_ctx->intf->pfault;
- }
- }
-#endif
spin_unlock_irq(&priv->ctx_lock);
}
- delayed_event_release(dev_ctx, priv);
-
if (!dev_ctx->context)
kfree(dev_ctx);
}
@@ -179,15 +100,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
if (!dev_ctx)
return;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- spin_lock_irq(&priv->ctx_lock);
- if (priv->pfault == dev_ctx->intf->pfault)
- priv->pfault = NULL;
- spin_unlock_irq(&priv->ctx_lock);
-
- synchronize_srcu(&priv->pfault_srcu);
-#endif
-
spin_lock_irq(&priv->ctx_lock);
list_del(&dev_ctx->list);
spin_unlock_irq(&priv->ctx_lock);
@@ -207,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv
if (!dev_ctx)
return;
- delayed_event_start(priv);
if (intf->attach) {
if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
- goto out;
+ return;
if (intf->attach(dev, dev_ctx->context))
- goto out;
-
+ return;
set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
} else {
if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
- goto out;
+ return;
dev_ctx->context = intf->add(dev);
if (!dev_ctx->context)
- goto out;
-
+ return;
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
}
-
-out:
- delayed_event_release(dev_ctx, priv);
}
void mlx5_attach_device(struct mlx5_core_dev *dev)
@@ -350,28 +256,6 @@ void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
mutex_unlock(&mlx5_intf_mutex);
}
-void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
-{
- struct mlx5_priv *priv = &mdev->priv;
- struct mlx5_device_context *dev_ctx;
- unsigned long flags;
- void *result = NULL;
-
- spin_lock_irqsave(&priv->ctx_lock, flags);
-
- list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
- if ((dev_ctx->intf->protocol == protocol) &&
- dev_ctx->intf->get_dev) {
- result = dev_ctx->intf->get_dev(dev_ctx->context);
- break;
- }
-
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
-
- return result;
-}
-EXPORT_SYMBOL(mlx5_get_protocol_dev);
-
/* Must be called with intf_mutex held */
void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
{
@@ -422,44 +306,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
return res;
}
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
- unsigned long param)
-{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_device_context *dev_ctx;
- unsigned long flags;
-
- spin_lock_irqsave(&priv->ctx_lock, flags);
-
- if (priv->is_accum_events)
- add_delayed_event(priv, dev, event, param);
-
- /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
- * still in priv->ctx_list. In this case, only notify the dev_ctx if its
- * ADDED or ATTACHED bit are set.
- */
- list_for_each_entry(dev_ctx, &priv->ctx_list, list)
- if (dev_ctx->intf->event &&
- (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
- test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
- dev_ctx->intf->event(dev, dev_ctx->context, event, param);
-
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
-}
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
- struct mlx5_pagefault *pfault)
-{
- struct mlx5_priv *priv = &dev->priv;
- int srcu_idx;
-
- srcu_idx = srcu_read_lock(&priv->pfault_srcu);
- if (priv->pfault)
- priv->pfault(dev, priv->pfault_ctx, pfault);
- srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
-}
-#endif
void mlx5_dev_list_lock(void)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 0f11fff32a9b..8ecac81a385d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -161,10 +161,10 @@ static void print_misc_parameters_hdrs(struct trace_seq *p,
PRINT_MASKED_VAL(name, p, format); \
}
DECLARE_MASK_VAL(u64, gre_key) = {
- .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 |
- MLX5_GET(fte_match_set_misc, mask, gre_key_l),
- .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 |
- MLX5_GET(fte_match_set_misc, value, gre_key_l)};
+ .m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 |
+ MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo),
+ .v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 |
+ MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)};
PRINT_MASKED_VAL(gre_key, p, "%llu");
PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u");
@@ -258,6 +258,8 @@ const char *parse_fs_dst(struct trace_seq *p,
return ret;
}
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_ft);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_ft);
EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg);
EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg);
EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index d027ce00c8ce..a4cf123e3f17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -61,6 +61,41 @@ const char *parse_fs_dst(struct trace_seq *p,
const struct mlx5_flow_destination *dst,
u32 counter_id);
+TRACE_EVENT(mlx5_fs_add_ft,
+ TP_PROTO(const struct mlx5_flow_table *ft),
+ TP_ARGS(ft),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_table *, ft)
+ __field(u32, id)
+ __field(u32, level)
+ __field(u32, type)
+ ),
+ TP_fast_assign(
+ __entry->ft = ft;
+ __entry->id = ft->id;
+ __entry->level = ft->level;
+ __entry->type = ft->type;
+ ),
+ TP_printk("ft=%p id=%u level=%u type=%u \n",
+ __entry->ft, __entry->id, __entry->level, __entry->type)
+ );
+
+TRACE_EVENT(mlx5_fs_del_ft,
+ TP_PROTO(const struct mlx5_flow_table *ft),
+ TP_ARGS(ft),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_table *, ft)
+ __field(u32, id)
+ ),
+ TP_fast_assign(
+ __entry->ft = ft;
+ __entry->id = ft->id;
+
+ ),
+ TP_printk("ft=%p id=%u\n",
+ __entry->ft, __entry->id)
+ );
+
TRACE_EVENT(mlx5_fs_add_fg,
TP_PROTO(const struct mlx5_flow_group *fg),
TP_ARGS(fg),
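
For context: each TRACE_EVENT(name, ...) above expands to a trace_name() inline, so the two new flow-table events would be emitted from the flow-steering core roughly as below (the call-site placement is an assumption; those hunks live in fs_core.c, not shown here):

/* Hypothetical call sites for the new tracepoints. */
static void example_ft_lifecycle(const struct mlx5_flow_table *ft)
{
	trace_mlx5_fs_add_ft(ft);	/* after the table is created */
	/* ... table in use ... */
	trace_mlx5_fs_del_ft(ft);	/* before the table is destroyed */
}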
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index d4ec93bde4de..6999f4486e9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
#define CREATE_TRACE_POINTS
+#include "lib/eq.h"
#include "fw_tracer.h"
#include "fw_tracer_tracepoint.h"
@@ -846,9 +847,9 @@ free_tracer:
return ERR_PTR(err);
}
-/* Create HW resources + start tracer
- * must be called before Async EQ is created
- */
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data);
+
+/* Create HW resources + start tracer */
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
{
struct mlx5_core_dev *dev;
@@ -874,6 +875,9 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
goto err_dealloc_pd;
}
+ MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
+ mlx5_eq_notifier_register(dev, &tracer->nb);
+
mlx5_fw_tracer_start(tracer);
return 0;
@@ -883,9 +887,7 @@ err_dealloc_pd:
return err;
}
-/* Stop tracer + Cleanup HW resources
- * must be called after Async EQ is destroyed
- */
+/* Stop tracer + Cleanup HW resources */
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
{
if (IS_ERR_OR_NULL(tracer))
@@ -893,7 +895,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
tracer->owner);
-
+ mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb);
cancel_work_sync(&tracer->ownership_change_work);
cancel_work_sync(&tracer->handle_traces_work);
@@ -922,12 +924,11 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
kfree(tracer);
}
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
{
- struct mlx5_fw_tracer *tracer = dev->tracer;
-
- if (!tracer)
- return;
+ struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct mlx5_eqe *eqe = data;
switch (eqe->sub_type) {
case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
@@ -942,6 +943,8 @@ void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
eqe->sub_type);
}
+
+ return NOTIFY_OK;
}
EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index 0347f2dd5cee..a8b8747f2b61 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -55,6 +55,7 @@
struct mlx5_fw_tracer {
struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
bool owner;
u8 trc_ver;
struct workqueue_struct *work_queue;
@@ -170,6 +171,5 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
#endif
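
The tracer conversion follows the event-notifier idiom this series introduces across the driver: embed a struct mlx5_nb, recover the container with mlx5_nb_cof() in the callback, and register/unregister around start/stop. A condensed sketch with hypothetical names:

struct example_feature {
	struct mlx5_core_dev *dev;
	struct mlx5_nb nb;		/* notifier_block + event type */
};

static int example_event(struct notifier_block *nb, unsigned long action,
			 void *data)
{
	struct example_feature *f = mlx5_nb_cof(nb, struct example_feature, nb);
	struct mlx5_eqe *eqe = data;

	mlx5_core_dbg(f->dev, "event: sub_type %d\n", eqe->sub_type);
	return NOTIFY_OK;
}

static void example_start(struct example_feature *f)
{
	MLX5_NB_INIT(&f->nb, example_event, DEVICE_TRACER);
	mlx5_eq_notifier_register(f->dev, &f->nb);
}

static void example_stop(struct example_feature *f)
{
	mlx5_eq_notifier_unregister(f->dev, &f->nb);
}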
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
new file mode 100644
index 000000000000..4746f2d28fb6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "ecpf.h"
+
+bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev)
+{
+ return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1;
+}
+
+static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};
+
+ MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+ MLX5_SET(enable_hca_in, in, function_id, 0);
+ MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0);
+ return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+}
+
+static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};
+
+ MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+ MLX5_SET(disable_hca_in, in, function_id, 0);
+ MLX5_SET(disable_hca_in, in, embedded_cpu_function, 0);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_peer_pf_init(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ err = mlx5_peer_pf_enable_hca(dev);
+ if (err)
+ mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n",
+ err);
+
+ return err;
+}
+
+static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ err = mlx5_peer_pf_disable_hca(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n",
+ err);
+ return;
+ }
+
+ err = mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages);
+ if (err)
+ mlx5_core_warn(dev, "Timeout reclaiming peer PF pages err(%d)\n",
+ err);
+}
+
+int mlx5_ec_init(struct mlx5_core_dev *dev)
+{
+ int err = 0;
+
+ if (!mlx5_core_is_ecpf(dev))
+ return 0;
+
+ /* ECPF shall enable HCA for peer PF in the same way a PF
+ * does this for its VFs.
+ */
+ err = mlx5_peer_pf_init(dev);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_ecpf(dev))
+ return;
+
+ mlx5_peer_pf_cleanup(dev);
+}
+
+static int mlx5_query_host_params_context(struct mlx5_core_dev *dev,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {};
+
+ MLX5_SET(query_host_params_in, in, opcode,
+ MLX5_CMD_OP_QUERY_HOST_PARAMS);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf)
+{
+ u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {};
+ int err;
+
+ err = mlx5_query_host_params_context(dev, out, sizeof(out));
+ if (err)
+ return err;
+
+ *num_vf = MLX5_GET(query_host_params_out, out,
+ host_params_context.host_num_of_vfs);
+ mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf);
+
+ return 0;
+}
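
The ECPF helpers are written to be called unconditionally; mlx5_ec_init()/mlx5_ec_cleanup() bail out early unless mlx5_core_is_ecpf() holds. A hedged sketch of the intended wiring (the real call sites are in main.c, whose diff is not shown here):

/* Hypothetical load/unload wiring for the new ECPF helpers. */
static int example_load(struct mlx5_core_dev *dev)
{
	int err;

	err = mlx5_ec_init(dev);	/* no-op on non-ECPF devices */
	if (err) {
		mlx5_core_err(dev, "Failed to init embedded CPU, err %d\n", err);
		return err;
	}
	/* ... rest of device load ... */
	return 0;
}

static void example_unload(struct mlx5_core_dev *dev)
{
	mlx5_ec_cleanup(dev);
}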
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
new file mode 100644
index 000000000000..346372df218f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_ECPF_H__
+#define __MLX5_ECPF_H__
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+enum {
+ MLX5_ECPU_BIT_NUM = 23,
+};
+
+bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev);
+int mlx5_ec_init(struct mlx5_core_dev *dev);
+void mlx5_ec_cleanup(struct mlx5_core_dev *dev);
+int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline bool
+mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; }
+static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {}
+static inline int
+mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf)
+{ return -EOPNOTSUPP; }
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_ECPF_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 118324802926..d3eaf2ceaa39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -49,6 +49,7 @@
#include <net/switchdev.h>
#include <net/xdp.h>
#include <linux/net_dim.h>
+#include <linux/bits.h>
#include "wq.h"
#include "mlx5_core.h"
#include "en_stats.h"
@@ -75,15 +76,14 @@ struct page_pool;
#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+#define MLX5E_RX_MAX_HEAD (256)
+
#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
(6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
#define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \
max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
-#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
-#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
-#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
- (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
- MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
+#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \
+ MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD))
#define MLX5_MPWRQ_LOG_WQE_SZ 18
#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -118,8 +118,6 @@ struct page_pool;
#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2
-#define MLX5E_RX_MAX_HEAD (256)
-
#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
#define MLX5E_DEFAULT_LRO_TIMEOUT 32
#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4
@@ -147,9 +145,6 @@ struct page_pool;
MLX5_UMR_MTT_ALIGNMENT))
#define MLX5E_UMR_WQEBBS \
(DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
-#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS
-
-#define MLX5E_NUM_MAIN_GROUPS 9
#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
@@ -178,8 +173,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
{
return is_kdump_kernel() ?
MLX5E_MIN_NUM_CHANNELS :
- min_t(int, mdev->priv.eq_table.num_comp_vectors,
- MLX5E_MAX_NUM_CHANNELS);
+ min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
}
/* Use this function to get max num channels after netdev was created */
@@ -214,22 +208,24 @@ struct mlx5e_umr_wqe {
extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
enum mlx5e_priv_flag {
- MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
- MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1),
- MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2),
- MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3),
- MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4),
+ MLX5E_PFLAG_RX_CQE_BASED_MODER,
+ MLX5E_PFLAG_TX_CQE_BASED_MODER,
+ MLX5E_PFLAG_RX_CQE_COMPRESS,
+ MLX5E_PFLAG_RX_STRIDING_RQ,
+ MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
+ MLX5E_PFLAG_XDP_TX_MPWQE,
+ MLX5E_NUM_PFLAGS, /* Keep last */
};
#define MLX5E_SET_PFLAG(params, pflag, enable) \
do { \
if (enable) \
- (params)->pflags |= (pflag); \
+ (params)->pflags |= BIT(pflag); \
else \
- (params)->pflags &= ~(pflag); \
+ (params)->pflags &= ~(BIT(pflag)); \
} while (0)
-#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag)))
+#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
#ifdef CONFIG_MLX5_CORE_EN_DCB
#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
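
After the pflag hunk above, the priv-flag enum holds bit indices instead of pre-shifted masks, and the accessor macros apply BIT() themselves, so new flags no longer need hand-assigned powers of two and MLX5E_NUM_PFLAGS falls out of the enum for free. Call sites are unchanged, e.g. this hypothetical one:

/* Hypothetical call site exercising the reworked pflag macros. */
static bool example_enable_mpwqe_xdp(struct mlx5e_params *params)
{
	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, true);
	return MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
}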
@@ -247,9 +243,6 @@ struct mlx5e_params {
bool lro_en;
u32 lro_wqe_sz;
u8 tx_min_inline_mode;
- u8 rss_hfunc;
- u8 toeplitz_hash_key[40];
- u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
bool vlan_strip_disable;
bool scatter_fcs_en;
bool rx_dim_enabled;
@@ -313,16 +306,18 @@ struct mlx5e_cq {
struct mlx5_core_cq mcq;
struct mlx5e_channel *channel;
+ /* control */
+ struct mlx5_core_dev *mdev;
+ struct mlx5_wq_ctrl wq_ctrl;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_cq_decomp {
/* cqe decompression */
struct mlx5_cqe64 title;
struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
u8 mini_arr_idx;
- u16 decmprs_left;
- u16 decmprs_wqe_counter;
-
- /* control */
- struct mlx5_core_dev *mdev;
- struct mlx5_wq_ctrl wq_ctrl;
+ u16 left;
+ u16 wqe_counter;
} ____cacheline_aligned_in_smp;
struct mlx5e_tx_wqe_info {
@@ -349,7 +344,6 @@ enum {
MLX5E_SQ_STATE_IPSEC,
MLX5E_SQ_STATE_AM,
MLX5E_SQ_STATE_TLS,
- MLX5E_SQ_STATE_REDIRECT,
};
struct mlx5e_sq_wqe_info {
@@ -393,10 +387,7 @@ struct mlx5e_txqsq {
struct mlx5e_channel *channel;
int txq_ix;
u32 rate_limit;
- struct mlx5e_txqsq_recover {
- struct work_struct recover_work;
- u64 last_recover;
- } recover;
+ struct work_struct recover_work;
} ____cacheline_aligned_in_smp;
struct mlx5e_dma_info {
@@ -410,24 +401,51 @@ struct mlx5e_xdp_info {
struct mlx5e_dma_info di;
};
+struct mlx5e_xdp_info_fifo {
+ struct mlx5e_xdp_info *xi;
+ u32 *cc;
+ u32 *pc;
+ u32 mask;
+};
+
+struct mlx5e_xdp_wqe_info {
+ u8 num_wqebbs;
+ u8 num_ds;
+};
+
+struct mlx5e_xdp_mpwqe {
+ /* Current MPWQE session */
+ struct mlx5e_tx_wqe *wqe;
+ u8 ds_count;
+ u8 max_ds_count;
+};
+
+struct mlx5e_xdpsq;
+typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*,
+ struct mlx5e_xdp_info*);
struct mlx5e_xdpsq {
/* data path */
/* dirtied @completion */
+ u32 xdpi_fifo_cc;
u16 cc;
bool redirect_flush;
/* dirtied @xmit */
- u16 pc ____cacheline_aligned_in_smp;
- bool doorbell;
+ u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
+ u16 pc;
+ struct mlx5_wqe_ctrl_seg *doorbell_cseg;
+ struct mlx5e_xdp_mpwqe mpwqe;
struct mlx5e_cq cq;
/* read only */
struct mlx5_wq_cyc wq;
struct mlx5e_xdpsq_stats *stats;
+ mlx5e_fp_xmit_xdp_frame xmit_xdp_frame;
struct {
- struct mlx5e_xdp_info *xdpi;
+ struct mlx5e_xdp_wqe_info *wqe_info;
+ struct mlx5e_xdp_info_fifo xdpi_fifo;
} db;
void __iomem *uar_map;
u32 sqn;
@@ -559,6 +577,7 @@ struct mlx5e_rq {
struct net_device *netdev;
struct mlx5e_rq_stats *stats;
struct mlx5e_cq cq;
+ struct mlx5e_cq_decomp cqd;
struct mlx5e_page_cache page_cache;
struct hwtstamp_config *tstamp;
struct mlx5_clock *clock;
@@ -616,6 +635,7 @@ struct mlx5e_channel {
struct hwtstamp_config *tstamp;
int ix;
int cpu;
+ cpumask_var_t xps_cpumask;
};
struct mlx5e_channels {
@@ -633,9 +653,9 @@ struct mlx5e_channel_stats {
} ____cacheline_aligned_in_smp;
enum {
- MLX5E_STATE_ASYNC_EVENTS_ENABLED,
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
+ MLX5E_STATE_XDP_TX_ENABLED,
};
struct mlx5e_rqt {
@@ -654,6 +674,20 @@ enum {
MLX5E_NIC_PRIO
};
+struct mlx5e_rss_params {
+ u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
+ u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
+ u8 toeplitz_hash_key[40];
+ u8 hfunc;
+};
+
+struct mlx5e_modify_sq_param {
+ int curr_state;
+ int next_state;
+ int rl_update;
+ int rl_index;
+};
+
struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -674,6 +708,7 @@ struct mlx5e_priv {
struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS];
+ struct mlx5e_rss_params rss_params;
u32 tx_rates[MLX5E_MAX_NUM_SQS];
struct mlx5e_flow_steering fs;
@@ -683,6 +718,8 @@ struct mlx5e_priv {
struct work_struct set_rx_mode_work;
struct work_struct tx_timeout_work;
struct work_struct update_stats_work;
+ struct work_struct monitor_counters_work;
+ struct mlx5_nb monitor_counters_nb;
struct mlx5_core_dev *mdev;
struct net_device *netdev;
@@ -692,6 +729,8 @@ struct mlx5e_priv {
struct hwtstamp_config tstamp;
u16 q_counter;
u16 drop_rq_q_counter;
+ struct notifier_block events_nb;
+
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx dcbx;
#endif
@@ -704,6 +743,7 @@ struct mlx5e_priv {
#ifdef CONFIG_MLX5_EN_TLS
struct mlx5e_tls *tls;
#endif
+ struct devlink_health_reporter *tx_reporter;
};
struct mlx5e_profile {
@@ -769,6 +809,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
void mlx5e_update_stats(struct mlx5e_priv *priv);
+void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
+void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s);
void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
int mlx5e_self_test_num(struct mlx5e_priv *priv);
@@ -799,9 +841,11 @@ struct mlx5e_redirect_rqt_param {
int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
struct mlx5e_redirect_rqt_param rrp);
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
- enum mlx5e_traffic_types tt,
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
+ const struct mlx5e_tirc_config *ttconfig,
void *tirc, bool inner);
+void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen);
+struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt);
int mlx5e_open_locked(struct net_device *netdev);
int mlx5e_close_locked(struct net_device *netdev);
@@ -814,9 +858,10 @@ void mlx5e_close_channels(struct mlx5e_channels *chs);
* switching channels
*/
typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv);
-void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
- struct mlx5e_channels *new_chs,
- mlx5e_fp_hw_modify hw_modify);
+int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv);
+int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *new_chs,
+ mlx5e_fp_hw_modify hw_modify);
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
@@ -830,6 +875,11 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
+int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+ struct mlx5e_modify_sq_param *p);
+void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
+void mlx5e_tx_disable_queue(struct netdev_queue *txq);
+
static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
{
return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) &&
@@ -931,14 +981,16 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
int mlx5e_create_tises(struct mlx5e_priv *priv);
-void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
+void mlx5e_update_carrier(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
+void mlx5e_update_ndo_stats(struct mlx5e_priv *priv);
void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
int mlx5e_bits_invert(unsigned long a, int size);
typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv);
+int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv);
int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
change_hw_mtu_cb set_mtu_cb);
@@ -962,12 +1014,20 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal);
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal);
+int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
+ struct ethtool_link_ksettings *link_ksettings);
+int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
+ const struct ethtool_link_ksettings *link_ksettings);
u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv);
u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
struct ethtool_ts_info *info);
int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
struct ethtool_flash *flash);
+void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam);
+int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam);
/* mlx5e generic netdev management API */
int mlx5e_netdev_init(struct net_device *netdev,
@@ -983,12 +1043,26 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv);
void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
u16 max_channels, u16 mtu);
void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
-void mlx5e_build_rss_params(struct mlx5e_params *params);
+void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
+ u16 num_channels);
u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
void mlx5e_rx_dim_work(struct work_struct *work);
void mlx5e_tx_dim_work(struct work_struct *work);
+
+void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti);
+void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti);
+netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features);
+#ifdef CONFIG_MLX5_ESWITCH
+int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac);
+int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate);
+int mlx5e_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi);
+int mlx5e_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats);
+#endif
#endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 1431232c9a09..be5961ff24cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -73,6 +73,22 @@ enum mlx5e_traffic_types {
MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
};
+struct mlx5e_tirc_config {
+ u8 l3_prot_type;
+ u8 l4_prot_type;
+ u32 rx_hash_fields;
+};
+
+#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP)
+#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_L4_SPORT |\
+ MLX5_HASH_FIELD_SEL_L4_DPORT)
+#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_IPSEC_SPI)
+
enum mlx5e_tunnel_types {
MLX5E_TT_IPV4_GRE,
MLX5E_TT_IPV6_GRE,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
new file mode 100644
index 000000000000..7cd5b02e0f10
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include "en.h"
+#include "monitor_stats.h"
+#include "lib/eq.h"
+
+/* Driver will set the following watch counters list:
+ * Ppcnt.802_3:
+ * a_in_range_length_errors Type: 0x0, Counter: 0x0, group_id = N/A
+ * a_out_of_range_length_field Type: 0x0, Counter: 0x1, group_id = N/A
+ * a_frame_too_long_errors Type: 0x0, Counter: 0x2, group_id = N/A
+ * a_frame_check_sequence_errors Type: 0x0, Counter: 0x3, group_id = N/A
+ * a_alignment_errors Type: 0x0, Counter: 0x4, group_id = N/A
+ * if_out_discards Type: 0x0, Counter: 0x5, group_id = N/A
+ * Q_Counters:
+ * Q[index].rx_out_of_buffer Type: 0x1, Counter: 0x4, group_id = counter_ix
+ */
+
+#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1
+#define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1
+
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters))
+ return false;
+ if (MLX5_CAP_PCAM_REG(mdev, ppcnt) &&
+ MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters) <
+ NUM_REQ_PPCNT_COUNTER_S1)
+ return false;
+ if (MLX5_CAP_GEN(mdev, num_q_monitor_counters) <
+ NUM_REQ_Q_COUNTERS_S1)
+ return false;
+ return true;
+}
+
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(arm_monitor_counter_out)] = {};
+
+ MLX5_SET(arm_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_ARM_MONITOR_COUNTER);
+ mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static void mlx5e_monitor_counters_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ monitor_counters_work);
+
+ mutex_lock(&priv->state_lock);
+ mlx5e_update_ndo_stats(priv);
+ mutex_unlock(&priv->state_lock);
+ mlx5e_monitor_counter_arm(priv);
+}
+
+static int mlx5e_monitor_event_handler(struct notifier_block *nb,
+ unsigned long event, void *eqe)
+{
+ struct mlx5e_priv *priv = mlx5_nb_cof(nb, struct mlx5e_priv,
+ monitor_counters_nb);
+ queue_work(priv->wq, &priv->monitor_counters_work);
+ return NOTIFY_OK;
+}
+
+static void mlx5e_monitor_counter_start(struct mlx5e_priv *priv)
+{
+ MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
+ MONITOR_COUNTER);
+ mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
+}
+
+static void mlx5e_monitor_counter_stop(struct mlx5e_priv *priv)
+{
+ mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
+ cancel_work_sync(&priv->monitor_counters_work);
+}
+
+static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in)
+{
+ enum mlx5_monitor_counter_ppcnt ppcnt_cnt;
+
+ for (ppcnt_cnt = 0;
+ ppcnt_cnt < NUM_REQ_PPCNT_COUNTER_S1;
+ ppcnt_cnt++, cnt++) {
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].type,
+ MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter,
+ ppcnt_cnt);
+ }
+ return ppcnt_cnt;
+}
+
+static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in)
+{
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].type,
+ MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter,
+ MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER);
+ MLX5_SET(set_monitor_counter_in, in,
+ monitor_counter[cnt].counter_group_id,
+ q_counter);
+ return 1;
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters);
+ int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters);
+ int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 :
+ MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters);
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+ int q_counter = priv->q_counter;
+ int cnt = 0;
+
+ if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 &&
+ max_num_of_counters >= (NUM_REQ_PPCNT_COUNTER_S1 + cnt))
+ cnt += fill_monitor_counter_ppcnt_set1(cnt, in);
+
+ if (num_q_counters >= NUM_REQ_Q_COUNTERS_S1 &&
+ max_num_of_counters >= (NUM_REQ_Q_COUNTERS_S1 + cnt) &&
+ q_counter)
+ cnt += fill_monitor_counter_q_counter_set1(cnt, q_counter, in);
+
+ MLX5_SET(set_monitor_counter_in, in, num_of_counters, cnt);
+ MLX5_SET(set_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv)
+{
+ INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work);
+ mlx5e_monitor_counter_start(priv);
+ mlx5e_set_monitor_counter(priv);
+ mlx5e_monitor_counter_arm(priv);
+ queue_work(priv->wq, &priv->update_stats_work);
+}
+
+static void mlx5e_monitor_counter_disable(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+
+ MLX5_SET(set_monitor_counter_in, in, num_of_counters, 0);
+ MLX5_SET(set_monitor_counter_in, in, opcode,
+ MLX5_CMD_OP_SET_MONITOR_COUNTER);
+
+ mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
+{
+ mlx5e_monitor_counter_disable(priv);
+ mlx5e_monitor_counter_stop(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
new file mode 100644
index 000000000000..e1ac4b3d22fb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_MONITOR_H__
+#define __MLX5_MONITOR_H__
+
+int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_init(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv);
+void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv);
+
+#endif /* __MLX5_MONITOR_H__ */
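
Tying the new monitor-counter API together: the comments in monitor_stats.c require callers to gate on mlx5e_monitor_counter_supported(), so typical wiring looks like the sketch below (hypothetical function names; the real call sites are expected in en_main.c):

/* Hypothetical enable/disable wiring for the monitor-counter API. */
static void example_enable_monitor(struct mlx5e_priv *priv)
{
	if (mlx5e_monitor_counter_supported(priv))
		mlx5e_monitor_counter_init(priv);
}

static void example_disable_monitor(struct mlx5e_priv *priv)
{
	if (mlx5e_monitor_counter_supported(priv))
		mlx5e_monitor_counter_cleanup(priv);
}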
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 4a37713023be..d5e5afbdca6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -63,66 +63,165 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
[MLX5E_50GBASE_KR2] = 50000,
};
-u32 mlx5e_port_ptys2speed(u32 eth_proto_oper)
+static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
+ [MLX5E_SGMII_100M] = 100,
+ [MLX5E_1000BASE_X_SGMII] = 1000,
+ [MLX5E_5GBASE_R] = 5000,
+ [MLX5E_10GBASE_XFI_XAUI_1] = 10000,
+ [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000,
+ [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000,
+ [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000,
+ [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000,
+ [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000,
+ [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000,
+ [MLX5E_400GAUI_8] = 400000,
+};
+
+static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
+ const u32 **arr, u32 *size)
+{
+ bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+
+ *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
+ ARRAY_SIZE(mlx5e_link_speed);
+ *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed;
+}
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+ struct mlx5e_port_eth_proto *eproto)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ if (!eproto)
+ return -EINVAL;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
+ if (err)
+ return err;
+
+ eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+ eth_proto_capability);
+ eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin);
+ eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
+ return 0;
+}
+
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+ u8 *an_disable_cap, u8 *an_disable_admin)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+
+ *an_status = 0;
+ *an_disable_cap = 0;
+ *an_disable_admin = 0;
+
+ if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1))
+ return;
+
+ *an_status = MLX5_GET(ptys_reg, out, an_status);
+ *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
+ *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+}
+
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+ u32 proto_admin, bool ext)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ u32 in[MLX5_ST_SZ_DW(ptys_reg)];
+ u8 an_disable_admin;
+ u8 an_disable_cap;
+ u8 an_status;
+
+ mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap,
+ &an_disable_admin);
+ if (!an_disable_cap && an_disable)
+ return -EPERM;
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(ptys_reg, in, local_port, 1);
+ MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
+ MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN);
+ if (ext)
+ MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin);
+ else
+ MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PTYS, 0, 1);
+}
+
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper)
{
unsigned long temp = eth_proto_oper;
+ const u32 *table;
u32 speed = 0;
+ u32 max_size;
int i;
- i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER);
- if (i < MLX5E_LINK_MODES_NUMBER)
- speed = mlx5e_link_speed[i];
-
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+ i = find_first_bit(&temp, max_size);
+ if (i < max_size)
+ speed = table[i];
return speed;
}
int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {};
- u32 eth_proto_oper;
+ struct mlx5e_port_eth_proto eproto;
+ bool ext;
int err;
- err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
+ ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+ err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
if (err)
- return err;
+ goto out;
- eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
- *speed = mlx5e_port_ptys2speed(eth_proto_oper);
+ *speed = mlx5e_port_ptys2speed(mdev, eproto.oper);
if (!(*speed))
err = -EINVAL;
+out:
return err;
}
int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
{
+ struct mlx5e_port_eth_proto eproto;
u32 max_speed = 0;
- u32 proto_cap;
+ const u32 *table;
+ u32 max_size;
+ bool ext;
int err;
int i;
- err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
+ ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+ err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
if (err)
return err;
- for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
- if (proto_cap & MLX5E_PROT_MASK(i))
- max_speed = max(max_speed, mlx5e_link_speed[i]);
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+ for (i = 0; i < max_size; ++i)
+ if (eproto.cap & MLX5E_PROT_MASK(i))
+ max_speed = max(max_speed, table[i]);
*speed = max_speed;
return 0;
}
-u32 mlx5e_port_speed2linkmodes(u32 speed)
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed)
{
u32 link_modes = 0;
+ const u32 *table;
+ u32 max_size;
int i;
- for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
- if (mlx5e_link_speed[i] == speed)
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+ for (i = 0; i < max_size; ++i) {
+ if (table[i] == speed)
link_modes |= MLX5E_PROT_MASK(i);
}
-
return link_modes;
}
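
mlx5e_port_ptys2speed() and mlx5e_port_speed2linkmodes() above are inverse translations over whichever speed table the ptys_extended_ethernet capability selects. A standalone sketch with trimmed, illustrative tables (not the real PTYS tables):

#include <stdio.h>
#include <strings.h>	/* ffs() */

static const unsigned legacy_tbl[] = { 1000, 10000, 40000 };
static const unsigned ext_tbl[]    = { 100, 1000, 5000, 10000 };

static unsigned ptys2speed(unsigned proto_oper, int is_ext)
{
	const unsigned *table = is_ext ? ext_tbl : legacy_tbl;
	int size = is_ext ? 4 : 3;
	int i = ffs((int)proto_oper);	/* first set bit, 1-based; 0 if none */

	return (i && i <= size) ? table[i - 1] : 0;
}

static unsigned long speed2linkmodes(unsigned speed, int is_ext)
{
	const unsigned *table = is_ext ? ext_tbl : legacy_tbl;
	int size = is_ext ? 4 : 3;
	unsigned long modes = 0;

	for (int i = 0; i < size; i++)
		if (table[i] == speed)
			modes |= 1UL << i;
	return modes;
}

int main(void)
{
	printf("speed = %u\n", ptys2speed(0x2, 1));	     /* bit 1 -> 1000 */
	printf("modes = 0x%lx\n", speed2linkmodes(1000, 1)); /* -> 0x2 */
	return 0;
}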
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index cd2160b8c9bf..70f536ec51c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -36,10 +36,22 @@
#include <linux/mlx5/driver.h>
#include "en.h"
-u32 mlx5e_port_ptys2speed(u32 eth_proto_oper);
+struct mlx5e_port_eth_proto {
+ u32 cap;
+ u32 admin;
+ u32 oper;
+};
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+ struct mlx5e_port_eth_proto *eproto);
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+ u8 *an_disable_cap, u8 *an_disable_admin);
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+ u32 proto_admin, bool ext);
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper);
int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
-u32 mlx5e_port_speed2linkmodes(u32 speed);
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed);
int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
index eac245a93f91..4ab0d030b544 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -122,7 +122,9 @@ out:
return err;
}
-/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */
+/* xoff = ((301 + 2.16 * len [m]) * speed [Gbps] + 2.72 * MTU [B])
+ * The minimum speed value is 40Gbps.
+ */
static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
{
u32 speed;
@@ -130,10 +132,9 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
int err;
err = mlx5e_port_linkspeed(priv->mdev, &speed);
- if (err) {
- mlx5_core_warn(priv->mdev, "cannot get port speed\n");
- return 0;
- }
+ if (err)
+ speed = SPEED_40000;
+ speed = max_t(u32, speed, SPEED_40000);
xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100;
@@ -142,7 +143,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
}
static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
- u32 xoff, unsigned int mtu)
+ u32 xoff, unsigned int max_mtu)
{
int i;
@@ -154,11 +155,12 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
}
if (port_buffer->buffer[i].size <
- (xoff + mtu + (1 << MLX5E_BUFFER_CELL_SHIFT)))
+ (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT)))
return -ENOMEM;
port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff;
- port_buffer->buffer[i].xon = port_buffer->buffer[i].xoff - mtu;
+ port_buffer->buffer[i].xon =
+ port_buffer->buffer[i].xoff - max_mtu;
}
return 0;
@@ -166,7 +168,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
/**
* update_buffer_lossy()
- * mtu: device's MTU
+ * max_mtu: netdev's max_mtu
* pfc_en: <input> current pfc configuration
* buffer: <input> current prio to buffer mapping
* xoff: <input> xoff value
@@ -183,7 +185,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
* Return 0 if no error.
* Set change to true if buffer configuration is modified.
*/
-static int update_buffer_lossy(unsigned int mtu,
+static int update_buffer_lossy(unsigned int max_mtu,
u8 pfc_en, u8 *buffer, u32 xoff,
struct mlx5e_port_buffer *port_buffer,
bool *change)
@@ -220,7 +222,7 @@ static int update_buffer_lossy(unsigned int mtu,
}
if (changed) {
- err = update_xoff_threshold(port_buffer, xoff, mtu);
+ err = update_xoff_threshold(port_buffer, xoff, max_mtu);
if (err)
return err;
@@ -230,6 +232,7 @@ static int update_buffer_lossy(unsigned int mtu,
return 0;
}
+#define MINIMUM_MAX_MTU 9216
int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
u32 change, unsigned int mtu,
struct ieee_pfc *pfc,
@@ -241,12 +244,14 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
bool update_prio2buffer = false;
u8 buffer[MLX5E_MAX_PRIORITY];
bool update_buffer = false;
+ unsigned int max_mtu;
u32 total_used = 0;
u8 curr_pfc_en;
int err;
int i;
mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change);
+ max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU);
err = mlx5e_port_query_buffer(priv, &port_buffer);
if (err)
@@ -254,7 +259,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (change & MLX5E_PORT_BUFFER_CABLE_LEN) {
update_buffer = true;
- err = update_xoff_threshold(&port_buffer, xoff, mtu);
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
if (err)
return err;
}
@@ -264,7 +269,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (err)
return err;
- err = update_buffer_lossy(mtu, pfc->pfc_en, buffer, xoff,
+ err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff,
&port_buffer, &update_buffer);
if (err)
return err;
@@ -276,8 +281,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (err)
return err;
- err = update_buffer_lossy(mtu, curr_pfc_en, prio2buffer, xoff,
- &port_buffer, &update_buffer);
+ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer,
+ xoff, &port_buffer, &update_buffer);
if (err)
return err;
}
@@ -301,7 +306,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
return -EINVAL;
update_buffer = true;
- err = update_xoff_threshold(&port_buffer, xoff, mtu);
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
if (err)
return err;
}
@@ -309,7 +314,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
/* Need to update buffer configuration if xoff value is changed */
if (!update_buffer && xoff != priv->dcbx.xoff) {
update_buffer = true;
- err = update_xoff_threshold(&port_buffer, xoff, mtu);
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
if (err)
return err;
}
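
The calculate_xoff() hunk above encodes the xoff formula in integer arithmetic (2.16 becomes 216/100, 2.72 becomes 272/100). A worked instance under assumed inputs: speed clamped to the 40000 Mbps minimum, a 7 m cable, and a 9216 B MTU.

#include <stdio.h>

int main(void)
{
	unsigned speed = 40000;		/* Mbps, the enforced minimum */
	unsigned cable_len = 7;		/* meters */
	unsigned mtu = 9216;		/* bytes */

	/* (301 + 2.16 * len) * speed[Gbps] + 2.72 * MTU, scaled to ints */
	unsigned xoff = (301 + 216 * cable_len / 100) * speed / 1000
			+ 272 * mtu / 100;

	printf("xoff = %u\n", xoff);	/* (301+15)*40 + 25067 = 37707 */
	return 0;
}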
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
new file mode 100644
index 000000000000..e78e92753d73
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5E_EN_REPORTER_H
+#define __MLX5E_EN_REPORTER_H
+
+#include <linux/mlx5/driver.h>
+#include "en.h"
+
+int mlx5e_tx_reporter_create(struct mlx5e_priv *priv);
+void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv);
+void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq);
+int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
new file mode 100644
index 000000000000..476dd97f7f2f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -0,0 +1,314 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <net/devlink.h>
+#include "reporter.h"
+#include "lib/eq.h"
+
+#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256
+
+struct mlx5e_tx_err_ctx {
+ int (*recover)(struct mlx5e_txqsq *sq);
+ struct mlx5e_txqsq *sq;
+};
+
+static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+{
+ unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+
+ while (time_before(jiffies, exp_time)) {
+ if (sq->cc == sq->pc)
+ return 0;
+
+ msleep(20);
+ }
+
+ netdev_err(sq->channel->netdev,
+ "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+
+ return -ETIMEDOUT;
+}
+
+static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
+{
+ WARN_ONCE(sq->cc != sq->pc,
+ "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+ sq->cc = 0;
+ sq->dma_fifo_cc = 0;
+ sq->pc = 0;
+}
+
+static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
+{
+ struct mlx5_core_dev *mdev = sq->channel->mdev;
+ struct net_device *dev = sq->channel->netdev;
+ struct mlx5e_modify_sq_param msp = {0};
+ int err;
+
+ msp.curr_state = curr_state;
+ msp.next_state = MLX5_SQC_STATE_RST;
+
+ err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
+ return err;
+ }
+
+ memset(&msp, 0, sizeof(msp));
+ msp.curr_state = MLX5_SQC_STATE_RST;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+
+ err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_core_dev *mdev = sq->channel->mdev;
+ struct net_device *dev = sq->channel->netdev;
+ u8 state;
+ int err;
+
+ if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
+ return 0;
+
+ err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
+ if (err) {
+ netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
+ sq->sqn, err);
+ return err;
+ }
+
+ if (state != MLX5_SQC_STATE_ERR) {
+ netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
+ return -EINVAL;
+ }
+
+ mlx5e_tx_disable_queue(sq->txq);
+
+ err = mlx5e_wait_for_sq_flush(sq);
+ if (err)
+ return err;
+
+	/* At this point, no new packets will arrive from the stack as TXQ is
+	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
+	 * pending WQEs, so it is now safe to reset the SQ.
+	 */
+
+ err = mlx5e_sq_to_ready(sq, state);
+ if (err)
+ return err;
+
+ mlx5e_reset_txqsq_cc_pc(sq);
+ sq->stats->recover++;
+ mlx5e_activate_txqsq(sq);
+
+ return 0;
+}
+
+static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter,
+ char *err_str,
+ struct mlx5e_tx_err_ctx *err_ctx)
+{
+ if (IS_ERR_OR_NULL(tx_reporter)) {
+ netdev_err(err_ctx->sq->channel->netdev, err_str);
+ return err_ctx->recover(err_ctx->sq);
+ }
+
+ return devlink_health_report(tx_reporter, err_str, err_ctx);
+}
+
+void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq)
+{
+ char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
+ struct mlx5e_tx_err_ctx err_ctx = {0};
+
+ err_ctx.sq = sq;
+ err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
+ sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
+
+ mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str,
+ &err_ctx);
+}
+
+static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
+ u32 eqe_count;
+ int ret;
+
+ netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+ eq->core.eqn, eq->core.cons_index, eq->core.irqn);
+
+ eqe_count = mlx5_eq_poll_irq_disabled(eq);
+ ret = eqe_count ? false : true;
+ if (!eqe_count) {
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ return ret;
+ }
+
+ netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n",
+ eqe_count, eq->core.eqn);
+ sq->channel->stats->eq_rearm++;
+ return ret;
+}
+
+int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq)
+{
+ char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
+ struct mlx5e_tx_err_ctx err_ctx;
+
+ err_ctx.sq = sq;
+ err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
+ sprintf(err_str,
+ "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
+ sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+ jiffies_to_usecs(jiffies - sq->txq->trans_start));
+
+ return mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str,
+ &err_ctx);
+}
+
+/* The state lock must not be taken within this function;
+ * doing so could cause a deadlock or a use-after-free.
+ */
+static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx)
+{
+ return err_ctx->recover(err_ctx->sq);
+}
+
+static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv)
+{
+ int err = 0;
+
+ rtnl_lock();
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto out;
+
+ err = mlx5e_safe_reopen_channels(priv);
+
+out:
+ mutex_unlock(&priv->state_lock);
+ rtnl_unlock();
+
+ return err;
+}
+
+static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
+ void *context)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_tx_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
+ mlx5e_tx_reporter_recover_all(priv);
+}
+
+static int
+mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
+ u32 sqn, u8 state, bool stopped)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ int i, err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
+ if (err)
+ goto unlock;
+
+ for (i = 0; i < priv->channels.num * priv->channels.params.num_tc;
+ i++) {
+ struct mlx5e_txqsq *sq = priv->txq2sq[i];
+ u8 state;
+
+ err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
+ if (err)
+ break;
+
+ err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn,
+ state,
+ netif_xmit_stopped(sq->txq));
+ if (err)
+ break;
+ }
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+ if (err)
+ goto unlock;
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
+ .name = "tx",
+ .recover = mlx5e_tx_reporter_recover,
+ .diagnose = mlx5e_tx_reporter_diagnose,
+};
+
+#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
+
+int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct devlink *devlink = priv_to_devlink(mdev);
+
+ priv->tx_reporter =
+ devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
+ MLX5_REPORTER_TX_GRACEFUL_PERIOD,
+ true, priv);
+ if (IS_ERR(priv->tx_reporter))
+ netdev_warn(priv->netdev,
+ "Failed to create tx reporter, err = %ld\n",
+ PTR_ERR(priv->tx_reporter));
+ return IS_ERR_OR_NULL(priv->tx_reporter);
+}
+
+void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
+{
+ if (IS_ERR_OR_NULL(priv->tx_reporter))
+ return;
+
+ devlink_health_reporter_destroy(priv->tx_reporter);
+}
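
mlx5e_tx_reporter_recover() above dispatches on whether a context was supplied: a context carries its own recover callback for a single SQ, while a NULL context falls back to reopening all channels. A minimal sketch of that dispatch pattern, with illustrative names rather than the devlink API:

#include <stdio.h>

struct err_ctx {
	int (*recover)(int sq);
	int sq;
};

static int recover_one(int sq) { printf("recover SQ %d\n", sq); return 0; }
static int recover_all(void)   { printf("reopen all channels\n"); return 0; }

static int reporter_recover(struct err_ctx *ctx)
{
	return ctx ? ctx->recover(ctx->sq) : recover_all();
}

int main(void)
{
	struct err_ctx ctx = { .recover = recover_one, .sq = 5 };

	reporter_recover(&ctx);		/* targeted recovery */
	reporter_recover(NULL);		/* global recovery */
	return 0;
}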
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
new file mode 100644
index 000000000000..eec07b34b4ad
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -0,0 +1,648 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/vxlan.h>
+#include <net/gre.h>
+#include "lib/vxlan.h"
+#include "en/tc_tun.h"
+
+static int get_route_and_out_devs(struct mlx5e_priv *priv,
+ struct net_device *dev,
+ struct net_device **route_dev,
+ struct net_device **out_dev)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct net_device *uplink_dev, *uplink_upper;
+ bool dst_is_lag_dev;
+
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ uplink_upper = netdev_master_upper_dev_get(uplink_dev);
+ dst_is_lag_dev = (uplink_upper &&
+ netif_is_lag_master(uplink_upper) &&
+ dev == uplink_upper &&
+ mlx5_lag_is_sriov(priv->mdev));
+
+ /* if the egress device isn't on the same HW e-switch or
+ * it's a LAG device, use the uplink
+ */
+ if (!netdev_port_same_parent_id(priv->netdev, dev) ||
+ dst_is_lag_dev) {
+ *route_dev = uplink_dev;
+ *out_dev = *route_dev;
+ } else {
+ *route_dev = dev;
+ if (is_vlan_dev(*route_dev))
+ *out_dev = uplink_dev;
+ else if (mlx5e_eswitch_rep(dev))
+ *out_dev = *route_dev;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ if (!(mlx5e_eswitch_rep(*out_dev) &&
+ mlx5e_is_uplink_rep(netdev_priv(*out_dev))))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct net_device **route_dev,
+ struct flowi4 *fl4,
+ struct neighbour **out_n,
+ u8 *out_ttl)
+{
+ struct rtable *rt;
+ struct neighbour *n = NULL;
+
+#if IS_ENABLED(CONFIG_INET)
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *uplink_dev;
+ int ret;
+
+ if (mlx5_lag_is_multipath(mdev)) {
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ fl4->flowi4_oif = uplink_dev->ifindex;
+ }
+
+ rt = ip_route_output_key(dev_net(mirred_dev), fl4);
+ ret = PTR_ERR_OR_ZERO(rt);
+ if (ret)
+ return ret;
+
+ if (mlx5_lag_is_multipath(mdev) && !rt->rt_gateway)
+ return -ENETUNREACH;
+#else
+ return -EOPNOTSUPP;
+#endif
+
+ ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
+ if (ret < 0)
+ return ret;
+
+ if (!(*out_ttl))
+ *out_ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
+ ip_rt_put(rt);
+ if (!n)
+ return -ENOMEM;
+
+ *out_n = n;
+ return 0;
+}
+
+static const char *mlx5e_netdev_kind(struct net_device *dev)
+{
+ if (dev->rtnl_link_ops)
+ return dev->rtnl_link_ops->kind;
+ else
+ return "";
+}
+
+static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct net_device **route_dev,
+ struct flowi6 *fl6,
+ struct neighbour **out_n,
+ u8 *out_ttl)
+{
+ struct neighbour *n = NULL;
+ struct dst_entry *dst;
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ int ret;
+
+ ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
+ fl6);
+ if (ret < 0)
+ return ret;
+
+ if (!(*out_ttl))
+ *out_ttl = ip6_dst_hoplimit(dst);
+
+ ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
+ if (ret < 0)
+ return ret;
+#else
+ return -EOPNOTSUPP;
+#endif
+
+ n = dst_neigh_lookup(dst, &fl6->daddr);
+ dst_release(dst);
+ if (!n)
+ return -ENOMEM;
+
+ *out_n = n;
+ return 0;
+}
+
+static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key)
+{
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct vxlanhdr *vxh = (struct vxlanhdr *)
+ ((char *)udp + sizeof(struct udphdr));
+
+ udp->dest = tun_key->tp_dst;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(tun_id);
+
+ return 0;
+}
+
+static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key)
+{
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ int hdr_len;
+ struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf);
+
+ /* the HW does not calculate GRE csum or sequences */
+ if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
+ return -EOPNOTSUPP;
+
+ greh->protocol = htons(ETH_P_TEB);
+
+ /* GRE key */
+ hdr_len = gre_calc_hlen(tun_key->tun_flags);
+ greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);
+ if (tun_key->tun_flags & TUNNEL_KEY) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+
+ *ptr = tun_id;
+ }
+
+ return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ int err = 0;
+ struct ip_tunnel_key *key = &e->tun_info.key;
+
+ if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ *ip_proto = IPPROTO_UDP;
+ err = mlx5e_gen_vxlan_header(buf, key);
+ } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
+ *ip_proto = IPPROTO_GRE;
+ err = mlx5e_gen_gre_header(buf, key);
+ } else {
+		pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n",
+			e->tunnel_type);
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
+ struct mlx5e_encap_entry *e,
+ u16 proto)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+ char *ip;
+
+ ether_addr_copy(eth->h_dest, e->h_dest);
+ ether_addr_copy(eth->h_source, dev->dev_addr);
+ if (is_vlan_dev(dev)) {
+ struct vlan_hdr *vlan = (struct vlan_hdr *)
+ ((char *)eth + ETH_HLEN);
+ ip = (char *)vlan + VLAN_HLEN;
+ eth->h_proto = vlan_dev_vlan_proto(dev);
+ vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
+ vlan->h_vlan_encapsulated_proto = htons(proto);
+ } else {
+ eth->h_proto = htons(proto);
+ ip = (char *)eth + ETH_HLEN;
+ }
+
+ return ip;
+}
+
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ struct net_device *out_dev, *route_dev;
+ struct neighbour *n = NULL;
+ struct flowi4 fl4 = {};
+ int ipv4_encap_size;
+ char *encap_header;
+ u8 nud_state, ttl;
+ struct iphdr *ip;
+ int err;
+
+ /* add the IP fields */
+ fl4.flowi4_tos = tun_key->tos;
+ fl4.daddr = tun_key->u.ipv4.dst;
+ fl4.saddr = tun_key->u.ipv4.src;
+ ttl = tun_key->ttl;
+
+ err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev,
+ &fl4, &n, &ttl);
+ if (err)
+ return err;
+
+ ipv4_encap_size =
+ (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct iphdr) +
+ e->tunnel_hlen;
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ return -EOPNOTSUPP;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header)
+ return -ENOMEM;
+
+	/* Used by mlx5e_detach_encap to look up the corresponding entry
+	 * in the neigh hash table when a user deletes a rule.
+	 */
+ e->m_neigh.dev = n->dev;
+ e->m_neigh.family = n->ops->family;
+ memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+ e->out_dev = out_dev;
+ e->route_dev = route_dev;
+
+	/* It's important to add the neigh to the hash table before checking
+	 * its validity state. That way, if we get a notification when the
+	 * neigh changes its validity state, we can find the relevant neigh
+	 * in the hash.
+	 */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(e->h_dest, n->ha);
+ read_unlock_bh(&n->lock);
+
+ /* add ethernet header */
+ ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ETH_P_IP);
+
+ /* add ip header */
+ ip->tos = tun_key->tos;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->ttl = ttl;
+ ip->daddr = fl4.daddr;
+ ip->saddr = fl4.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
+ &ip->protocol, e);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->encap_size = ipv4_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+		/* The encap entry will be made valid on a neigh update event
+		 * and will not be used before that.
+ */
+ goto out;
+ }
+
+ err = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv4_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+ neigh_release(n);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+out:
+ if (n)
+ neigh_release(n);
+ return err;
+}
+
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ struct net_device *out_dev, *route_dev;
+ struct neighbour *n = NULL;
+ struct flowi6 fl6 = {};
+ struct ipv6hdr *ip6h;
+ int ipv6_encap_size;
+ char *encap_header;
+ u8 nud_state, ttl;
+ int err;
+
+ ttl = tun_key->ttl;
+
+ fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ fl6.daddr = tun_key->u.ipv6.dst;
+ fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev,
+ &fl6, &n, &ttl);
+ if (err)
+ return err;
+
+ ipv6_encap_size =
+ (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+ sizeof(struct ipv6hdr) +
+ e->tunnel_hlen;
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ return -EOPNOTSUPP;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header)
+ return -ENOMEM;
+
+	/* Used by mlx5e_detach_encap to look up the corresponding entry
+	 * in the neigh hash table when a user deletes a rule.
+	 */
+ e->m_neigh.dev = n->dev;
+ e->m_neigh.family = n->ops->family;
+ memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+ e->out_dev = out_dev;
+ e->route_dev = route_dev;
+
+	/* It's important to add the neigh to the hash table before checking
+	 * its validity state. That way, if we get a notification when the
+	 * neigh changes its validity state, we can find the relevant neigh
+	 * in the hash.
+	 */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(e->h_dest, n->ha);
+ read_unlock_bh(&n->lock);
+
+ /* add ethernet header */
+ ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e,
+ ETH_P_IPV6);
+
+ /* add ip header */
+ ip6_flow_hdr(ip6h, tun_key->tos, 0);
+	/* the HW fills in the IPv6 payload length */
+ ip6h->hop_limit = ttl;
+ ip6h->daddr = fl6.daddr;
+ ip6h->saddr = fl6.saddr;
+
+ /* add tunneling protocol header */
+ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+ &ip6h->nexthdr, e);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->encap_size = ipv6_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+		/* The encap entry will be made valid on a neigh update event
+		 * and will not be used before that.
+ */
+ goto out;
+ }
+
+ err = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv6_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+ neigh_release(n);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+out:
+ if (n)
+ neigh_release(n);
+ return err;
+}
+
+int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev)
+{
+ if (netif_is_vxlan(tunnel_dev))
+ return MLX5E_TC_TUNNEL_TYPE_VXLAN;
+ else if (netif_is_gretap(tunnel_dev) ||
+ netif_is_ip6gretap(tunnel_dev))
+ return MLX5E_TC_TUNNEL_TYPE_GRETAP;
+ else
+ return MLX5E_TC_TUNNEL_TYPE_UNKNOWN;
+}
+
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+ struct net_device *netdev)
+{
+ int tunnel_type = mlx5e_tc_tun_get_type(netdev);
+
+ if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN &&
+ MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
+ return true;
+ else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP &&
+ MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap))
+ return true;
+ else
+ return false;
+}
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev);
+
+ if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ int dst_port = be16_to_cpu(e->tun_info.key.tp_dst);
+
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vxlan udp dport was not registered with the HW");
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n",
+ dst_port);
+ return -EOPNOTSUPP;
+ }
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+ e->tunnel_hlen = VXLAN_HLEN;
+ } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE;
+ e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags);
+ } else {
+ e->reformat_type = -1;
+ e->tunnel_hlen = -1;
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ void *misc_c = MLX5_ADDR_OF(fte_match_param,
+ spec->match_criteria,
+ misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param,
+ spec->match_value,
+ misc_parameters);
+ struct flow_match_ports enc_ports;
+
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ /* Full udp dst port must be given */
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) ||
+ memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "VXLAN decap filter must include enc_dst_port condition");
+ netdev_warn(priv->netdev,
+ "VXLAN decap filter must include enc_dst_port condition\n");
+ return -EOPNOTSUPP;
+ }
+
+	/* UDP dst port must be known as a VXLAN port */
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matched UDP port is not registered as a VXLAN port");
+ netdev_warn(priv->netdev,
+ "UDP port %d is not registered as a VXLAN port\n",
+ be16_to_cpu(enc_ports.key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ /* dst UDP port is valid here */
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
+ ntohs(enc_ports.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+ ntohs(enc_ports.key->dst));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
+ ntohs(enc_ports.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+ ntohs(enc_ports.key->src));
+
+ /* match on VNI */
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_match_enc_keyid enc_keyid;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
+ be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
+ be32_to_cpu(enc_keyid.key->keyid));
+ }
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *outer_headers_c,
+ void *outer_headers_v)
+{
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+
+ if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "GRE HW offloading is not supported");
+ netdev_warn(priv->netdev, "GRE HW offloading is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ ip_protocol, IPPROTO_GRE);
+
+	/* GRE protocol */
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol);
+ MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
+
+ /* gre key */
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_match_enc_keyid enc_keyid;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+ MLX5_SET(fte_match_set_misc, misc_c,
+ gre_key.key, be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v,
+ gre_key.key, be32_to_cpu(enc_keyid.key->keyid));
+ }
+
+ return 0;
+}
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *headers_c,
+ void *headers_v, u8 *match_level)
+{
+ int tunnel_type;
+ int err = 0;
+
+ tunnel_type = mlx5e_tc_tun_get_type(filter_dev);
+ if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+ *match_level = MLX5_MATCH_L4;
+ err = mlx5e_tc_tun_parse_vxlan(priv, spec, f,
+ headers_c, headers_v);
+ } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
+ *match_level = MLX5_MATCH_L3;
+ err = mlx5e_tc_tun_parse_gretap(priv, spec, f,
+ headers_c, headers_v);
+ } else {
+ netdev_warn(priv->netdev,
+ "decapsulation offload is not supported for %s net device (%d)\n",
+ mlx5e_netdev_kind(filter_dev), tunnel_type);
+ return -EOPNOTSUPP;
+ }
+ return err;
+}
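
In mlx5e_tc_tun_create_header_ipv4() above, the encap buffer size is simply the sum of the on-wire headers, checked against the firmware's max_encap_header_size. A worked instance for VXLAN over a non-VLAN route device, using the standard header lengths:

#include <stdio.h>

int main(void)
{
	int eth_hlen = 14;	/* ETH_HLEN */
	int ip_hlen = 20;	/* sizeof(struct iphdr) */
	int vxlan_hlen = 8 + 8;	/* UDP header + VXLAN header (VXLAN_HLEN) */

	/* must not exceed MLX5_CAP_ESW(mdev, max_encap_header_size) */
	printf("ipv4_encap_size = %d\n", eth_hlen + ip_hlen + vxlan_hlen);
	/* -> 50; a VLAN route device would add 4 more (VLAN_ETH_HLEN) */
	return 0;
}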
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
new file mode 100644
index 000000000000..b63f15de899d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_TUNNEL_H__
+#define __MLX5_EN_TC_TUNNEL_H__
+
+#include <linux/netdevice.h>
+#include <linux/mlx5/fs.h>
+#include <net/pkt_cls.h>
+#include <linux/netlink.h>
+#include "en.h"
+#include "en_rep.h"
+
+enum {
+ MLX5E_TC_TUNNEL_TYPE_UNKNOWN,
+ MLX5E_TC_TUNNEL_TYPE_VXLAN,
+ MLX5E_TC_TUNNEL_TYPE_GRETAP
+};
+
+int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack);
+
+int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+
+int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e);
+
+int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev);
+bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
+ struct net_device *netdev);
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ void *headers_c,
+ void *headers_v, u8 *match_level);
+
+#endif /* __MLX5_EN_TC_TUNNEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index ad6d471d00dd..cad34d6f5f45 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -33,6 +33,26 @@
#include <linux/bpf_trace.h>
#include "en/xdp.h"
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
+{
+ int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+
+ /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
+ * The condition checked in mlx5e_rx_is_linear_skb is:
+ * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1)
+ * (Note that hw_mtu == sw_mtu + hard_mtu.)
+ * What is returned from this function is:
+ * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2)
+ * After assigning sw_mtu := max_mtu, the left side of (1) turns to
+ * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
+ * because both PAGE_SIZE and S are already aligned. Any number greater
+ * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
+ * so max_mtu is the maximum MTU allowed.
+ */
+
+ return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
+}
+
static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
struct xdp_buff *xdp)
@@ -47,7 +67,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
xdpi.xdpf->len, PCI_DMA_TODEVICE);
xdpi.di = *di;
- return mlx5e_xmit_xdp_frame(sq, &xdpi);
+ return sq->xmit_xdp_frame(sq, &xdpi);
}
/* returns true if packet was consumed by xdp */
@@ -102,7 +122,98 @@ xdp_abort:
}
}
-bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u8 wqebbs;
+ u16 pi;
+
+ mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);
+
+ prefetchw(session->wqe->data);
+ session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+/* The product MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
+ * (16 * 4 == 64) does not fit in the 6-bit DS field of the Ctrl Segment.
+ * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
+ * full-session WQE be cache-aligned.
+ */
+#if L1_CACHE_BYTES < 128
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
+#else
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
+#endif
+
+ wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
+ MLX5E_XDP_MPW_MAX_WQEBBS);
+
+ session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
+}
+
+static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
+ u16 ds_count = session->ds_count;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+
+ cseg->opmod_idx_opcode =
+ cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
+
+ wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
+ wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT;
+
+ sq->pc += wi->num_wqebbs;
+
+ sq->doorbell_cseg = cseg;
+
+ session->wqe = NULL; /* Close session */
+}
+
+static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_info *xdpi)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+ dma_addr_t dma_addr = xdpi->dma_addr;
+ struct xdp_frame *xdpf = xdpi->xdpf;
+ unsigned int dma_len = xdpf->len;
+
+ if (unlikely(sq->hw_mtu < dma_len)) {
+ stats->err++;
+ return false;
+ }
+
+ if (unlikely(!session->wqe)) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+ MLX5_SEND_WQE_MAX_WQEBBS))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ stats->full++;
+ return false;
+ }
+
+ mlx5e_xdp_mpwqe_session_start(sq);
+ }
+
+ mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len);
+
+ if (unlikely(session->ds_count == session->max_ds_count))
+ mlx5e_xdp_mpwqe_complete(sq);
+
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+ stats->xmit++;
+ return true;
+}
+
+static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
{
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -126,11 +237,8 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
}
if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
- if (sq->doorbell) {
- /* SQ is full, ring doorbell */
- mlx5e_xmit_xdp_doorbell(sq);
- sq->doorbell = false;
- }
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
stats->full++;
return false;
}
@@ -152,23 +260,20 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
- /* move page to reference to sq responsibility,
- * and mark so it's not put back in page-cache.
- */
- sq->db.xdpi[pi] = *xdpi;
sq->pc++;
- sq->doorbell = true;
+ sq->doorbell_cseg = cseg;
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
stats->xmit++;
return true;
}
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
{
+ struct mlx5e_xdp_info_fifo *xdpi_fifo;
struct mlx5e_xdpsq *sq;
struct mlx5_cqe64 *cqe;
- struct mlx5e_rq *rq;
bool is_redirect;
u16 sqcc;
int i;
@@ -182,8 +287,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
if (!cqe)
return false;
- is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
- rq = container_of(sq, struct mlx5e_rq, xdpsq);
+ is_redirect = !rq;
+ xdpi_fifo = &sq->db.xdpi_fifo;
/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
* otherwise a cq overrun may occur
@@ -199,20 +304,33 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
+ netdev_WARN_ONCE(sq->channel->netdev,
+ "Bad OP in XDPSQ CQE: 0x%x\n",
+ get_cqe_opcode(cqe));
+
do {
- u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
- struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 ci, j;
last_wqe = (sqcc == wqe_counter);
- sqcc++;
-
- if (is_redirect) {
- xdp_return_frame(xdpi->xdpf);
- dma_unmap_single(sq->pdev, xdpi->dma_addr,
- xdpi->xdpf->len, DMA_TO_DEVICE);
- } else {
- /* Recycle RX page */
- mlx5e_page_release(rq, &xdpi->di, true);
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+
+ sqcc += wi->num_wqebbs;
+
+ for (j = 0; j < wi->num_ds; j++) {
+ struct mlx5e_xdp_info xdpi =
+ mlx5e_xdpi_fifo_pop(xdpi_fifo);
+
+ if (is_redirect) {
+ dma_unmap_single(sq->pdev, xdpi.dma_addr,
+ xdpi.xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi.xdpf);
+ } else {
+ /* Recycle RX page */
+ mlx5e_page_release(rq, &xdpi.di, true);
+ }
}
} while (!last_wqe);
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
@@ -228,27 +346,32 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
{
- struct mlx5e_rq *rq;
- bool is_redirect;
-
- is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
- rq = is_redirect ? NULL : container_of(sq, struct mlx5e_rq, xdpsq);
+ struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
+ bool is_redirect = !rq;
while (sq->cc != sq->pc) {
- u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
- struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
-
- sq->cc++;
-
- if (is_redirect) {
- xdp_return_frame(xdpi->xdpf);
- dma_unmap_single(sq->pdev, xdpi->dma_addr,
- xdpi->xdpf->len, DMA_TO_DEVICE);
- } else {
- /* Recycle RX page */
- mlx5e_page_release(rq, &xdpi->di, false);
+ struct mlx5e_xdp_wqe_info *wi;
+ u16 ci, i;
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+ wi = &sq->db.wqe_info[ci];
+
+ sq->cc += wi->num_wqebbs;
+
+ for (i = 0; i < wi->num_ds; i++) {
+ struct mlx5e_xdp_info xdpi =
+ mlx5e_xdpi_fifo_pop(xdpi_fifo);
+
+ if (is_redirect) {
+ dma_unmap_single(sq->pdev, xdpi.dma_addr,
+ xdpi.xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi.xdpf);
+ } else {
+ /* Recycle RX page */
+ mlx5e_page_release(rq, &xdpi.di, false);
+ }
}
}
}
@@ -262,7 +385,8 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
int sq_num;
int i;
- if (unlikely(!test_bit(MLX5E_STATE_OPENED, &priv->state)))
+ /* this flag is sufficient, no need to test internal sq state */
+ if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
return -ENETDOWN;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
@@ -275,9 +399,6 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
sq = &priv->channels.c[sq_num]->xdpsq;
- if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
- return -ENETDOWN;
-
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
struct mlx5e_xdp_info xdpi;
@@ -292,7 +413,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
xdpi.xdpf = xdpf;
- if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) {
+ if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
dma_unmap_single(sq->pdev, xdpi.dma_addr,
xdpf->len, DMA_TO_DEVICE);
xdp_return_frame_rx_napi(xdpf);
@@ -300,8 +421,33 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
}
}
- if (flags & XDP_XMIT_FLUSH)
+ if (flags & XDP_XMIT_FLUSH) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xmit_xdp_doorbell(sq);
+ }
return n - drops;
}
+
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
+{
+ struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
+
+ if (xdpsq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(xdpsq);
+
+ mlx5e_xmit_xdp_doorbell(xdpsq);
+
+ if (xdpsq->redirect_flush) {
+ xdp_do_flush_map();
+ xdpsq->redirect_flush = false;
+ }
+}
+
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
+{
+ sq->xmit_xdp_frame = is_mpw ?
+ mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
+}
+
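The mlx5e_xdp_max_mtu() comment above reduces to max_mtu = PAGE_SIZE - S - hr - hard_mtu. A worked instance under assumed x86-64 values (PAGE_SIZE 4096, NET_IP_ALIGN 0, XDP_PACKET_HEADROOM 256, S = 320, and a hypothetical hard_mtu of 18); the exact constants vary by arch and configuration.

#include <stdio.h>

int main(void)
{
	int page_size = 4096;	/* PAGE_SIZE, assumed */
	int hr = 0 + 256;	/* NET_IP_ALIGN + XDP_PACKET_HEADROOM */
	int shinfo = 320;	/* S = SKB_DATA_ALIGN(skb_shared_info), assumed */
	int hard_mtu = 18;	/* assumed */

	/* max_mtu = PAGE_SIZE - S - hr - hard_mtu, per the comment above */
	printf("xdp max mtu = %d\n", page_size - shinfo - hr - hard_mtu);
	/* -> 3502 */
	return 0;
}
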
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 6dfab045925f..553956cadc8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -34,30 +34,81 @@
#include "en.h"
-#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
- MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_DS_COUNT \
- ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
+ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
void *va, u16 *rx_headroom, u32 *len);
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
-
-bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq);
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
+void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
+static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
+static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
+{
+ clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+	/* let other devices' napi(s) see our new state */
+ synchronize_rcu();
+}
+
+static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
+{
+ return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
{
+ if (sq->doorbell_cseg) {
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
+ sq->doorbell_cseg = NULL;
+ }
+}
+
+static inline void
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_data_seg *dseg =
+ (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+ dseg->lkey = sq->mkey_be;
+}
+
+static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq,
+ struct mlx5e_tx_wqe **wqe)
+{
struct mlx5_wq_cyc *wq = &sq->wq;
- struct mlx5e_tx_wqe *wqe;
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memset(*wqe, 0, sizeof(**wqe));
+}
- mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl);
+static inline void
+mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
+ struct mlx5e_xdp_info *xi)
+{
+ u32 i = (*fifo->pc)++ & fifo->mask;
+
+ fifo->xi[i] = *xi;
+}
+
+static inline struct mlx5e_xdp_info
+mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo)
+{
+ return fifo->xi[(*fifo->cc)++ & fifo->mask];
}
#endif
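
The xdpi FIFO above relies on free-running producer/consumer counters masked by a power-of-two size, so push and pop need no wrap handling or emptiness flag. A standalone sketch of the same invariant, simplified to ints:

#include <assert.h>
#include <stdint.h>

#define FIFO_SIZE 8		/* must be a power of two */

struct fifo {
	int xi[FIFO_SIZE];
	uint32_t mask;		/* FIFO_SIZE - 1 */
	uint32_t pc, cc;	/* free-running producer/consumer counters */
};

static void fifo_push(struct fifo *f, int v)
{
	f->xi[f->pc++ & f->mask] = v;
}

static int fifo_pop(struct fifo *f)
{
	return f->xi[f->cc++ & f->mask];
}

int main(void)
{
	struct fifo f = { .mask = FIFO_SIZE - 1 };

	fifo_push(&f, 42);
	fifo_push(&f, 43);
	assert(fifo_pop(&f) == 42);	/* FIFO order preserved */
	assert(fifo_pop(&f) == 43);
	return 0;
}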
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 128a82b1dbfc..53608afd39b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -254,11 +254,13 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct mlx5e_ipsec_metadata *mdata;
struct mlx5e_ipsec_sa_entry *sa_entry;
struct xfrm_state *x;
+ struct sec_path *sp;
if (!xo)
return skb;
- if (unlikely(skb->sp->len != 1)) {
+ sp = skb_sec_path(skb);
+ if (unlikely(sp->len != 1)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
goto drop;
}
@@ -305,10 +307,11 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
struct mlx5e_priv *priv = netdev_priv(netdev);
struct xfrm_offload *xo;
struct xfrm_state *xs;
+ struct sec_path *sp;
u32 sa_handle;
- skb->sp = secpath_dup(skb->sp);
- if (unlikely(!skb->sp)) {
+ sp = secpath_set(skb);
+ if (unlikely(!sp)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
return NULL;
}
@@ -320,8 +323,9 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
return NULL;
}
- skb->sp->xvec[skb->sp->len++] = xs;
- skb->sp->olen++;
+ sp = skb_sec_path(skb);
+ sp->xvec[sp->len++] = xs;
+ sp->olen++;
xo = xfrm_offload(skb);
xo->flags = CRYPTO_DONE;
@@ -372,10 +376,11 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
netdev_features_t features)
{
+ struct sec_path *sp = skb_sec_path(skb);
struct xfrm_state *x;
- if (skb->sp && skb->sp->len) {
- x = skb->sp->xvec[0];
+ if (sp && sp->len) {
+ x = sp->xvec[0];
if (x && x->xso.offload_handle)
return true;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 3078491cc0d0..1539cf3de5dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -45,7 +45,9 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev,
if (err)
return err;
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
return 0;
}
@@ -53,8 +55,10 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev,
void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
struct mlx5e_tir *tir)
{
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
mlx5_core_destroy_tir(mdev, tir->tirn);
list_del(&tir->list);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
}
static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
@@ -114,6 +118,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
}
INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list);
+ mutex_init(&mdev->mlx5e_res.td.list_lock);
return 0;
@@ -141,15 +146,17 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_tir *tir;
- int err = -ENOMEM;
+ int err = 0;
u32 tirn = 0;
int inlen;
void *in;
inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
+ if (!in) {
+ err = -ENOMEM;
goto out;
+ }
if (enable_uc_lb)
MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
@@ -157,6 +164,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
tirn = tir->tirn;
err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
@@ -168,6 +176,7 @@ out:
kvfree(in);
if (err)
netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
return err;
}
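
The hunks above serialize every traversal or mutation of the shared TIR list behind td.list_lock, so a concurrent mlx5e_refresh_tirs() cannot observe a half-unlinked entry. A minimal userspace sketch of the same discipline, with pthreads standing in for the kernel mutex:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int tirs[8];
static int ntirs;

static void tir_add(int tirn)
{
	pthread_mutex_lock(&list_lock);
	tirs[ntirs++] = tirn;		/* mutate only under the lock */
	pthread_mutex_unlock(&list_lock);
}

static void tirs_refresh(void)
{
	pthread_mutex_lock(&list_lock);
	for (int i = 0; i < ntirs; i++)	/* traverse only under the lock */
		printf("modify tir 0x%x\n", tirs[i]);
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	tir_add(0x10);
	tir_add(0x11);
	tirs_refresh();
	return 0;
}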
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 722998d68564..554672edf8c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -1126,9 +1126,7 @@ static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv)
priv->channels.params.tx_min_inline_mode)
goto out;
- if (mlx5e_open_channels(priv, &new_channels))
- goto out;
- mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ mlx5e_safe_switch_channels(priv, &new_channels, NULL);
out:
mutex_unlock(&priv->state_lock);
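
Several hunks in this patch collapse the open-channels-then-switch pair into a single mlx5e_safe_switch_channels() call. A sketch of the idiom, under the assumption that the helper opens the new channel set first and swaps only on success, so a failure leaves the old configuration running untouched:

    #include <stdio.h>

    struct channels { int num; };

    static int open_channels(struct channels *c)
    {
            return c->num > 0 ? 0 : -1;     /* stand-in for hardware setup */
    }

    static void close_channels(struct channels *c)
    {
            (void)c;                        /* teardown stand-in */
    }

    static int safe_switch_channels(struct channels *cur, struct channels *next)
    {
            int err = open_channels(next);

            if (err)
                    return err;             /* old channels untouched */
            close_channels(cur);
            *cur = *next;
            return 0;
    }

    int main(void)
    {
            struct channels cur = { .num = 4 }, next = { .num = 8 };

            printf("switch: %d (num=%d)\n",
                   safe_switch_channels(&cur, &next), cur.num);
            return 0;
    }
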
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index f480763dcd0d..78dc8fe2a83c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -63,86 +63,158 @@ struct ptys2ethtool_config {
__ETHTOOL_DECLARE_LINK_MODE_MASK(advertised);
};
-static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER];
+static
+struct ptys2ethtool_config ptys2legacy_ethtool_table[MLX5E_LINK_MODES_NUMBER];
+static
+struct ptys2ethtool_config ptys2ext_ethtool_table[MLX5E_EXT_LINK_MODES_NUMBER];
-#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, ...) \
+#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, table, ...) \
({ \
struct ptys2ethtool_config *cfg; \
const unsigned int modes[] = { __VA_ARGS__ }; \
- unsigned int i; \
- cfg = &ptys2ethtool_table[reg_]; \
+ unsigned int i, bit, idx; \
+ cfg = &ptys2##table##_ethtool_table[reg_]; \
bitmap_zero(cfg->supported, \
__ETHTOOL_LINK_MODE_MASK_NBITS); \
bitmap_zero(cfg->advertised, \
__ETHTOOL_LINK_MODE_MASK_NBITS); \
for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \
- __set_bit(modes[i], cfg->supported); \
- __set_bit(modes[i], cfg->advertised); \
+ bit = modes[i] % 64; \
+ idx = modes[i] / 64; \
+ __set_bit(bit, &cfg->supported[idx]); \
+ __set_bit(bit, &cfg->advertised[idx]); \
} \
})
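
The reworked macro splits each link-mode number into a 64-bit word index and a bit offset before setting it, so extended modes past bit 63 land in the second word of the ethtool mask. The arithmetic in isolation (the mode numbers below are arbitrary examples, not real ETHTOOL_LINK_MODE_* values):

    #include <stdint.h>
    #include <stdio.h>

    #define LINK_MODE_WORDS 2

    static void set_mode(uint64_t *mask, unsigned int mode)
    {
            unsigned int idx = mode / 64;   /* which word of the mask */
            unsigned int bit = mode % 64;   /* bit within that word */

            mask[idx] |= 1ULL << bit;
    }

    int main(void)
    {
            uint64_t supported[LINK_MODE_WORDS] = { 0 };

            set_mode(supported, 23);        /* lands in word 0 */
            set_mode(supported, 70);        /* past bit 63, lands in word 1 */
            printf("%#llx %#llx\n",
                   (unsigned long long)supported[0],
                   (unsigned long long)supported[1]);
            return 0;
    }
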
void mlx5e_build_ptys2ethtool_map(void)
{
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII,
+ memset(ptys2legacy_ethtool_table, 0, sizeof(ptys2legacy_ethtool_table));
+ memset(ptys2ext_ethtool_table, 0, sizeof(ptys2ext_ethtool_table));
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, legacy,
ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, legacy,
ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, legacy,
ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, legacy,
ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, legacy,
ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, legacy,
ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, legacy,
ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, legacy,
ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, legacy,
ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, legacy,
ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, legacy,
ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, legacy,
ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, legacy,
ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, legacy,
ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, legacy,
ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, legacy,
ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, legacy,
ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, legacy,
ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy,
ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy,
ETHTOOL_LINK_MODE_10000baseT_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy,
ETHTOOL_LINK_MODE_25000baseCR_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, legacy,
ETHTOOL_LINK_MODE_25000baseKR_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, legacy,
ETHTOOL_LINK_MODE_25000baseSR_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, legacy,
ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT);
- MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2,
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, legacy,
ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_SGMII_100M, ext,
+ ETHTOOL_LINK_MODE_100baseT_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_X_SGMII, ext,
+ ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+ ETHTOOL_LINK_MODE_1000baseX_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_5GBASE_R, ext,
+ ETHTOOL_LINK_MODE_5000baseT_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_XFI_XAUI_1, ext,
+ ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+ ETHTOOL_LINK_MODE_10000baseR_FEC_BIT,
+ ETHTOOL_LINK_MODE_10000baseCR_Full_BIT,
+ ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+ ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
+ ETHTOOL_LINK_MODE_10000baseER_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_XLAUI_4_XLPPI_4, ext,
+ ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
+ ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
+ ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
+ ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GAUI_1_25GBASE_CR_KR, ext,
+ ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+ ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+ ETHTOOL_LINK_MODE_25000baseSR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2,
+ ext,
+ ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR, ext,
+ ETHTOOL_LINK_MODE_50000baseKR_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseSR_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseCR_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT,
+ ETHTOOL_LINK_MODE_50000baseDR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_CAUI_4_100GBASE_CR4_KR4, ext,
+ ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_2_100GBASE_CR2_KR2, ext,
+ ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_4_200GBASE_CR4_KR4, ext,
+ ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT,
+ ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT);
+}
+
+static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev,
+ struct ptys2ethtool_config **arr,
+ u32 *size)
+{
+ bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+
+ *arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table;
+ *size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) :
+ ARRAY_SIZE(ptys2legacy_ethtool_table);
}
-static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
- "rx_cqe_moder",
- "tx_cqe_moder",
- "rx_cqe_compress",
- "rx_striding_rq",
- "rx_no_csum_complete",
+typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
+
+struct pflag_desc {
+ char name[ETH_GSTRING_LEN];
+ mlx5e_pflag_handler handler;
};
+static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS];
+
int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
{
int i, num_stats = 0;
@@ -153,7 +225,7 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
num_stats += mlx5e_stats_grps[i].get_num_stats(priv);
return num_stats;
case ETH_SS_PRIV_FLAGS:
- return ARRAY_SIZE(mlx5e_priv_flags);
+ return MLX5E_NUM_PFLAGS;
case ETH_SS_TEST:
return mlx5e_self_test_num(priv);
/* fallthrough */
@@ -183,8 +255,9 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data)
switch (stringset) {
case ETH_SS_PRIV_FLAGS:
- for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++)
- strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]);
+ for (i = 0; i < MLX5E_NUM_PFLAGS; i++)
+ strcpy(data + i * ETH_GSTRING_LEN,
+ mlx5e_priv_flags[i].name);
break;
case ETH_SS_TEST:
@@ -296,11 +369,7 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
goto unlock;
}
- err = mlx5e_open_channels(priv, &new_channels);
- if (err)
- goto unlock;
-
- mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
unlock:
mutex_unlock(&priv->state_lock);
@@ -352,32 +421,32 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
new_channels.params = priv->channels.params;
new_channels.params.num_channels = count;
- if (!netif_is_rxfh_configured(priv->netdev))
- mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt,
- MLX5E_INDIR_RQT_SIZE, count);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
+ if (!netif_is_rxfh_configured(priv->netdev))
+ mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, count);
goto out;
}
- /* Create fresh channels with new parameters */
- err = mlx5e_open_channels(priv, &new_channels);
- if (err)
- goto out;
-
arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE;
if (arfs_enabled)
mlx5e_arfs_disable(priv);
+ if (!netif_is_rxfh_configured(priv->netdev))
+ mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, count);
+
/* Switch to new channels, set new parameters and close old ones */
- mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
if (arfs_enabled) {
- err = mlx5e_arfs_enable(priv);
- if (err)
+ int err2 = mlx5e_arfs_enable(priv);
+
+ if (err2)
netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n",
- __func__, err);
+ __func__, err2);
}
out:
@@ -503,12 +572,7 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
goto out;
}
- /* open fresh channels with new coal parameters */
- err = mlx5e_open_channels(priv, &new_channels);
- if (err)
- goto out;
-
- mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
out:
mutex_unlock(&priv->state_lock);
@@ -523,27 +587,37 @@ static int mlx5e_set_coalesce(struct net_device *netdev,
return mlx5e_ethtool_set_coalesce(priv, coal);
}
-static void ptys2ethtool_supported_link(unsigned long *supported_modes,
+static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev,
+ unsigned long *supported_modes,
u32 eth_proto_cap)
{
unsigned long proto_cap = eth_proto_cap;
+ struct ptys2ethtool_config *table;
+ u32 max_size;
int proto;
- for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
+ mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size);
+ for_each_set_bit(proto, &proto_cap, max_size)
bitmap_or(supported_modes, supported_modes,
- ptys2ethtool_table[proto].supported,
+ table[proto].supported,
__ETHTOOL_LINK_MODE_MASK_NBITS);
}
static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
- u32 eth_proto_cap)
+ u32 eth_proto_cap, bool ext)
{
unsigned long proto_cap = eth_proto_cap;
+ struct ptys2ethtool_config *table;
+ u32 max_size;
int proto;
- for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
+ table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table;
+ max_size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) :
+ ARRAY_SIZE(ptys2legacy_ethtool_table);
+
+ for_each_set_bit(proto, &proto_cap, max_size)
bitmap_or(advertising_modes, advertising_modes,
- ptys2ethtool_table[proto].advertised,
+ table[proto].advertised,
__ETHTOOL_LINK_MODE_MASK_NBITS);
}
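
ptys2ethtool_adver_link() walks the set bits of the PTYS protocol word and ORs each mode's precomputed ethtool mask into the result. The same accumulation over a single word, with an illustrative mask table:

    #include <stdint.h>
    #include <stdio.h>

    #define MODES 8

    /* Per-PTYS-mode ethtool masks; contents are illustrative. */
    static const uint64_t mode_mask[MODES] = {
            [0] = 0x1, [1] = 0x2, [3] = 0x30, [5] = 0x700,
    };

    static uint64_t ptys_to_ethtool(uint32_t proto_cap)
    {
            uint64_t out = 0;

            while (proto_cap) {
                    int proto = __builtin_ctz(proto_cap);

                    if (proto < MODES)
                            out |= mode_mask[proto];
                    proto_cap &= proto_cap - 1;     /* clear lowest set bit */
            }
            return out;
    }

    int main(void)
    {
            /* bits 1, 3, 5 set -> 0x2 | 0x30 | 0x700 */
            printf("%#llx\n", (unsigned long long)ptys_to_ethtool(0x2A));
            return 0;
    }
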
@@ -693,13 +767,14 @@ static void get_speed_duplex(struct net_device *netdev,
u32 eth_proto_oper,
struct ethtool_link_ksettings *link_ksettings)
{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
u32 speed = SPEED_UNKNOWN;
u8 duplex = DUPLEX_UNKNOWN;
if (!netif_carrier_ok(netdev))
goto out;
- speed = mlx5e_port_ptys2speed(eth_proto_oper);
+ speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper);
if (!speed) {
speed = SPEED_UNKNOWN;
goto out;
@@ -712,22 +787,22 @@ out:
link_ksettings->base.duplex = duplex;
}
-static void get_supported(u32 eth_proto_cap,
+static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap,
struct ethtool_link_ksettings *link_ksettings)
{
unsigned long *supported = link_ksettings->link_modes.supported;
+ ptys2ethtool_supported_link(mdev, supported, eth_proto_cap);
- ptys2ethtool_supported_link(supported, eth_proto_cap);
ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause);
}
-static void get_advertising(u32 eth_proto_cap, u8 tx_pause,
- u8 rx_pause,
- struct ethtool_link_ksettings *link_ksettings)
+static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause,
+ struct ethtool_link_ksettings *link_ksettings,
+ bool ext)
{
unsigned long *advertising = link_ksettings->link_modes.advertising;
+ ptys2ethtool_adver_link(advertising, eth_proto_cap, ext);
- ptys2ethtool_adver_link(advertising, eth_proto_cap);
if (rx_pause)
ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause);
if (tx_pause ^ rx_pause)
@@ -777,18 +852,18 @@ static u8 get_connector_port(u32 eth_proto, u8 connector_type)
return PORT_OTHER;
}
-static void get_lp_advertising(u32 eth_proto_lp,
+static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp,
struct ethtool_link_ksettings *link_ksettings)
{
unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising;
+ bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
- ptys2ethtool_adver_link(lp_advertising, eth_proto_lp);
+ ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext);
}
-static int mlx5e_get_link_ksettings(struct net_device *netdev,
- struct ethtool_link_ksettings *link_ksettings)
+int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
+ struct ethtool_link_ksettings *link_ksettings)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
u32 rx_pause = 0;
@@ -800,31 +875,50 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
u8 an_disable_admin;
u8 an_status;
u8 connector_type;
+ bool admin_ext;
+ bool ext;
int err;
err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
if (err) {
- netdev_err(netdev, "%s: query port ptys failed: %d\n",
+ netdev_err(priv->netdev, "%s: query port ptys failed: %d\n",
__func__, err);
goto err_query_regs;
}
+ ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+ eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+ eth_proto_capability);
+ eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+ eth_proto_admin);
+ /* The eth_proto_admin and ext_eth_proto_admin fields are
+ * mutually exclusive, so fall back to reading the legacy
+ * advertising field when the extended one reads as zero.
+ * admin_ext records how eth_proto_admin should be
+ * interpreted.
+ */

+ admin_ext = ext;
+ if (ext && !eth_proto_admin) {
+ eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, false,
+ eth_proto_admin);
+ admin_ext = false;
+ }
- eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability);
- eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin);
- eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
- eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise);
- an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
- an_status = MLX5_GET(ptys_reg, out, an_status);
- connector_type = MLX5_GET(ptys_reg, out, connector_type);
+ eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+ eth_proto_oper);
+ eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise);
+ an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+ an_status = MLX5_GET(ptys_reg, out, an_status);
+ connector_type = MLX5_GET(ptys_reg, out, connector_type);
mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
- get_supported(eth_proto_cap, link_ksettings);
- get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings);
- get_speed_duplex(netdev, eth_proto_oper, link_ksettings);
+ get_supported(mdev, eth_proto_cap, link_ksettings);
+ get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings,
+ admin_ext);
+ get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings);
eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
@@ -832,7 +926,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
connector_type);
ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin,
connector_type);
- get_lp_advertising(eth_proto_lp, link_ksettings);
+ get_lp_advertising(mdev, eth_proto_lp, link_ksettings);
if (an_status == MLX5_AN_COMPLETE)
ethtool_link_ksettings_add_link_mode(link_ksettings,
@@ -843,9 +937,12 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
Autoneg);
- if (get_fec_supported_advertised(mdev, link_ksettings))
- netdev_dbg(netdev, "%s: FEC caps query failed: %d\n",
+ err = get_fec_supported_advertised(mdev, link_ksettings);
+ if (err) {
+ netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n",
__func__, err);
+ err = 0; /* don't fail caps query because of FEC error */
+ }
if (!an_disable_admin)
ethtool_link_ksettings_add_link_mode(link_ksettings,
@@ -855,12 +952,22 @@ err_query_regs:
return err;
}
+static int mlx5e_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
+}
+
static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
{
u32 i, ptys_modes = 0;
for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
- if (bitmap_intersects(ptys2ethtool_table[i].advertised,
+ if (*ptys2legacy_ethtool_table[i].advertised == 0)
+ continue;
+ if (bitmap_intersects(ptys2legacy_ethtool_table[i].advertised,
link_modes,
__ETHTOOL_LINK_MODE_MASK_NBITS))
ptys_modes |= MLX5E_PROT_MASK(i);
@@ -869,14 +976,34 @@ static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
return ptys_modes;
}
-static int mlx5e_set_link_ksettings(struct net_device *netdev,
- const struct ethtool_link_ksettings *link_ksettings)
+static u32 mlx5e_ethtool2ptys_ext_adver_link(const unsigned long *link_modes)
+{
+ u32 i, ptys_modes = 0;
+ unsigned long modes[2];
+
+ for (i = 0; i < MLX5E_EXT_LINK_MODES_NUMBER; ++i) {
+ if (*ptys2ext_ethtool_table[i].advertised == 0)
+ continue;
+ memset(modes, 0, sizeof(modes));
+ bitmap_and(modes, ptys2ext_ethtool_table[i].advertised,
+ link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ if (modes[0] == ptys2ext_ethtool_table[i].advertised[0] &&
+ modes[1] == ptys2ext_ethtool_table[i].advertised[1])
+ ptys_modes |= MLX5E_PROT_MASK(i);
+ }
+ return ptys_modes;
+}
+
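Note the asymmetry between the two mappers: the legacy path keeps bitmap_intersects(), while the extended path above selects a PTYS mode only when its entire advertised set is contained in the request (the bitmap_and result must equal the table entry). Both predicates, reduced to one 64-bit word:

    #include <stdint.h>
    #include <stdio.h>

    static int intersects(uint64_t adv, uint64_t req)
    {
            return (adv & req) != 0;        /* any shared bit is enough */
    }

    static int subset(uint64_t adv, uint64_t req)
    {
            return (adv & req) == adv;      /* every advertised bit requested */
    }

    int main(void)
    {
            /* entry advertises bits 1+2, user requested only bit 1 */
            uint64_t adv = 0x6, req = 0x2;

            printf("intersects=%d subset=%d\n",
                   intersects(adv, req), subset(adv, req));
            return 0;
    }
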
+int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
+ const struct ethtool_link_ksettings *link_ksettings)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
- u32 eth_proto_cap, eth_proto_admin;
+ struct mlx5e_port_eth_proto eproto;
bool an_changes = false;
u8 an_disable_admin;
+ bool ext_supported;
+ bool ext_requested;
u8 an_disable_cap;
bool an_disable;
u32 link_modes;
@@ -884,54 +1011,66 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev,
u32 speed;
int err;
- speed = link_ksettings->base.speed;
+ u32 (*ethtool2ptys_adver_func)(const unsigned long *adver);
- link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ?
- mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) :
- mlx5e_port_speed2linkmodes(speed);
+#define MLX5E_PTYS_EXT ((1ULL << ETHTOOL_LINK_MODE_50000baseKR_Full_BIT) - 1)
+
+ ext_requested = !!(link_ksettings->link_modes.advertising[0] >
+ MLX5E_PTYS_EXT ||
+ link_ksettings->link_modes.advertising[1]);
+ ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+ ext_requested &= ext_supported;
- err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
+ speed = link_ksettings->base.speed;
+ ethtool2ptys_adver_func = ext_requested ?
+ mlx5e_ethtool2ptys_ext_adver_link :
+ mlx5e_ethtool2ptys_adver_link;
+ err = mlx5_port_query_eth_proto(mdev, 1, ext_requested, &eproto);
if (err) {
- netdev_err(netdev, "%s: query port eth proto cap failed: %d\n",
+ netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n",
__func__, err);
goto out;
}
+ link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ?
+ ethtool2ptys_adver_func(link_ksettings->link_modes.advertising) :
+ mlx5e_port_speed2linkmodes(mdev, speed);
- link_modes = link_modes & eth_proto_cap;
+ link_modes = link_modes & eproto.cap;
if (!link_modes) {
- netdev_err(netdev, "%s: Not supported link mode(s) requested",
+ netdev_err(priv->netdev, "%s: Not supported link mode(s) requested",
__func__);
err = -EINVAL;
goto out;
}
- err = mlx5_query_port_proto_admin(mdev, &eth_proto_admin, MLX5_PTYS_EN);
- if (err) {
- netdev_err(netdev, "%s: query port eth proto admin failed: %d\n",
- __func__, err);
- goto out;
- }
-
- mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status,
- &an_disable_cap, &an_disable_admin);
+ mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap,
+ &an_disable_admin);
an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE;
an_changes = ((!an_disable && an_disable_admin) ||
(an_disable && !an_disable_admin));
- if (!an_changes && link_modes == eth_proto_admin)
+ if (!an_changes && link_modes == eproto.admin)
goto out;
- mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN);
+ mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_requested);
mlx5_toggle_port_link(mdev);
out:
return err;
}
+static int mlx5e_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
+}
+
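MLX5E_PTYS_EXT is (1 << B) - 1, i.e. all bits below the first extended-only link mode set, so comparing advertising[0] against it with ">" is exactly the test "is any bit >= B requested?". A small demonstration — B stands for ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, and the value 52 below is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define B 52
    #define EXT_MASK ((1ULL << B) - 1)

    static int ext_requested(const uint64_t adv[2])
    {
            /* adv[0] > EXT_MASK iff some bit >= B is set in word 0;
             * any bit in word 1 is extended-only by construction. */
            return adv[0] > EXT_MASK || adv[1] != 0;
    }

    int main(void)
    {
            uint64_t legacy[2] = { 1ULL << 10, 0 };
            uint64_t ext[2]    = { 1ULL << 53, 0 };

            printf("legacy=%d ext=%d\n",
                   ext_requested(legacy), ext_requested(ext));
            return 0;
    }
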
u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
{
- return sizeof(priv->channels.params.toeplitz_hash_key);
+ return sizeof(priv->rss_params.toeplitz_hash_key);
}
static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
@@ -957,50 +1096,27 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
if (indir)
- memcpy(indir, priv->channels.params.indirection_rqt,
- sizeof(priv->channels.params.indirection_rqt));
+ memcpy(indir, rss->indirection_rqt,
+ sizeof(rss->indirection_rqt));
if (key)
- memcpy(key, priv->channels.params.toeplitz_hash_key,
- sizeof(priv->channels.params.toeplitz_hash_key));
+ memcpy(key, rss->toeplitz_hash_key,
+ sizeof(rss->toeplitz_hash_key));
if (hfunc)
- *hfunc = priv->channels.params.rss_hfunc;
+ *hfunc = rss->hfunc;
return 0;
}
-static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
-{
- void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
- struct mlx5_core_dev *mdev = priv->mdev;
- int ctxlen = MLX5_ST_SZ_BYTES(tirc);
- int tt;
-
- MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(tirc, 0, ctxlen);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
- mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
- }
-
- if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
- return;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- memset(tirc, 0, ctxlen);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
- mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen);
- }
-}
-
static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
const u8 *key, const u8 hfunc)
{
struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
bool hash_changed = false;
void *in;
@@ -1016,15 +1132,14 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
mutex_lock(&priv->state_lock);
- if (hfunc != ETH_RSS_HASH_NO_CHANGE &&
- hfunc != priv->channels.params.rss_hfunc) {
- priv->channels.params.rss_hfunc = hfunc;
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) {
+ rss->hfunc = hfunc;
hash_changed = true;
}
if (indir) {
- memcpy(priv->channels.params.indirection_rqt, indir,
- sizeof(priv->channels.params.indirection_rqt));
+ memcpy(rss->indirection_rqt, indir,
+ sizeof(rss->indirection_rqt));
if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
u32 rqtn = priv->indir_rqt.rqtn;
@@ -1032,7 +1147,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
.is_rss = true,
{
.rss = {
- .hfunc = priv->channels.params.rss_hfunc,
+ .hfunc = rss->hfunc,
.channels = &priv->channels,
},
},
@@ -1043,10 +1158,9 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
}
if (key) {
- memcpy(priv->channels.params.toeplitz_hash_key, key,
- sizeof(priv->channels.params.toeplitz_hash_key));
- hash_changed = hash_changed ||
- priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP;
+ memcpy(rss->toeplitz_hash_key, key,
+ sizeof(rss->toeplitz_hash_key));
+ hash_changed = hash_changed || rss->hfunc == ETH_RSS_HASH_TOP;
}
if (hash_changed)
@@ -1150,25 +1264,31 @@ static int mlx5e_set_tunable(struct net_device *dev,
return err;
}
-static void mlx5e_get_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pauseparam)
+void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
int err;
err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause,
&pauseparam->tx_pause);
if (err) {
- netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n",
+ netdev_err(priv->netdev, "%s: mlx5_query_port_pause failed:0x%x\n",
__func__, err);
}
}
-static int mlx5e_set_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *pauseparam)
+static void mlx5e_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_ethtool_get_pauseparam(priv, pauseparam);
+}
+
+int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv,
+ struct ethtool_pauseparam *pauseparam)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
int err;
@@ -1179,13 +1299,21 @@ static int mlx5e_set_pauseparam(struct net_device *netdev,
pauseparam->rx_pause ? 1 : 0,
pauseparam->tx_pause ? 1 : 0);
if (err) {
- netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n",
+ netdev_err(priv->netdev, "%s: mlx5_set_port_pause failed:0x%x\n",
__func__, err);
}
return err;
}
+static int mlx5e_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
+}
+
int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
struct ethtool_ts_info *info)
{
@@ -1458,7 +1586,7 @@ static int mlx5e_get_module_info(struct net_device *netdev,
break;
case MLX5_MODULE_ID_SFP:
modinfo->type = ETH_MODULE_SFF_8472;
- modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH;
break;
default:
netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n",
@@ -1505,8 +1633,6 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
return 0;
}
-typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
-
static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
bool is_rx_cq)
{
@@ -1515,7 +1641,6 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
struct mlx5e_channels new_channels = {};
bool mode_changed;
u8 cq_period_mode, current_cq_period_mode;
- int err = 0;
cq_period_mode = enable ?
MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
@@ -1543,12 +1668,7 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
return 0;
}
- err = mlx5e_open_channels(priv, &new_channels);
- if (err)
- return err;
-
- mlx5e_switch_priv_channels(priv, &new_channels, NULL);
- return 0;
+ return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
}
static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable)
@@ -1581,11 +1701,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
return 0;
}
- err = mlx5e_open_channels(priv, &new_channels);
+ err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
if (err)
return err;
- mlx5e_switch_priv_channels(priv, &new_channels, NULL);
mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
MLX5E_GET_PFLAG(&priv->channels.params,
MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
@@ -1618,7 +1737,6 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_channels new_channels = {};
- int err;
if (enable) {
if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
@@ -1640,12 +1758,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
return 0;
}
- err = mlx5e_open_channels(priv, &new_channels);
- if (err)
- return err;
-
- mlx5e_switch_priv_channels(priv, &new_channels, NULL);
- return 0;
+ return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
}
static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
@@ -1655,7 +1768,8 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
struct mlx5e_channel *c;
int i;
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) ||
+ priv->channels.params.xdp_prog)
return 0;
for (i = 0; i < channels->num; i++) {
@@ -1669,23 +1783,54 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
return 0;
}
+static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_channels new_channels = {};
+ int err;
+
+ if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
+ return -EOPNOTSUPP;
+
+ new_channels.params = priv->channels.params;
+
+ MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ return 0;
+ }
+
+ err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+ return err;
+}
+
+static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
+ { "rx_cqe_moder", set_pflag_rx_cqe_based_moder },
+ { "tx_cqe_moder", set_pflag_tx_cqe_based_moder },
+ { "rx_cqe_compress", set_pflag_rx_cqe_compress },
+ { "rx_striding_rq", set_pflag_rx_striding_rq },
+ { "rx_no_csum_complete", set_pflag_rx_no_csum_complete },
+ { "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe },
+};
+
static int mlx5e_handle_pflag(struct net_device *netdev,
u32 wanted_flags,
- enum mlx5e_priv_flag flag,
- mlx5e_pflag_handler pflag_handler)
+ enum mlx5e_priv_flag flag)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- bool enable = !!(wanted_flags & flag);
+ bool enable = !!(wanted_flags & BIT(flag));
u32 changes = wanted_flags ^ priv->channels.params.pflags;
int err;
- if (!(changes & flag))
+ if (!(changes & BIT(flag)))
return 0;
- err = pflag_handler(netdev, enable);
+ err = mlx5e_priv_flags[flag].handler(netdev, enable);
if (err) {
- netdev_err(netdev, "%s private flag 0x%x failed err %d\n",
- enable ? "Enable" : "Disable", flag, err);
+ netdev_err(netdev, "%s private flag '%s' failed err %d\n",
+ enable ? "Enable" : "Disable", mlx5e_priv_flags[flag].name, err);
return err;
}
@@ -1696,38 +1841,17 @@ static int mlx5e_handle_pflag(struct net_device *netdev,
static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ enum mlx5e_priv_flag pflag;
int err;
mutex_lock(&priv->state_lock);
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_CQE_BASED_MODER,
- set_pflag_rx_cqe_based_moder);
- if (err)
- goto out;
-
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_TX_CQE_BASED_MODER,
- set_pflag_tx_cqe_based_moder);
- if (err)
- goto out;
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_CQE_COMPRESS,
- set_pflag_rx_cqe_compress);
- if (err)
- goto out;
-
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_STRIDING_RQ,
- set_pflag_rx_striding_rq);
- if (err)
- goto out;
-
- err = mlx5e_handle_pflag(netdev, pflags,
- MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
- set_pflag_rx_no_csum_complete);
+ for (pflag = 0; pflag < MLX5E_NUM_PFLAGS; pflag++) {
+ err = mlx5e_handle_pflag(netdev, pflags, pflag);
+ if (err)
+ break;
+ }
-out:
mutex_unlock(&priv->state_lock);
/* Need to fix some features.. */
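
The private-flags rework above replaces five hand-rolled mlx5e_handle_pflag() calls with a {name, handler} table indexed by the flag enum and a loop over BIT(flag), so adding a flag now means adding one table entry. A self-contained model of that dispatch (flag names and handlers are illustrative):

    #include <stdio.h>

    typedef int (*pflag_handler)(int enable);

    static int on_flag_a(int enable) { printf("a=%d\n", enable); return 0; }
    static int on_flag_b(int enable) { printf("b=%d\n", enable); return 0; }

    enum { PFLAG_A, PFLAG_B, NUM_PFLAGS };

    static const struct {
            const char *name;
            pflag_handler handler;
    } pflags[NUM_PFLAGS] = {
            { "flag_a", on_flag_a },
            { "flag_b", on_flag_b },
    };

    static int set_priv_flags(unsigned int *cur, unsigned int wanted)
    {
            for (int f = 0; f < NUM_PFLAGS; f++) {
                    unsigned int changed = wanted ^ *cur;

                    if (!(changed & (1U << f)))
                            continue;       /* this flag did not change */
                    if (pflags[f].handler(!!(wanted & (1U << f))))
                            return -1;
                    *cur ^= 1U << f;
            }
            return 0;
    }

    int main(void)
    {
            unsigned int cur = 1U << PFLAG_A;       /* flag_a currently on */

            return set_priv_flags(&cur, 1U << PFLAG_B); /* flips both */
    }
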
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index c18dcebe1462..4421c10f58ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -771,6 +771,112 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
INIT_LIST_HEAD(&priv->fs.ethtool.rules);
}
+static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type)
+{
+ switch (flow_type) {
+ case TCP_V4_FLOW:
+ return MLX5E_TT_IPV4_TCP;
+ case TCP_V6_FLOW:
+ return MLX5E_TT_IPV6_TCP;
+ case UDP_V4_FLOW:
+ return MLX5E_TT_IPV4_UDP;
+ case UDP_V6_FLOW:
+ return MLX5E_TT_IPV6_UDP;
+ case AH_V4_FLOW:
+ return MLX5E_TT_IPV4_IPSEC_AH;
+ case AH_V6_FLOW:
+ return MLX5E_TT_IPV6_IPSEC_AH;
+ case ESP_V4_FLOW:
+ return MLX5E_TT_IPV4_IPSEC_ESP;
+ case ESP_V6_FLOW:
+ return MLX5E_TT_IPV6_IPSEC_ESP;
+ case IPV4_FLOW:
+ return MLX5E_TT_IPV4;
+ case IPV6_FLOW:
+ return MLX5E_TT_IPV6;
+ default:
+ return MLX5E_NUM_INDIR_TIRS;
+ }
+}
+
+static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *nfc)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ enum mlx5e_traffic_types tt;
+ u8 rx_hash_field = 0;
+ void *in;
+
+ tt = flow_type_to_traffic_type(nfc->flow_type);
+ if (tt == MLX5E_NUM_INDIR_TIRS)
+ return -EINVAL;
+
+ /* RSS hashing to queues is supported only on src IP, dst IP,
+ * and the TCP/UDP source and destination ports.
+ */
+ if (nfc->flow_type != TCP_V4_FLOW &&
+ nfc->flow_type != TCP_V6_FLOW &&
+ nfc->flow_type != UDP_V4_FLOW &&
+ nfc->flow_type != UDP_V6_FLOW)
+ return -EOPNOTSUPP;
+
+ if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3))
+ return -EOPNOTSUPP;
+
+ if (nfc->data & RXH_IP_SRC)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_SRC_IP;
+ if (nfc->data & RXH_IP_DST)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_DST_IP;
+ if (nfc->data & RXH_L4_B_0_1)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_SPORT;
+ if (nfc->data & RXH_L4_B_2_3)
+ rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mutex_lock(&priv->state_lock);
+
+ if (rx_hash_field == priv->rss_params.rx_hash_fields[tt])
+ goto out;
+
+ priv->rss_params.rx_hash_fields[tt] = rx_hash_field;
+ mlx5e_modify_tirs_hash(priv, in, inlen);
+
+out:
+ mutex_unlock(&priv->state_lock);
+ kvfree(in);
+ return 0;
+}
+
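The new ETHTOOL_SRXFH/GRXFH handlers translate RXH_* request bits into hardware hash-field selectors and back, so a get after a set reproduces the request (reachable from userspace via, e.g., ethtool -N <dev> rx-flow-hash tcp4 sdfn). A round-trip model — the bit values below are illustrative, not the real uapi constants:

    #include <stdio.h>

    #define RXH_IP_SRC   (1 << 0)
    #define RXH_IP_DST   (1 << 1)
    #define RXH_L4_B_0_1 (1 << 2)   /* L4 source port */
    #define RXH_L4_B_2_3 (1 << 3)   /* L4 dest port */

    #define SEL_SRC_IP   (1 << 0)
    #define SEL_DST_IP   (1 << 1)
    #define SEL_L4_SPORT (1 << 2)
    #define SEL_L4_DPORT (1 << 3)

    static unsigned int rxh_to_sel(unsigned int data)
    {
            unsigned int sel = 0;

            if (data & RXH_IP_SRC)   sel |= SEL_SRC_IP;
            if (data & RXH_IP_DST)   sel |= SEL_DST_IP;
            if (data & RXH_L4_B_0_1) sel |= SEL_L4_SPORT;
            if (data & RXH_L4_B_2_3) sel |= SEL_L4_DPORT;
            return sel;
    }

    static unsigned int sel_to_rxh(unsigned int sel)
    {
            unsigned int data = 0;

            if (sel & SEL_SRC_IP)   data |= RXH_IP_SRC;
            if (sel & SEL_DST_IP)   data |= RXH_IP_DST;
            if (sel & SEL_L4_SPORT) data |= RXH_L4_B_0_1;
            if (sel & SEL_L4_DPORT) data |= RXH_L4_B_2_3;
            return data;
    }

    int main(void)
    {
            unsigned int req = RXH_IP_SRC | RXH_IP_DST |
                               RXH_L4_B_0_1 | RXH_L4_B_2_3;

            printf("round-trip ok: %d\n", sel_to_rxh(rxh_to_sel(req)) == req);
            return 0;
    }
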
+static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *nfc)
+{
+ enum mlx5e_traffic_types tt;
+ u32 hash_field = 0;
+
+ tt = flow_type_to_traffic_type(nfc->flow_type);
+ if (tt == MLX5E_NUM_INDIR_TIRS)
+ return -EINVAL;
+
+ hash_field = priv->rss_params.rx_hash_fields[tt];
+ nfc->data = 0;
+
+ if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP)
+ nfc->data |= RXH_IP_SRC;
+ if (hash_field & MLX5_HASH_FIELD_SEL_DST_IP)
+ nfc->data |= RXH_IP_DST;
+ if (hash_field & MLX5_HASH_FIELD_SEL_L4_SPORT)
+ nfc->data |= RXH_L4_B_0_1;
+ if (hash_field & MLX5_HASH_FIELD_SEL_L4_DPORT)
+ nfc->data |= RXH_L4_B_2_3;
+
+ return 0;
+}
+
int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
{
int err = 0;
@@ -783,6 +889,9 @@ int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
case ETHTOOL_SRXCLSRLDEL:
err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
break;
+ case ETHTOOL_SRXFH:
+ err = mlx5e_set_rss_hash_opt(priv, cmd);
+ break;
default:
err = -EOPNOTSUPP;
break;
@@ -810,6 +919,9 @@ int mlx5e_get_rxnfc(struct net_device *dev,
case ETHTOOL_GRXCLSRLALL:
err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs);
break;
+ case ETHTOOL_GRXFH:
+ err = mlx5e_get_rss_hash_opt(priv, info);
+ break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b70cb6fd164c..46157e2a1e5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -35,6 +35,7 @@
#include <linux/mlx5/fs.h>
#include <net/vxlan.h>
#include <linux/bpf.h>
+#include <linux/if_bridge.h>
#include <net/page_pool.h>
#include "eswitch.h"
#include "en.h"
@@ -49,6 +50,9 @@
#include "lib/clock.h"
#include "en/port.h"
#include "en/xdp.h"
+#include "lib/eq.h"
+#include "en/monitor_stats.h"
+#include "en/reporter.h"
struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
@@ -59,6 +63,7 @@ struct mlx5e_rq_param {
struct mlx5e_sq_param {
u32 sqc[MLX5_ST_SZ_DW(sqc)];
struct mlx5_wq_param wq;
+ bool is_mpw;
};
struct mlx5e_cq_param {
@@ -168,8 +173,7 @@ static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
return order_base_2(mlx5e_rx_get_linear_frag_sz(params));
- return MLX5E_MPWQE_STRIDE_SZ(mdev,
- MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
+ return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
@@ -228,7 +232,7 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
MLX5_WQ_TYPE_CYCLIC;
}
-static void mlx5e_update_carrier(struct mlx5e_priv *priv)
+void mlx5e_update_carrier(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
u8 port_state;
@@ -267,7 +271,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
mlx5e_stats_grps[i].update_stats(priv);
}
-static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
+void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
{
int i;
@@ -298,33 +302,35 @@ void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->update_stats_work);
}
-static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
- enum mlx5_dev_event event, unsigned long param)
+static int async_event(struct notifier_block *nb, unsigned long event, void *data)
{
- struct mlx5e_priv *priv = vpriv;
+ struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+ struct mlx5_eqe *eqe = data;
- if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
- return;
+ if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
+ return NOTIFY_DONE;
- switch (event) {
- case MLX5_DEV_EVENT_PORT_UP:
- case MLX5_DEV_EVENT_PORT_DOWN:
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
queue_work(priv->wq, &priv->update_carrier_work);
break;
default:
- break;
+ return NOTIFY_DONE;
}
+
+ return NOTIFY_OK;
}
static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
- set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
+ priv->events_nb.notifier_call = async_event;
+ mlx5_notifier_register(priv->mdev, &priv->events_nb);
}
static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
- clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
- synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
+ mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
}
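
The async-event rework drops the MLX5E_STATE_ASYNC_EVENTS_ENABLED bit and the IRQ synchronization in favor of registering and unregistering a notifier block, with the handler returning NOTIFY_DONE for events it does not consume. A toy single-threaded notifier chain showing the shape (names and constants are stand-ins for the kernel's):

    #include <stdio.h>

    #define NOTIFY_DONE 0
    #define NOTIFY_OK   1
    #define EVENT_PORT_CHANGE 9

    struct notifier_block {
            int (*notifier_call)(struct notifier_block *nb,
                                 unsigned long event, void *data);
            struct notifier_block *next;
    };

    static struct notifier_block *chain;

    static void notifier_register(struct notifier_block *nb)
    {
            nb->next = chain;
            chain = nb;
    }

    static void notifier_call_chain(unsigned long event, void *data)
    {
            for (struct notifier_block *nb = chain; nb; nb = nb->next)
                    nb->notifier_call(nb, event, data);
    }

    static int async_event(struct notifier_block *nb, unsigned long event,
                           void *data)
    {
            (void)nb; (void)data;
            if (event != EVENT_PORT_CHANGE)
                    return NOTIFY_DONE;     /* not ours, let others look */
            printf("queue carrier update\n");
            return NOTIFY_OK;
    }

    int main(void)
    {
            struct notifier_block events_nb = { .notifier_call = async_event };

            notifier_register(&events_nb);
            notifier_call_chain(EVENT_PORT_CHANGE, NULL);
            notifier_call_chain(3, NULL);   /* ignored by the handler */
            return 0;
    }
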
static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
@@ -945,7 +951,11 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
if (params->rx_dim_enabled)
__set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
- if (params->pflags & MLX5E_PFLAG_RX_NO_CSUM_COMPLETE)
+ /* We disable csum_complete when XDP is enabled, since an XDP
+ * program may modify the packet and render skb->checksum
+ * incorrect.
+ */
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
return 0;
@@ -988,18 +998,42 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq)
static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
{
- kvfree(sq->db.xdpi);
+ kvfree(sq->db.xdpi_fifo.xi);
+ kvfree(sq->db.wqe_info);
+}
+
+static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
+{
+ struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+
+ xdpi_fifo->xi = kvzalloc_node(sizeof(*xdpi_fifo->xi) * dsegs_per_wq,
+ GFP_KERNEL, numa);
+ if (!xdpi_fifo->xi)
+ return -ENOMEM;
+
+ xdpi_fifo->pc = &sq->xdpi_fifo_pc;
+ xdpi_fifo->cc = &sq->xdpi_fifo_cc;
+ xdpi_fifo->mask = dsegs_per_wq - 1;
+
+ return 0;
}
static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
{
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int err;
- sq->db.xdpi = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.xdpi)),
- GFP_KERNEL, numa);
- if (!sq->db.xdpi) {
- mlx5e_free_xdpsq_db(sq);
+ sq->db.wqe_info = kvzalloc_node(sizeof(*sq->db.wqe_info) * wq_sz,
+ GFP_KERNEL, numa);
+ if (!sq->db.wqe_info)
return -ENOMEM;
+
+ err = mlx5e_alloc_xdpsq_fifo(sq, numa);
+ if (err) {
+ mlx5e_free_xdpsq_db(sq);
+ return err;
}
return 0;
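
The xdpi FIFO introduced here uses free-running producer/consumer counters (pc/cc) masked into a power-of-two array, which is why it is sized to wq_sz * MLX5_SEND_WQEBB_NUM_DS and why mask is size - 1: fullness reduces to pc - cc, and the mask trick holds as long as both factors are powers of two. A compilable model of that ring:

    #include <assert.h>
    #include <stdio.h>

    #define FIFO_SIZE 8     /* must be a power of two */

    struct xdpi_fifo {
            int xi[FIFO_SIZE];
            unsigned int pc, cc;    /* free-running counters */
            unsigned int mask;
    };

    static void fifo_push(struct xdpi_fifo *f, int v)
    {
            assert(f->pc - f->cc < FIFO_SIZE);      /* not full */
            f->xi[f->pc++ & f->mask] = v;
    }

    static int fifo_pop(struct xdpi_fifo *f)
    {
            assert(f->pc != f->cc);                 /* not empty */
            return f->xi[f->cc++ & f->mask];
    }

    int main(void)
    {
            struct xdpi_fifo f = { .mask = FIFO_SIZE - 1 };

            for (int i = 0; i < 5; i++)
                    fifo_push(&f, i);
            printf("%d %d\n", fifo_pop(&f), fifo_pop(&f));
            return 0;
    }
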
@@ -1131,7 +1165,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
return 0;
}
-static void mlx5e_sq_recover(struct work_struct *work);
+static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int txq_ix,
struct mlx5e_params *params,
@@ -1153,7 +1187,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
- INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
+ INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
if (mlx5_accel_is_tls_device(c->priv->mdev))
@@ -1241,15 +1275,8 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
return err;
}
-struct mlx5e_modify_sq_param {
- int curr_state;
- int next_state;
- bool rl_update;
- int rl_index;
-};
-
-static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
- struct mlx5e_modify_sq_param *p)
+int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+ struct mlx5e_modify_sq_param *p)
{
void *in;
void *sqc;
@@ -1347,17 +1374,7 @@ err_free_txqsq:
return err;
}
-static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
-{
- WARN_ONCE(sq->cc != sq->pc,
- "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
- sq->sqn, sq->cc, sq->pc);
- sq->cc = 0;
- sq->dma_fifo_cc = 0;
- sq->pc = 0;
-}
-
-static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
+void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
{
sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
@@ -1366,7 +1383,7 @@ static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
netif_tx_start_queue(sq->txq);
}
-static inline void netif_tx_disable_queue(struct netdev_queue *txq)
+void mlx5e_tx_disable_queue(struct netdev_queue *txq)
{
__netif_tx_lock_bh(txq);
netif_tx_stop_queue(txq);
@@ -1382,7 +1399,7 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
/* prevent netif_tx_wake_queue */
napi_synchronize(&c->napi);
- netif_tx_disable_queue(sq->txq);
+ mlx5e_tx_disable_queue(sq->txq);
/* last doorbell out, godspeed .. */
if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
@@ -1402,6 +1419,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
struct mlx5_rate_limit rl = {0};
cancel_work_sync(&sq->dim.work);
+ cancel_work_sync(&sq->recover_work);
mlx5e_destroy_sq(mdev, sq->sqn);
if (sq->rate_limit) {
rl.rate = sq->rate_limit;
@@ -1411,105 +1429,12 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
mlx5e_free_txqsq(sq);
}
-static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
{
- unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+ struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
+ recover_work);
- while (time_before(jiffies, exp_time)) {
- if (sq->cc == sq->pc)
- return 0;
-
- msleep(20);
- }
-
- netdev_err(sq->channel->netdev,
- "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
- sq->sqn, sq->cc, sq->pc);
-
- return -ETIMEDOUT;
-}
-
-static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
-{
- struct mlx5_core_dev *mdev = sq->channel->mdev;
- struct net_device *dev = sq->channel->netdev;
- struct mlx5e_modify_sq_param msp = {0};
- int err;
-
- msp.curr_state = curr_state;
- msp.next_state = MLX5_SQC_STATE_RST;
-
- err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
- if (err) {
- netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
- return err;
- }
-
- memset(&msp, 0, sizeof(msp));
- msp.curr_state = MLX5_SQC_STATE_RST;
- msp.next_state = MLX5_SQC_STATE_RDY;
-
- err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
- if (err) {
- netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
- return err;
- }
-
- return 0;
-}
-
-static void mlx5e_sq_recover(struct work_struct *work)
-{
- struct mlx5e_txqsq_recover *recover =
- container_of(work, struct mlx5e_txqsq_recover,
- recover_work);
- struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq,
- recover);
- struct mlx5_core_dev *mdev = sq->channel->mdev;
- struct net_device *dev = sq->channel->netdev;
- u8 state;
- int err;
-
- err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
- if (err) {
- netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
- sq->sqn, err);
- return;
- }
-
- if (state != MLX5_RQC_STATE_ERR) {
- netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
- return;
- }
-
- netif_tx_disable_queue(sq->txq);
-
- if (mlx5e_wait_for_sq_flush(sq))
- return;
-
- /* If the interval between two consecutive recovers per SQ is too
- * short, don't recover to avoid infinite loop of ERR_CQE -> recover.
- * If we reached this state, there is probably a bug that needs to be
- * fixed. let's keep the queue close and let tx timeout cleanup.
- */
- if (jiffies_to_msecs(jiffies - recover->last_recover) <
- MLX5E_SQ_RECOVER_MIN_INTERVAL) {
- netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n",
- sq->sqn);
- return;
- }
-
- /* At this point, no new packets will arrive from the stack as TXQ is
- * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
- * pending WQEs. SQ can safely reset the SQ.
- */
- if (mlx5e_sq_to_ready(sq, state))
- return;
-
- mlx5e_reset_txqsq_cc_pc(sq);
- sq->stats->recover++;
- recover->last_recover = jiffies;
- mlx5e_activate_txqsq(sq);
+ mlx5e_tx_reporter_err_cqe(sq);
}
static int mlx5e_open_icosq(struct mlx5e_channel *c,
@@ -1558,11 +1483,8 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
struct mlx5e_xdpsq *sq,
bool is_redirect)
{
- unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
struct mlx5e_create_sq_param csp = {};
- unsigned int inline_hdr_sz = 0;
int err;
- int i;
err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
if (err)
@@ -1573,30 +1495,40 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
csp.cqn = sq->cq.mcq.cqn;
csp.wq_ctrl = &sq->wq_ctrl;
csp.min_inline_mode = sq->min_inline_mode;
- if (is_redirect)
- set_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
if (err)
goto err_free_xdpsq;
- if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
- ds_cnt++;
- }
+ mlx5e_set_xmit_fp(sq, param->is_mpw);
+
+ if (!param->is_mpw) {
+ unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
+ unsigned int inline_hdr_sz = 0;
+ int i;
+
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+ ds_cnt++;
+ }
- /* Pre initialize fixed WQE fields */
- for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
- struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
- struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
- struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
- struct mlx5_wqe_data_seg *dseg;
+ /* Pre initialize fixed WQE fields */
+ for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[i];
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
+ struct mlx5_wqe_data_seg *dseg;
- cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
- eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
- dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
- dseg->lkey = sq->mkey_be;
+ dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
+ dseg->lkey = sq->mkey_be;
+
+ wi->num_wqebbs = 1;
+ wi->num_ds = 1;
+ }
}
return 0;
@@ -1608,7 +1540,7 @@ err_free_xdpsq:
return err;
}
-static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
+static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
{
struct mlx5e_channel *c = sq->channel;
@@ -1616,7 +1548,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
napi_synchronize(&c->napi);
mlx5e_destroy_sq(c->mdev, sq->sqn);
- mlx5e_free_xdpsq_descs(sq);
+ mlx5e_free_xdpsq_descs(sq, rq);
mlx5e_free_xdpsq(sq);
}
@@ -1769,11 +1701,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
mlx5e_free_cq(cq);
}
-static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
-{
- return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
-}
-
static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam)
@@ -1919,14 +1846,37 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
return err;
}
+static int mlx5e_alloc_xps_cpumask(struct mlx5e_channel *c,
+ struct mlx5e_params *params)
+{
+ int num_comp_vectors = mlx5_comp_vectors_count(c->mdev);
+ int irq;
+
+ if (!zalloc_cpumask_var(&c->xps_cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
+ for (irq = c->ix; irq < num_comp_vectors; irq += params->num_channels) {
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(c->mdev, irq));
+
+ cpumask_set_cpu(cpu, c->xps_cpumask);
+ }
+
+ return 0;
+}
+
+static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c)
+{
+ free_cpumask_var(c->xps_cpumask);
+}
+
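mlx5e_alloc_xps_cpumask() above unions the IRQ affinities of every completion vector a channel serves: vector ix, ix + num_channels, ix + 2*num_channels, and so on, rather than a single CPU per channel as before. Printing that stride mapping for an example 8-vector, 3-channel split:

    #include <stdio.h>

    int main(void)
    {
            int num_comp_vectors = 8, num_channels = 3;

            for (int ix = 0; ix < num_channels; ix++) {
                    printf("channel %d <- irqs:", ix);
                    for (int irq = ix; irq < num_comp_vectors;
                         irq += num_channels)
                            printf(" %d", irq);
                    printf("\n");
            }
            return 0;
    }
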
static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam,
struct mlx5e_channel **cp)
{
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
struct net_dim_cq_moder icocq_moder = {0, 0};
struct net_device *netdev = priv->netdev;
- int cpu = mlx5e_get_cpu(priv, ix);
struct mlx5e_channel *c;
unsigned int irq;
int err;
@@ -1951,9 +1901,12 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->num_tc = params->num_tc;
c->xdp = !!params->xdp_prog;
c->stats = &priv->channel_stats[ix].ch;
-
c->irq_desc = irq_to_desc(irq);
+ err = mlx5e_alloc_xps_cpumask(c, params);
+ if (err)
+ goto err_free_channel;
+
netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq);
@@ -2009,7 +1962,7 @@ err_close_rq:
err_close_xdp_sq:
if (c->xdp)
- mlx5e_close_xdpsq(&c->rq.xdpsq);
+ mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq);
err_close_sqs:
mlx5e_close_sqs(c);
@@ -2036,6 +1989,9 @@ err_close_icosq_cq:
err_napi_del:
netif_napi_del(&c->napi);
+ mlx5e_free_xps_cpumask(c);
+
+err_free_channel:
kvfree(c);
return err;
@@ -2048,7 +2004,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
for (tc = 0; tc < c->num_tc; tc++)
mlx5e_activate_txqsq(&c->sq[tc]);
mlx5e_activate_rq(&c->rq);
- netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
+ netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix);
}
static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -2062,10 +2018,10 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
static void mlx5e_close_channel(struct mlx5e_channel *c)
{
- mlx5e_close_xdpsq(&c->xdpsq);
+ mlx5e_close_xdpsq(&c->xdpsq, NULL);
mlx5e_close_rq(&c->rq);
if (c->xdp)
- mlx5e_close_xdpsq(&c->rq.xdpsq);
+ mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq);
mlx5e_close_sqs(c);
mlx5e_close_icosq(&c->icosq);
napi_disable(&c->napi);
@@ -2076,6 +2032,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
mlx5e_close_tx_cqs(c);
mlx5e_close_cq(&c->icosq.cq);
netif_napi_del(&c->napi);
+ mlx5e_free_xps_cpumask(c);
kvfree(c);
}
@@ -2232,6 +2189,8 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
void *cqc = param->cqc;
MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
+ if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128)
+ MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
}
static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
@@ -2308,6 +2267,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
mlx5e_build_sq_param_common(priv, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
}
static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
@@ -2346,6 +2306,10 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
goto err_close_channels;
}
+ if (!IS_ERR_OR_NULL(priv->tx_reporter))
+ devlink_health_reporter_state_update(priv->tx_reporter,
+ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+
kvfree(cparam);
return 0;
@@ -2510,7 +2474,7 @@ static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz,
if (rrp.rss.hfunc == ETH_RSS_HASH_XOR)
ix = mlx5e_bits_invert(i, ilog2(sz));
- ix = priv->channels.params.indirection_rqt[ix];
+ ix = priv->rss_params.indirection_rqt[ix];
rqn = rrp.rss.channels->c[ix]->rq.rqn;
} else {
rqn = rrp.rqn;
@@ -2593,7 +2557,7 @@ static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
{
.rss = {
.channels = chs,
- .hfunc = chs->params.rss_hfunc,
+ .hfunc = priv->rss_params.hfunc,
}
},
};
@@ -2613,6 +2577,54 @@ static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
mlx5e_redirect_rqts(priv, drop_rrp);
}
+static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = {
+ [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+ .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+ },
+ [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+ },
+ [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+ [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+ .l4_prot_type = 0,
+ .rx_hash_fields = MLX5_HASH_IP,
+ },
+};
+
+struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt)
+{
+ return tirc_default_config[tt];
+}
+
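[Annotation, not part of the patch] The table above turns the long per-traffic-type switch, removed in the following hunk, into data: each MLX5E_TT_* entry carries its L3/L4 protocol selectors and default hash fields. A minimal sketch of how a caller can combine the table with a user override, in the spirit of mlx5e_update_rx_hash_fields() below — example_tirc_config and user_fields are illustrative names, not from the patch:

static struct mlx5e_tirc_config
example_tirc_config(enum mlx5e_traffic_types tt, u32 user_fields)
{
	/* start from the per-traffic-type default... */
	struct mlx5e_tirc_config cfg = mlx5e_tirc_get_default_config(tt);

	/* ...and let an ethtool-style override replace only the hash fields */
	if (user_fields)
		cfg.rx_hash_fields = user_fields;
	return cfg;
}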
static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
{
if (!params->lro_en)
@@ -2628,116 +2640,68 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
}
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
- enum mlx5e_traffic_types tt,
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
+ const struct mlx5e_tirc_config *ttconfig,
void *tirc, bool inner)
{
void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
-#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
- MLX5_HASH_FIELD_SEL_DST_IP)
-
-#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\
- MLX5_HASH_FIELD_SEL_DST_IP |\
- MLX5_HASH_FIELD_SEL_L4_SPORT |\
- MLX5_HASH_FIELD_SEL_L4_DPORT)
-
-#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
- MLX5_HASH_FIELD_SEL_DST_IP |\
- MLX5_HASH_FIELD_SEL_IPSEC_SPI)
-
- MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc));
- if (params->rss_hfunc == ETH_RSS_HASH_TOP) {
+ MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc));
+ if (rss_params->hfunc == ETH_RSS_HASH_TOP) {
void *rss_key = MLX5_ADDR_OF(tirc, tirc,
rx_hash_toeplitz_key);
size_t len = MLX5_FLD_SZ_BYTES(tirc,
rx_hash_toeplitz_key);
MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
- memcpy(rss_key, params->toeplitz_hash_key, len);
+ memcpy(rss_key, rss_params->toeplitz_hash_key, len);
}
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ ttconfig->l3_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ ttconfig->l4_prot_type);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ ttconfig->rx_hash_fields);
+}
- switch (tt) {
- case MLX5E_TT_IPV4_TCP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_TCP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV6_TCP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_TCP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV4_UDP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_UDP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV6_UDP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
- MLX5_L4_PROT_TYPE_UDP);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_L4PORTS);
- break;
-
- case MLX5E_TT_IPV4_IPSEC_AH:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig,
+ enum mlx5e_traffic_types tt,
+ u32 rx_hash_fields)
+{
+ *ttconfig = tirc_default_config[tt];
+ ttconfig->rx_hash_fields = rx_hash_fields;
+}
- case MLX5E_TT_IPV6_IPSEC_AH:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
+{
+ void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int ctxlen = MLX5_ST_SZ_BYTES(tirc);
+ struct mlx5e_tirc_config ttconfig;
+ int tt;
- case MLX5E_TT_IPV4_IPSEC_ESP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+ MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
- case MLX5E_TT_IPV6_IPSEC_ESP:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP_IPSEC_SPI);
- break;
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(tirc, 0, ctxlen);
+ mlx5e_update_rx_hash_fields(&ttconfig, tt,
+ rss->rx_hash_fields[tt]);
+ mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false);
+ mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
+ }
- case MLX5E_TT_IPV4:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV4);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP);
- break;
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
- case MLX5E_TT_IPV6:
- MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
- MLX5_L3_PROT_TYPE_IPV6);
- MLX5_SET(rx_hash_field_select, hfso, selected_fields,
- MLX5_HASH_IP);
- break;
- default:
- WARN_ONCE(true, "%s: bad traffic type!\n", __func__);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(tirc, 0, ctxlen);
+ mlx5e_update_rx_hash_fields(&ttconfig, tt,
+ rss->rx_hash_fields[tt]);
+ mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true);
+ mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in,
+ inlen);
}
}
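[Annotation, not part of the patch] mlx5e_modify_tirs_hash() expects the caller to hand in a zeroed modify_tir mailbox, which it then reuses for every TIR (it memsets only the tirc context between iterations). A sketch of the calling convention — the driver's ethtool RSS path does essentially this; example_apply_rss is an illustrative name:

static int example_apply_rss(struct mlx5e_priv *priv)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
	void *in;

	in = kvzalloc(inlen, GFP_KERNEL);	/* zeroed mailbox, reused for all TIRs */
	if (!in)
		return -ENOMEM;

	mlx5e_modify_tirs_hash(priv, in, inlen);

	kvfree(in);
	return 0;
}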
@@ -2794,7 +2758,8 @@ static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
+ mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
+ &tirc_default_config[tt], tirc, true);
}
static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
@@ -2825,7 +2790,7 @@ static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
*mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
}
-static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
+int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
{
struct mlx5e_params *params = &priv->channels.params;
struct net_device *netdev = priv->netdev;
@@ -2903,9 +2868,10 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
mlx5e_build_tx2sq_maps(priv);
mlx5e_activate_channels(&priv->channels);
+ mlx5e_xdp_tx_enable(priv);
netif_tx_start_all_queues(priv->netdev);
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ if (mlx5e_is_vport_rep(priv))
mlx5e_add_sqs_fwd_rules(priv);
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
@@ -2916,7 +2882,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
{
mlx5e_redirect_rqts_to_drop(priv);
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ if (mlx5e_is_vport_rep(priv))
mlx5e_remove_sqs_fwd_rules(priv);
/* FIXME: This is a W/A only for tx timeout watch dog false alarm when
@@ -2924,16 +2890,18 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
*/
netif_tx_stop_all_queues(priv->netdev);
netif_tx_disable(priv->netdev);
+ mlx5e_xdp_tx_disable(priv);
mlx5e_deactivate_channels(&priv->channels);
}
-void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
- struct mlx5e_channels *new_chs,
- mlx5e_fp_hw_modify hw_modify)
+static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *new_chs,
+ mlx5e_fp_hw_modify hw_modify)
{
struct net_device *netdev = priv->netdev;
int new_num_txqs;
int carrier_ok;
+
new_num_txqs = new_chs->num * new_chs->params.num_tc;
carrier_ok = netif_carrier_ok(netdev);
@@ -2959,6 +2927,28 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
netif_carrier_on(netdev);
}
+int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *new_chs,
+ mlx5e_fp_hw_modify hw_modify)
+{
+ int err;
+
+ err = mlx5e_open_channels(priv, new_chs);
+ if (err)
+ return err;
+
+ mlx5e_switch_priv_channels(priv, new_chs, hw_modify);
+ return 0;
+}
+
+int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
+{
+ struct mlx5e_channels new_channels = {};
+
+ new_channels.params = priv->channels.params;
+ return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+}
+
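[Annotation, not part of the patch] The two helpers above capture the make-before-break pattern the rest of this patch converts callers to: the new channel set is fully opened first, and only then are the queues switched over, so a failed open leaves the device running on the old channels. A sketch of the resulting caller shape (compare the mqprio, LRO, and MTU hunks below); example_set_num_tc is illustrative and assumes priv->state_lock is held:

static int example_set_num_tc(struct mlx5e_priv *priv, u8 num_tc)
{
	struct mlx5e_channels new_channels = {};

	new_channels.params = priv->channels.params;
	new_channels.params.num_tc = num_tc;

	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
		/* device is down: just stage the parameters */
		priv->channels.params = new_channels.params;
		return 0;
	}

	/* on failure the old channels keep running untouched */
	return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
}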
void mlx5e_timestamp_init(struct mlx5e_priv *priv)
{
priv->tstamp.tx_type = HWTSTAMP_TX_OFF;
@@ -3168,10 +3158,11 @@ err_close_tises:
return err;
}
-void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
+static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
{
int tc;
+ mlx5e_tx_reporter_destroy(priv);
for (tc = 0; tc < priv->profile->max_tc; tc++)
mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
}
@@ -3186,7 +3177,9 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+
+ mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
+ &tirc_default_config[tt], tirc, false);
}
static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
@@ -3372,13 +3365,12 @@ static int mlx5e_setup_tc_mqprio(struct net_device *netdev,
goto out;
}
- err = mlx5e_open_channels(priv, &new_channels);
+ err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
if (err)
goto out;
priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
new_channels.params.num_tc);
- mlx5e_switch_priv_channels(priv, &new_channels, NULL);
out:
mutex_unlock(&priv->state_lock);
return err;
@@ -3391,11 +3383,14 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
{
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
- return mlx5e_configure_flower(priv, cls_flower, flags);
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_DESTROY:
- return mlx5e_delete_flower(priv, cls_flower, flags);
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_STATS:
- return mlx5e_stats_flower(priv, cls_flower, flags);
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
default:
return -EOPNOTSUPP;
}
@@ -3408,7 +3403,8 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
switch (type) {
case TC_SETUP_CLSFLOWER:
- return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
+ return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS |
+ MLX5E_TC_NIC_OFFLOAD);
default:
return -EOPNOTSUPP;
}
@@ -3451,16 +3447,39 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
}
}
-static void
+void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
+{
+ int i;
+
+ for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) {
+ struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i];
+ struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
+ int j;
+
+ s->rx_packets += rq_stats->packets;
+ s->rx_bytes += rq_stats->bytes;
+
+ for (j = 0; j < priv->max_opened_tc; j++) {
+ struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
+
+ s->tx_packets += sq_stats->packets;
+ s->tx_bytes += sq_stats->bytes;
+ s->tx_dropped += sq_stats->dropped;
+ }
+ }
+}
+
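[Annotation, not part of the patch] Note that mlx5e_fold_sw_stats64() walks priv->channel_stats up to the netdev's maximum channel count rather than over the currently open channels: per-channel counters in mlx5e persist across channel close/reopen, so folding over the full array keeps ndo_get_stats64 monotonic while the interface is being reconfigured.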
+void
mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_sw_stats *sstats = &priv->stats.sw;
struct mlx5e_vport_stats *vstats = &priv->stats.vport;
struct mlx5e_pport_stats *pstats = &priv->stats.pport;
- /* update HW stats in background for next time */
- mlx5e_queue_update_stats(priv);
+ if (!mlx5e_monitor_counter_supported(priv)) {
+ /* update HW stats in background for next time */
+ mlx5e_queue_update_stats(priv);
+ }
if (mlx5e_is_uplink_rep(priv)) {
stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
@@ -3468,12 +3487,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
} else {
- mlx5e_grp_sw_update_stats(priv);
- stats->rx_packets = sstats->rx_packets;
- stats->rx_bytes = sstats->rx_bytes;
- stats->tx_packets = sstats->tx_packets;
- stats->tx_bytes = sstats->tx_bytes;
- stats->tx_dropped = sstats->tx_queue_dropped;
+ mlx5e_fold_sw_stats64(priv, stats);
}
stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
@@ -3566,11 +3580,7 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
goto out;
}
- err = mlx5e_open_channels(priv, &new_channels);
- if (err)
- goto out;
-
- mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro);
+ err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_modify_tirs_lro);
out:
mutex_unlock(&priv->state_lock);
return err;
@@ -3593,7 +3603,7 @@ static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- if (!enable && mlx5e_tc_num_filters(priv)) {
+ if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) {
netdev_err(netdev,
"Active offloaded tc filters, can't turn hw_tc_offload off\n");
return -EINVAL;
@@ -3767,7 +3777,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
if (params->xdp_prog &&
!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
- new_mtu, MLX5E_XDP_MAX_MTU);
+ new_mtu, mlx5e_xdp_max_mtu(params));
err = -EINVAL;
goto out;
}
@@ -3788,11 +3798,10 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
goto out;
}
- err = mlx5e_open_channels(priv, &new_channels);
+ err = mlx5e_safe_switch_channels(priv, &new_channels, set_mtu_cb);
if (err)
goto out;
- mlx5e_switch_priv_channels(priv, &new_channels, set_mtu_cb);
netdev->mtu = new_channels.params.sw_mtu;
out:
@@ -3895,7 +3904,7 @@ static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
#ifdef CONFIG_MLX5_ESWITCH
-static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -3932,8 +3941,8 @@ static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
}
-static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
- int max_tx_rate)
+int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+ int max_tx_rate)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -3974,8 +3983,8 @@ static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
mlx5_ifla_link2vport(link_state));
}
-static int mlx5e_get_vf_config(struct net_device *dev,
- int vf, struct ifla_vf_info *ivi)
+int mlx5e_get_vf_config(struct net_device *dev,
+ int vf, struct ifla_vf_info *ivi)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -3988,8 +3997,8 @@ static int mlx5e_get_vf_config(struct net_device *dev,
return 0;
}
-static int mlx5e_get_vf_stats(struct net_device *dev,
- int vf, struct ifla_vf_stats *vf_stats)
+int mlx5e_get_vf_stats(struct net_device *dev,
+ int vf, struct ifla_vf_stats *vf_stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -4050,8 +4059,7 @@ static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
queue_work(priv->wq, &vxlan_work->work);
}
-static void mlx5e_add_vxlan_port(struct net_device *netdev,
- struct udp_tunnel_info *ti)
+void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4064,8 +4072,7 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev,
mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
}
-static void mlx5e_del_vxlan_port(struct net_device *netdev,
- struct udp_tunnel_info *ti)
+void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4115,9 +4122,9 @@ out:
return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}
-static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
- struct net_device *netdev,
- netdev_features_t features)
+netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4137,31 +4144,13 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
return features;
}
-static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
- struct mlx5e_txqsq *sq)
-{
- struct mlx5_eq *eq = sq->cq.mcq.eq;
- u32 eqe_count;
-
- netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
- eq->eqn, eq->cons_index, eq->irqn);
-
- eqe_count = mlx5_eq_poll_irq_disabled(eq);
- if (!eqe_count)
- return false;
-
- netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
- sq->channel->stats->eq_rearm++;
- return true;
-}
-
static void mlx5e_tx_timeout_work(struct work_struct *work)
{
struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
tx_timeout_work);
- struct net_device *dev = priv->netdev;
- bool reopen_channels = false;
- int i, err;
+ bool report_failed = false;
+ int err;
+ int i;
rtnl_lock();
mutex_lock(&priv->state_lock);
@@ -4170,34 +4159,24 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
goto unlock;
for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
- struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i);
+ struct netdev_queue *dev_queue =
+ netdev_get_tx_queue(priv->netdev, i);
struct mlx5e_txqsq *sq = priv->txq2sq[i];
if (!netif_xmit_stopped(dev_queue))
continue;
- netdev_err(dev,
- "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
- i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
- jiffies_to_usecs(jiffies - dev_queue->trans_start));
-
- /* If we recover a lost interrupt, most likely TX timeout will
- * be resolved, skip reopening channels
- */
- if (!mlx5e_tx_timeout_eq_recover(dev, sq)) {
- clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- reopen_channels = true;
- }
+ if (mlx5e_tx_reporter_timeout(sq))
+ report_failed = true;
}
- if (!reopen_channels)
+ if (!report_failed)
goto unlock;
- mlx5e_close_locked(dev);
- err = mlx5e_open_locked(dev);
+ err = mlx5e_safe_reopen_channels(priv);
if (err)
netdev_err(priv->netdev,
- "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+ "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
err);
unlock:
@@ -4233,7 +4212,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
- new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU);
+ new_channels.params.sw_mtu,
+ mlx5e_xdp_max_mtu(&new_channels.params));
return -EINVAL;
}
@@ -4342,6 +4322,61 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
+#ifdef CONFIG_MLX5_ESWITCH
+static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev, u32 filter_mask,
+ int nlflags)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 mode, setting;
+ int err;
+
+ err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting);
+ if (err)
+ return err;
+ mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB;
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
+ mode,
+ 0, 0, nlflags, filter_mask, NULL);
+}
+
+static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
+ u16 flags, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct nlattr *attr, *br_spec;
+ u16 mode = BRIDGE_MODE_UNDEF;
+ u8 setting;
+ int rem;
+
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+ if (!br_spec)
+ return -EINVAL;
+
+ nla_for_each_nested(attr, br_spec, rem) {
+ if (nla_type(attr) != IFLA_BRIDGE_MODE)
+ continue;
+
+ if (nla_len(attr) < sizeof(mode))
+ return -EINVAL;
+
+ mode = nla_get_u16(attr);
+ if (mode > BRIDGE_MODE_VEPA)
+ return -EINVAL;
+
+ break;
+ }
+
+ if (mode == BRIDGE_MODE_UNDEF)
+ return -EINVAL;
+
+ setting = (mode == BRIDGE_MODE_VEPA) ? 1 : 0;
+ return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting);
+}
+#endif
+
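[Annotation, not part of the patch] These two NDOs expose the e-switch VEB/VEPA setting through the standard bridge netlink attributes (IFLA_AF_SPEC / IFLA_BRIDGE_MODE), so no driver-private API is needed. From userspace this is typically driven with iproute2, e.g. `bridge link set dev <uplink> hwmode vepa`.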
const struct net_device_ops mlx5e_netdev_ops = {
.ndo_open = mlx5e_open,
.ndo_stop = mlx5e_close,
@@ -4368,6 +4403,9 @@ const struct net_device_ops mlx5e_netdev_ops = {
.ndo_rx_flow_steer = mlx5e_rx_flow_steer,
#endif
#ifdef CONFIG_MLX5_ESWITCH
+ .ndo_bridge_setlink = mlx5e_bridge_setlink,
+ .ndo_bridge_getlink = mlx5e_bridge_getlink,
+
/* SRIOV E-Switch NDOs */
.ndo_set_vf_mac = mlx5e_set_vf_mac,
.ndo_set_vf_vlan = mlx5e_set_vf_vlan,
@@ -4377,8 +4415,6 @@ const struct net_device_ops mlx5e_netdev_ops = {
.ndo_get_vf_config = mlx5e_get_vf_config,
.ndo_set_vf_link_state = mlx5e_set_vf_link_state,
.ndo_get_vf_stats = mlx5e_get_vf_stats,
- .ndo_has_offload_stats = mlx5e_has_offload_stats,
- .ndo_get_offload_stats = mlx5e_get_offload_stats,
#endif
};
@@ -4524,15 +4560,23 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
mlx5e_init_rq_type_params(mdev, params);
}
-void mlx5e_build_rss_params(struct mlx5e_params *params)
+void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
+ u16 num_channels)
{
- params->rss_hfunc = ETH_RSS_HASH_XOR;
- netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key));
- mlx5e_build_default_indir_rqt(params->indirection_rqt,
- MLX5E_INDIR_RQT_SIZE, params->num_channels);
+ enum mlx5e_traffic_types tt;
+
+ rss_params->hfunc = ETH_RSS_HASH_TOP;
+ netdev_rss_key_fill(rss_params->toeplitz_hash_key,
+ sizeof(rss_params->toeplitz_hash_key));
+ mlx5e_build_default_indir_rqt(rss_params->indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, num_channels);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ rss_params->rx_hash_fields[tt] =
+ tirc_default_config[tt].rx_hash_fields;
}
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
u16 max_channels, u16 mtu)
{
@@ -4548,6 +4592,10 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ /* XDP SQ */
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE,
+ MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));
+
/* set CQE compression */
params->rx_cqe_compress_def = false;
if (MLX5_CAP_GEN(mdev, cqe_compression) &&
@@ -4581,7 +4629,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
/* RSS */
- mlx5e_build_rss_params(params);
+ mlx5e_build_rss_params(rss_params, params->num_channels);
}
static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
@@ -4596,12 +4644,6 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
}
}
-#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
-static const struct switchdev_ops mlx5e_switchdev_ops = {
- .switchdev_port_attr_get = mlx5e_attr_get,
-};
-#endif
-
static void mlx5e_build_nic_netdev(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4711,12 +4753,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->priv_flags |= IFF_UNICAST_FLT;
mlx5e_set_netdev_dev_addr(netdev);
-
-#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
- if (MLX5_ESWITCH_MANAGER(mdev))
- netdev->switchdev_ops = &mlx5e_switchdev_ops;
-#endif
-
mlx5e_ipsec_build_netdev(priv);
mlx5e_tls_build_netdev(priv);
}
@@ -4754,14 +4790,16 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
void *ppriv)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rss_params *rss = &priv->rss_params;
int err;
err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
if (err)
return err;
- mlx5e_build_nic_params(mdev, &priv->channels.params,
- mlx5e_get_netdev_max_channels(netdev), netdev->mtu);
+ mlx5e_build_nic_params(mdev, rss, &priv->channels.params,
+ mlx5e_get_netdev_max_channels(netdev),
+ netdev->mtu);
mlx5e_timestamp_init(priv);
@@ -4867,6 +4905,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_initialize(priv);
#endif
+ mlx5e_tx_reporter_create(priv);
return 0;
}
@@ -4891,9 +4930,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
mlx5_lag_add(mdev, netdev);
mlx5e_enable_async_events(priv);
-
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
- mlx5e_register_vport_reps(priv);
+ if (mlx5e_monitor_counter_supported(priv))
+ mlx5e_monitor_counter_init(priv);
if (netdev->reg_state != NETREG_REGISTERED)
return;
@@ -4927,8 +4965,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->set_rx_mode_work);
- if (MLX5_ESWITCH_MANAGER(priv->mdev))
- mlx5e_unregister_vport_reps(priv);
+ if (mlx5e_monitor_counter_supported(priv))
+ mlx5e_monitor_counter_cleanup(priv);
mlx5e_disable_async_events(priv);
mlx5_lag_remove(mdev);
@@ -4981,7 +5019,7 @@ int mlx5e_netdev_init(struct net_device *netdev,
netif_carrier_off(netdev);
#ifdef CONFIG_MLX5_EN_ARFS
- netdev->rx_cpu_rmap = mdev->rmap;
+ netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev);
#endif
return 0;
@@ -5036,7 +5074,7 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
if (priv->channels.params.num_channels > max_nch) {
mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
priv->channels.params.num_channels = max_nch;
- mlx5e_build_default_indir_rqt(priv->channels.params.indirection_rqt,
+ mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
MLX5E_INDIR_RQT_SIZE, max_nch);
}
@@ -5125,7 +5163,6 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
static void *mlx5e_add(struct mlx5_core_dev *mdev)
{
struct net_device *netdev;
- void *rpriv = NULL;
void *priv;
int err;
int nch;
@@ -5135,20 +5172,18 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
return NULL;
#ifdef CONFIG_MLX5_ESWITCH
- if (MLX5_ESWITCH_MANAGER(mdev)) {
- rpriv = mlx5e_alloc_nic_rep_priv(mdev);
- if (!rpriv) {
- mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n");
- return NULL;
- }
+ if (MLX5_ESWITCH_MANAGER(mdev) &&
+ mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ mlx5e_rep_register_vport_reps(mdev);
+ return mdev;
}
#endif
nch = mlx5e_get_max_num_channels(mdev);
- netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, rpriv);
+ netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, NULL);
if (!netdev) {
mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
- goto err_free_rpriv;
+ return NULL;
}
priv = netdev_priv(netdev);
@@ -5174,30 +5209,26 @@ err_detach:
mlx5e_detach(mdev, priv);
err_destroy_netdev:
mlx5e_destroy_netdev(priv);
-err_free_rpriv:
- kfree(rpriv);
return NULL;
}
static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
{
- struct mlx5e_priv *priv = vpriv;
- void *ppriv = priv->ppriv;
+ struct mlx5e_priv *priv;
+#ifdef CONFIG_MLX5_ESWITCH
+ if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev) {
+ mlx5e_rep_unregister_vport_reps(mdev);
+ return;
+ }
+#endif
+ priv = vpriv;
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_delete_app(priv);
#endif
unregister_netdev(priv->netdev);
mlx5e_detach(mdev, vpriv);
mlx5e_destroy_netdev(priv);
- kfree(ppriv);
-}
-
-static void *mlx5e_get_netdev(void *vpriv)
-{
- struct mlx5e_priv *priv = vpriv;
-
- return priv->netdev;
}
static struct mlx5_interface mlx5e_interface = {
@@ -5205,9 +5236,7 @@ static struct mlx5_interface mlx5e_interface = {
.remove = mlx5e_remove,
.attach = mlx5e_attach,
.detach = mlx5e_detach,
- .event = mlx5e_async_event,
.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
- .get_dev = mlx5e_get_netdev,
};
void mlx5e_init(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 820fe85100b0..a66b6ed80b30 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -42,14 +42,26 @@
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
+#include "en/tc_tun.h"
#include "fs_core.h"
+#include "lib/port_tun.h"
-#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \
- max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
+#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
+ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
+struct mlx5e_rep_indr_block_priv {
+ struct net_device *netdev;
+ struct mlx5e_rep_priv *rpriv;
+
+ struct list_head list;
+};
+
+static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev);
+
static void mlx5e_rep_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
@@ -99,7 +111,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev,
}
}
-static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
+static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -122,29 +134,45 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
vport_stats->tx_bytes = vf_stats.rx_bytes;
}
-static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
+static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv)
{
- struct mlx5e_sw_stats *s = &priv->stats.sw;
- struct mlx5e_rq_stats *rq_stats;
- struct mlx5e_sq_stats *sq_stats;
- int i, j;
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct rtnl_link_stats64 *vport_stats;
- memset(s, 0, sizeof(*s));
- for (i = 0; i < priv->channels.num; i++) {
- struct mlx5e_channel *c = priv->channels.c[i];
+ mlx5e_grp_802_3_update_stats(priv);
- rq_stats = c->rq.stats;
+ vport_stats = &priv->stats.vf_vport;
- s->rx_packets += rq_stats->packets;
- s->rx_bytes += rq_stats->bytes;
+ vport_stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
+ vport_stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok);
+ vport_stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
+ vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+}
- for (j = 0; j < priv->channels.params.num_tc; j++) {
- sq_stats = c->sq[j].stats;
+static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
- s->tx_packets += sq_stats->packets;
- s->tx_bytes += sq_stats->bytes;
- }
- }
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ mlx5e_uplink_rep_update_hw_counters(priv);
+ else
+ mlx5e_vf_rep_update_hw_counters(priv);
+}
+
+static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5e_sw_stats *s = &priv->stats.sw;
+ struct rtnl_link_stats64 stats64 = {};
+
+ memset(s, 0, sizeof(*s));
+ mlx5e_fold_sw_stats64(priv, &stats64);
+
+ s->rx_packets = stats64.rx_packets;
+ s->rx_bytes = stats64.rx_bytes;
+ s->tx_packets = stats64.tx_packets;
+ s->tx_bytes = stats64.tx_bytes;
+ s->tx_queue_dropped = stats64.tx_dropped;
}
static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
@@ -157,8 +185,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
return;
mutex_lock(&priv->state_lock);
- if (test_bit(MLX5E_STATE_OPENED, &priv->state))
- mlx5e_rep_update_sw_counters(priv);
+ mlx5e_rep_update_sw_counters(priv);
mlx5e_rep_update_hw_counters(priv);
mutex_unlock(&priv->state_lock);
@@ -257,6 +284,22 @@ static int mlx5e_rep_set_channels(struct net_device *dev,
return 0;
}
+static int mlx5e_rep_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_coalesce(priv, coal);
+}
+
+static int mlx5e_rep_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_coalesce(priv, coal);
+}
+
static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -271,7 +314,39 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev)
return mlx5e_ethtool_get_rxfh_indir_size(priv);
}
-static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
+static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5e_ethtool_get_pauseparam(priv, pauseparam);
+}
+
+static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
+}
+
+static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
+}
+
+static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
+}
+
+static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = {
.get_drvinfo = mlx5e_rep_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = mlx5e_rep_get_strings,
@@ -281,27 +356,58 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
.set_ringparam = mlx5e_rep_set_ringparam,
.get_channels = mlx5e_rep_get_channels,
.set_channels = mlx5e_rep_set_channels,
+ .get_coalesce = mlx5e_rep_get_coalesce,
+ .set_coalesce = mlx5e_rep_set_coalesce,
.get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size,
.get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
};
-int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
+ .get_drvinfo = mlx5e_rep_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = mlx5e_rep_get_strings,
+ .get_sset_count = mlx5e_rep_get_sset_count,
+ .get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
+ .get_ringparam = mlx5e_rep_get_ringparam,
+ .set_ringparam = mlx5e_rep_set_ringparam,
+ .get_channels = mlx5e_rep_get_channels,
+ .set_channels = mlx5e_rep_set_channels,
+ .get_coalesce = mlx5e_rep_get_coalesce,
+ .set_coalesce = mlx5e_rep_set_coalesce,
+ .get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings,
+ .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings,
+ .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size,
+ .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
+ .get_pauseparam = mlx5e_uplink_rep_get_pauseparam,
+ .set_pauseparam = mlx5e_uplink_rep_set_pauseparam,
+};
+
+static int mlx5e_rep_get_port_parent_id(struct net_device *dev,
+ struct netdev_phys_item_id *ppid)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct net_device *uplink_upper = NULL;
+ struct mlx5e_priv *uplink_priv = NULL;
+ struct net_device *uplink_dev;
if (esw->mode == SRIOV_NONE)
return -EOPNOTSUPP;
- switch (attr->id) {
- case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
- attr->u.ppid.id_len = ETH_ALEN;
- ether_addr_copy(attr->u.ppid.id, rep->hw_id);
- break;
- default:
- return -EOPNOTSUPP;
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ if (uplink_dev) {
+ uplink_upper = netdev_master_upper_dev_get(uplink_dev);
+ uplink_priv = netdev_priv(uplink_dev);
+ }
+
+ ppid->id_len = ETH_ALEN;
+ if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) {
+ ether_addr_copy(ppid->id, uplink_upper->dev_addr);
+ } else {
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ ether_addr_copy(ppid->id, rep->hw_id);
}
return 0;
@@ -474,6 +580,10 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
ether_addr_copy(e->h_dest, ha);
ether_addr_copy(eth->h_dest, ha);
+ /* Update the encap source mac, in case the flows were
+ * deleted when the encap source mac changed.
+ */
+ ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
mlx5e_tc_encap_flows_add(priv, e);
}
@@ -519,6 +629,186 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
neigh_release(n);
}
+static struct mlx5e_rep_indr_block_priv *
+mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv;
+
+ /* All callback list access should be protected by RTNL. */
+ ASSERT_RTNL();
+
+ list_for_each_entry(cb_priv,
+ &rpriv->uplink_priv.tc_indr_block_priv_list,
+ list)
+ if (cb_priv->netdev == netdev)
+ return cb_priv;
+
+ return NULL;
+}
+
+static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_rep_indr_block_priv *cb_priv, *temp;
+ struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
+
+ list_for_each_entry_safe(cb_priv, temp, head, list) {
+ mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev);
+ kfree(cb_priv);
+ }
+}
+
+static int
+mlx5e_rep_indr_offload(struct net_device *netdev,
+ struct tc_cls_flower_offload *flower,
+ struct mlx5e_rep_indr_block_priv *indr_priv)
+{
+ struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
+ int flags = MLX5E_TC_EGRESS | MLX5E_TC_ESW_OFFLOAD;
+ int err = 0;
+
+ switch (flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ err = mlx5e_configure_flower(netdev, priv, flower, flags);
+ break;
+ case TC_CLSFLOWER_DESTROY:
+ err = mlx5e_delete_flower(netdev, priv, flower, flags);
+ break;
+ case TC_CLSFLOWER_STATS:
+ err = mlx5e_stats_flower(netdev, priv, flower, flags);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type,
+ void *type_data, void *indr_priv)
+{
+ struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_indr_offload(priv->netdev, type_data, priv);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
+ struct mlx5e_rep_priv *rpriv,
+ struct tc_block_offload *f)
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv;
+ int err = 0;
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+ if (indr_priv)
+ return -EEXIST;
+
+ indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
+ if (!indr_priv)
+ return -ENOMEM;
+
+ indr_priv->netdev = netdev;
+ indr_priv->rpriv = rpriv;
+ list_add(&indr_priv->list,
+ &rpriv->uplink_priv.tc_indr_block_priv_list);
+
+ err = tcf_block_cb_register(f->block,
+ mlx5e_rep_indr_setup_block_cb,
+ indr_priv, indr_priv, f->extack);
+ if (err) {
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+ }
+
+ return err;
+ case TC_BLOCK_UNBIND:
+ indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+ if (!indr_priv)
+ return -ENOENT;
+
+ tcf_block_cb_unregister(f->block,
+ mlx5e_rep_indr_setup_block_cb,
+ indr_priv);
+ list_del(&indr_priv->list);
+ kfree(indr_priv);
+
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static
+int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
+ enum tc_setup_type type, void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv,
+ type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev)
+{
+ int err;
+
+ err = __tc_indr_block_cb_register(netdev, rpriv,
+ mlx5e_rep_indr_setup_tc_cb,
+ rpriv);
+ if (err) {
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
+ netdev_name(netdev), err);
+ }
+ return err;
+}
+
+static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev)
+{
+ __tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
+ rpriv);
+}
+
+static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ uplink_priv.netdevice_nb);
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ if (!mlx5e_tc_tun_device_to_offload(priv, netdev))
+ return NOTIFY_OK;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ mlx5e_rep_indr_register_block(rpriv, netdev);
+ break;
+ case NETDEV_UNREGISTER:
+ mlx5e_rep_indr_unregister_block(rpriv, netdev);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
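[Annotation, not part of the patch] The notifier above is what lets TC rules installed directly on tunnel devices (e.g. a vxlan netdev) be offloaded: when such a device registers, the uplink rep attaches an indirect TC block to it, and flower commands arriving on that block are steered into the rep's offload path with MLX5E_TC_EGRESS | MLX5E_TC_ESW_OFFLOAD flags, as mlx5e_rep_indr_offload() above shows.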
static struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
struct mlx5e_neigh *m_neigh);
@@ -755,14 +1045,23 @@ static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv,
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
struct mlx5e_neigh_hash_entry *nhe;
int err;
+ err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
+ if (err)
+ return err;
nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
if (!nhe) {
err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
- if (err)
+ if (err) {
+ mlx5_tun_entropy_refcount_dec(tun_entropy,
+ e->reformat_type);
return err;
+ }
}
list_add(&e->encap_list, &nhe->encap_list);
return 0;
@@ -771,6 +1070,9 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
struct mlx5e_neigh_hash_entry *nhe;
list_del(&e->encap_list);
@@ -778,9 +1080,10 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
if (list_empty(&nhe->encap_list))
mlx5e_rep_neigh_entry_destroy(priv, nhe);
+ mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
}
-static int mlx5e_rep_open(struct net_device *dev)
+static int mlx5e_vf_rep_open(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -794,7 +1097,8 @@ static int mlx5e_rep_open(struct net_device *dev)
if (!mlx5_modify_vport_admin_state(priv->mdev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
- rep->vport, MLX5_VPORT_ADMIN_STATE_UP))
+ rep->vport, 1,
+ MLX5_VPORT_ADMIN_STATE_UP))
netif_carrier_on(dev);
unlock:
@@ -802,7 +1106,7 @@ unlock:
return err;
}
-static int mlx5e_rep_close(struct net_device *dev)
+static int mlx5e_vf_rep_close(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -812,7 +1116,8 @@ static int mlx5e_rep_close(struct net_device *dev)
mutex_lock(&priv->state_lock);
mlx5_modify_vport_admin_state(priv->mdev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
- rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN);
+ rep->vport, 1,
+ MLX5_VPORT_ADMIN_STATE_DOWN);
ret = mlx5e_close_locked(dev);
mutex_unlock(&priv->state_lock);
return ret;
@@ -824,9 +1129,18 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
+ unsigned int fn;
int ret;
- ret = snprintf(buf, len, "%d", rep->vport - 1);
+ fn = PCI_FUNC(priv->mdev->pdev->devfn);
+ if (fn >= MLX5_MAX_PORTS)
+ return -EOPNOTSUPP;
+
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ ret = snprintf(buf, len, "p%d", fn);
+ else
+ ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1);
+
if (ret >= len)
return -EOPNOTSUPP;
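[Annotation, not part of the patch] Worked example of the new naming scheme: on PCI function 0, the uplink representor reports phys_port_name "p0", while the representor for vport 2 (the second VF) reports "pf0vf1". Tools such as udev can use these names to derive stable interface names for the reps.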
@@ -839,24 +1153,14 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
{
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
- return mlx5e_configure_flower(priv, cls_flower, flags);
+ return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_DESTROY:
- return mlx5e_delete_flower(priv, cls_flower, flags);
+ return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+ flags);
case TC_CLSFLOWER_STATS:
- return mlx5e_stats_flower(priv, cls_flower, flags);
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data,
- void *cb_priv)
-{
- struct mlx5e_priv *priv = cb_priv;
-
- switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS);
+ return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+ flags);
default:
return -EOPNOTSUPP;
}
@@ -869,7 +1173,8 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
switch (type) {
case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS |
+ MLX5E_TC_ESW_OFFLOAD);
default:
return -EOPNOTSUPP;
}
@@ -908,43 +1213,23 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep;
if (!MLX5_ESWITCH_MANAGER(priv->mdev))
return false;
- rep = rpriv->rep;
- if (esw->mode == SRIOV_OFFLOADS &&
- rep && rep->vport == FDB_UPLINK_VPORT)
- return true;
-
- return false;
-}
-
-static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
-{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep;
-
- if (!MLX5_ESWITCH_MANAGER(priv->mdev))
+ if (!rpriv) /* non-vport-rep mlx5e instances don't use this field */
return false;
rep = rpriv->rep;
- if (rep && rep->vport != FDB_UPLINK_VPORT)
- return true;
-
- return false;
+ return (rep->vport == MLX5_VPORT_UPLINK);
}
-bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
-
switch (attr_id) {
case IFLA_OFFLOAD_XSTATS_CPU_HIT:
- if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv))
return true;
}
@@ -956,22 +1241,13 @@ mlx5e_get_sw_stats64(const struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_sw_stats *sstats = &priv->stats.sw;
-
- mlx5e_rep_update_sw_counters(priv);
-
- stats->rx_packets = sstats->rx_packets;
- stats->rx_bytes = sstats->rx_bytes;
- stats->tx_packets = sstats->tx_packets;
- stats->tx_bytes = sstats->tx_bytes;
-
- stats->tx_dropped = sstats->tx_queue_dropped;
+ mlx5e_fold_sw_stats64(priv, stats);
return 0;
}
-int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
- void *sp)
+static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp)
{
switch (attr_id) {
case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -982,7 +1258,7 @@ int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
}
static void
-mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -991,37 +1267,104 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
}
-static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
- .switchdev_port_attr_get = mlx5e_attr_get,
-};
-
-static int mlx5e_change_rep_mtu(struct net_device *netdev, int new_mtu)
+static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu)
{
return mlx5e_change_mtu(netdev, new_mtu, NULL);
}
-static const struct net_device_ops mlx5e_netdev_ops_rep = {
- .ndo_open = mlx5e_rep_open,
- .ndo_stop = mlx5e_rep_close,
+static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu);
+}
+
+static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr)
+{
+ struct sockaddr *saddr = addr;
+
+ if (!is_valid_ether_addr(saddr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ ether_addr_copy(netdev->dev_addr, saddr->sa_data);
+ return 0;
+}
+
+static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+ __be16 vlan_proto)
+{
+ netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n");
+
+ if (vlan != 0)
+ return -EOPNOTSUPP;
+
+ /* allow setting 0-vid for compatibility with libvirt */
+ return 0;
+}
+
+static const struct net_device_ops mlx5e_netdev_ops_vf_rep = {
+ .ndo_open = mlx5e_vf_rep_open,
+ .ndo_stop = mlx5e_vf_rep_close,
+ .ndo_start_xmit = mlx5e_xmit,
+ .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
+ .ndo_setup_tc = mlx5e_rep_setup_tc,
+ .ndo_get_stats64 = mlx5e_vf_rep_get_stats,
+ .ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
+ .ndo_change_mtu = mlx5e_vf_rep_change_mtu,
+ .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id,
+};
+
+static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
+ .ndo_open = mlx5e_open,
+ .ndo_stop = mlx5e_close,
.ndo_start_xmit = mlx5e_xmit,
+ .ndo_set_mac_address = mlx5e_uplink_rep_set_mac,
.ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
.ndo_setup_tc = mlx5e_rep_setup_tc,
- .ndo_get_stats64 = mlx5e_rep_get_stats,
- .ndo_has_offload_stats = mlx5e_has_offload_stats,
- .ndo_get_offload_stats = mlx5e_get_offload_stats,
- .ndo_change_mtu = mlx5e_change_rep_mtu,
+ .ndo_get_stats64 = mlx5e_get_stats,
+ .ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
+ .ndo_change_mtu = mlx5e_uplink_rep_change_mtu,
+ .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
+ .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
+ .ndo_features_check = mlx5e_features_check,
+ .ndo_set_vf_mac = mlx5e_set_vf_mac,
+ .ndo_set_vf_rate = mlx5e_set_vf_rate,
+ .ndo_get_vf_config = mlx5e_get_vf_config,
+ .ndo_get_vf_stats = mlx5e_get_vf_stats,
+ .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan,
+ .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id,
};
-static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params, u16 mtu)
+bool mlx5e_eswitch_rep(struct net_device *netdev)
+{
+ if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep ||
+ netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep)
+ return true;
+
+ return false;
+}
+
+static void mlx5e_build_rep_params(struct net_device *netdev)
{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_params *params;
+
u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ params = &priv->channels.params;
params->hard_mtu = MLX5E_ETH_HARD_MTU;
- params->sw_mtu = mtu;
- params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
+ params->sw_mtu = netdev->mtu;
+
+ /* SQ */
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ else
+ params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE;
/* RQ */
mlx5e_build_rq_params(mdev, params);
@@ -1035,24 +1378,36 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
/* RSS */
- mlx5e_build_rss_params(params);
+ mlx5e_build_rss_params(&priv->rss_params, params->num_channels);
}
static void mlx5e_build_rep_netdev(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_core_dev *mdev = priv->mdev;
- u16 max_mtu;
- netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+ SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev);
+ netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
+ /* we want a persistent mac for the uplink rep */
+ mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr);
+ netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (MLX5_CAP_GEN(mdev, qos))
+ netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+#endif
+ } else {
+ netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep;
+ eth_hw_addr_random(netdev);
+ netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops;
+ }
netdev->watchdog_timeo = 15 * HZ;
- netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
-
- netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
- netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
+ netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
netdev->hw_features |= NETIF_F_HW_TC;
netdev->hw_features |= NETIF_F_SG;
@@ -1063,13 +1418,10 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_TSO6;
netdev->hw_features |= NETIF_F_RXCSUM;
- netdev->features |= netdev->hw_features;
-
- eth_hw_addr_random(netdev);
+ if (rep->vport != MLX5_VPORT_UPLINK)
+ netdev->features |= NETIF_F_VLAN_CHALLENGED;
- netdev->min_mtu = ETH_MIN_MTU;
- mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
- netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ netdev->features |= netdev->hw_features;
}
static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
@@ -1086,7 +1438,7 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
priv->channels.params.num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS;
- mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu);
+ mlx5e_build_rep_params(netdev);
mlx5e_build_rep_netdev(netdev);
mlx5e_timestamp_init(priv);
@@ -1209,94 +1561,193 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
{
- int err;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ int tc, err;
err = mlx5e_create_tises(priv);
if (err) {
mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
return err;
}
+
+ if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
+ uplink_priv = &rpriv->uplink_priv;
+
+ INIT_LIST_HEAD(&uplink_priv->unready_flows);
+
+ /* init shared tc flow table */
+ err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+ if (err)
+ goto destroy_tises;
+
+ mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
+
+ /* init indirect block notifications */
+ INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
+ uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
+ err = register_netdevice_notifier(&uplink_priv->netdevice_nb);
+ if (err) {
+ mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n");
+ goto tc_esw_cleanup;
+ }
+ }
+
return 0;
+
+tc_esw_cleanup:
+ mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht);
+destroy_tises:
+ for (tc = 0; tc < priv->profile->max_tc; tc++)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
+ return err;
}
-static const struct mlx5e_profile mlx5e_rep_profile = {
- .init = mlx5e_init_rep,
- .cleanup = mlx5e_cleanup_rep,
- .init_rx = mlx5e_init_rep_rx,
- .cleanup_rx = mlx5e_cleanup_rep_rx,
- .init_tx = mlx5e_init_rep_tx,
- .cleanup_tx = mlx5e_cleanup_nic_tx,
- .update_stats = mlx5e_rep_update_hw_counters,
- .update_carrier = NULL,
- .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
- .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
- .max_tc = 1,
-};
+static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int tc;
-/* e-Switch vport representors */
+ for (tc = 0; tc < priv->profile->max_tc; tc++)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
-static int
-mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+ if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
+ /* clean indirect TC block notifications */
+ unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb);
+ mlx5e_rep_indr_clean_block_privs(rpriv);
+
+ /* delete shared tc flow table */
+ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
+ }
+}
+
+static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
- int err;
+ netdev->min_mtu = ETH_MIN_MTU;
+ mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+}
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- err = mlx5e_add_sqs_fwd_rules(priv);
- if (err)
- return err;
+static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+
+ if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
+ struct mlx5_eqe *eqe = data;
+
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ queue_work(priv->wq, &priv->update_carrier_work);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
}
- err = mlx5e_rep_neigh_init(rpriv);
- if (err)
- goto err_remove_sqs;
+ if (event == MLX5_DEV_EVENT_PORT_AFFINITY) {
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
- /* init shared tc flow table */
- err = mlx5e_tc_esw_init(&rpriv->tc_ht);
- if (err)
- goto err_neigh_cleanup;
+ queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
- return 0;
+ return NOTIFY_OK;
+ }
-err_neigh_cleanup:
- mlx5e_rep_neigh_cleanup(rpriv);
-err_remove_sqs:
- mlx5e_remove_sqs_fwd_rules(priv);
- return err;
+ return NOTIFY_DONE;
}
-static void
-mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
- struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ u16 max_mtu;
+
+ netdev->min_mtu = ETH_MIN_MTU;
+ mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
+ netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ mlx5e_set_dev_port_mtu(priv);
- if (test_bit(MLX5E_STATE_OPENED, &priv->state))
- mlx5e_remove_sqs_fwd_rules(priv);
+ INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
+ mlx5e_tc_reoffload_flows_work);
- /* clean uplink offloaded TC rules, delete shared tc flow table */
- mlx5e_tc_esw_cleanup(&rpriv->tc_ht);
+ mlx5_lag_add(mdev, netdev);
+ priv->events_nb.notifier_call = uplink_rep_async_event;
+ mlx5_notifier_register(mdev, &priv->events_nb);
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_initialize(priv);
+ mlx5e_dcbnl_init_app(priv);
+#endif
+}
- mlx5e_rep_neigh_cleanup(rpriv);
+static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_delete_app(priv);
+#endif
+ mlx5_notifier_unregister(mdev, &priv->events_nb);
+ cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
+ mlx5_lag_remove(mdev);
}
+static const struct mlx5e_profile mlx5e_vf_rep_profile = {
+ .init = mlx5e_init_rep,
+ .cleanup = mlx5e_cleanup_rep,
+ .init_rx = mlx5e_init_rep_rx,
+ .cleanup_rx = mlx5e_cleanup_rep_rx,
+ .init_tx = mlx5e_init_rep_tx,
+ .cleanup_tx = mlx5e_cleanup_rep_tx,
+ .enable = mlx5e_vf_rep_enable,
+ .update_stats = mlx5e_vf_rep_update_hw_counters,
+ .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
+ .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .max_tc = 1,
+};
+
+static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
+ .init = mlx5e_init_rep,
+ .cleanup = mlx5e_cleanup_rep,
+ .init_rx = mlx5e_init_rep_rx,
+ .cleanup_rx = mlx5e_cleanup_rep_rx,
+ .init_tx = mlx5e_init_rep_tx,
+ .cleanup_tx = mlx5e_cleanup_rep_tx,
+ .enable = mlx5e_uplink_rep_enable,
+ .disable = mlx5e_uplink_rep_disable,
+ .update_stats = mlx5e_uplink_rep_update_hw_counters,
+ .update_carrier = mlx5e_update_carrier,
+ .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
+ .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .max_tc = MLX5E_MAX_NUM_TC,
+};
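/* Editorial note (illustrative, not part of this patch): the old single
 * mlx5e_rep_profile is split because the uplink rep now owns per-port work
 * the VF reps do not need - physical-port MTU setup, DCB, LAG and the async
 * events notifier - hence the extra enable/disable callbacks and the larger
 * max_tc.
 */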
+
+/* e-Switch vport representors */
static int
mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
- struct mlx5e_rep_priv *uplink_rpriv;
+ const struct mlx5e_profile *profile;
struct mlx5e_rep_priv *rpriv;
struct net_device *netdev;
- struct mlx5e_priv *upriv;
int nch, err;
rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
if (!rpriv)
return -ENOMEM;
+ /* rpriv->rep to be looked up when profile->init() is called */
+ rpriv->rep = rep;
+
nch = mlx5e_get_max_num_channels(dev);
- netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, nch, rpriv);
+ profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile;
+ netdev = mlx5e_create_netdev(dev, profile, nch, rpriv);
if (!netdev) {
pr_warn("Failed to create representor netdev for vport %d\n",
rep->vport);
@@ -1305,15 +1756,20 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
}
rpriv->netdev = netdev;
- rpriv->rep = rep;
rep->rep_if[REP_ETH].priv = rpriv;
INIT_LIST_HEAD(&rpriv->vport_sqs_list);
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+ err = mlx5e_create_mdev_resources(dev);
+ if (err)
+ goto err_destroy_netdev;
+ }
+
err = mlx5e_attach_netdev(netdev_priv(netdev));
if (err) {
pr_warn("Failed to attach representor netdev for vport %d\n",
rep->vport);
- goto err_destroy_netdev;
+ goto err_destroy_mdev_resources;
}
err = mlx5e_rep_neigh_init(rpriv);
@@ -1323,32 +1779,25 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
goto err_detach_netdev;
}
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH);
- upriv = netdev_priv(uplink_rpriv->netdev);
- err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev,
- upriv);
- if (err)
- goto err_neigh_cleanup;
-
err = register_netdev(netdev);
if (err) {
pr_warn("Failed to register representor netdev for vport %d\n",
rep->vport);
- goto err_egdev_cleanup;
+ goto err_neigh_cleanup;
}
return 0;
-err_egdev_cleanup:
- tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
- upriv);
-
err_neigh_cleanup:
mlx5e_rep_neigh_cleanup(rpriv);
err_detach_netdev:
mlx5e_detach_netdev(netdev_priv(netdev));
+err_destroy_mdev_resources:
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ mlx5e_destroy_mdev_resources(dev);
+
err_destroy_netdev:
mlx5e_destroy_netdev(netdev_priv(netdev));
kfree(rpriv);
@@ -1361,18 +1810,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_rep_priv *uplink_rpriv;
void *ppriv = priv->ppriv;
- struct mlx5e_priv *upriv;
unregister_netdev(netdev);
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch,
- REP_ETH);
- upriv = netdev_priv(uplink_rpriv->netdev);
- tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
- upriv);
mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_detach_netdev(priv);
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ mlx5e_destroy_mdev_resources(priv->mdev);
mlx5e_destroy_netdev(priv);
kfree(ppriv); /* mlx5e_rep_priv */
}
@@ -1386,72 +1830,21 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
return rpriv->netdev;
}
-static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- int total_vfs = MLX5_TOTAL_VPORTS(mdev);
- int vport;
-
- for (vport = 1; vport < total_vfs; vport++) {
- struct mlx5_eswitch_rep_if rep_if = {};
-
- rep_if.load = mlx5e_vport_rep_load;
- rep_if.unload = mlx5e_vport_rep_unload;
- rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
- mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
- }
-}
-
-static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv)
+void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev)
{
- struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5_eswitch *esw = mdev->priv.eswitch;
- int total_vfs = MLX5_TOTAL_VPORTS(mdev);
- int vport;
-
- for (vport = 1; vport < total_vfs; vport++)
- mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH);
-}
-
-void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- struct mlx5_eswitch_rep_if rep_if;
- struct mlx5e_rep_priv *rpriv;
-
- rpriv = priv->ppriv;
- rpriv->netdev = priv->netdev;
+ struct mlx5_eswitch_rep_if rep_if = {};
- rep_if.load = mlx5e_nic_rep_load;
- rep_if.unload = mlx5e_nic_rep_unload;
+ rep_if.load = mlx5e_vport_rep_load;
+ rep_if.unload = mlx5e_vport_rep_unload;
rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
- rep_if.priv = rpriv;
- INIT_LIST_HEAD(&rpriv->vport_sqs_list);
- mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
-
- mlx5e_rep_register_vf_vports(priv); /* VFs vports */
-}
-
-void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv)
-{
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */
- mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/
+ mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH);
}
-void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev)
+void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev)
{
struct mlx5_eswitch *esw = mdev->priv.eswitch;
- struct mlx5e_rep_priv *rpriv;
-
- rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
- if (!rpriv)
- return NULL;
- rpriv->rep = &esw->offloads.vport_reps[0];
- return rpriv;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_ETH);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 844d32d5c29f..83b573b1abac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -37,6 +37,7 @@
#include <linux/rhashtable.h>
#include "eswitch.h"
#include "en.h"
+#include "lib/port_tun.h"
#ifdef CONFIG_MLX5_ESWITCH
struct mlx5e_neigh_update_table {
@@ -53,13 +54,38 @@ struct mlx5e_neigh_update_table {
unsigned long min_interval; /* jiffies */
};
+struct mlx5_rep_uplink_priv {
+ /* Filters DB - instantiated by the uplink representor and shared by
+ * the uplink's VFs
+ */
+ struct rhashtable tc_ht;
+
+ /* indirect block callbacks are invoked on bind/unbind events
+ * on registered higher-level devices (e.g. tunnel devices)
+ *
+ * tc_indr_block_priv_list is used to look up indirect callback
+ * private data
+ *
+ * netdevice_nb is the netdev events notifier - used to register
+ * tunnel devices for block events
+ */
+ struct list_head tc_indr_block_priv_list;
+ struct notifier_block netdevice_nb;
+
+ struct mlx5_tun_entropy tun_entropy;
+
+ struct list_head unready_flows;
+ struct work_struct reoffload_flows_work;
+};
+
struct mlx5e_rep_priv {
struct mlx5_eswitch_rep *rep;
struct mlx5e_neigh_update_table neigh_update;
struct net_device *netdev;
struct mlx5_flow_handle *vport_rx_rule;
struct list_head vport_sqs_list;
- struct rhashtable tc_ht; /* valid for uplink rep */
+ struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
};
static inline
@@ -128,7 +154,10 @@ struct mlx5e_encap_entry {
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
struct net_device *out_dev;
+ struct net_device *route_dev;
int tunnel_type;
+ int tunnel_hlen;
+ int reformat_type;
u8 flags;
char *encap_header;
int encap_size;
@@ -140,16 +169,12 @@ struct mlx5e_rep_sq {
};
void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev);
-void mlx5e_register_vport_reps(struct mlx5e_priv *priv);
-void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv);
+void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev);
+void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev);
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
-int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp);
-bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id);
-
-int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
@@ -158,12 +183,17 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+
+bool mlx5e_eswitch_rep(struct net_device *netdev);
+
#else /* CONFIG_MLX5_ESWITCH */
-static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {}
-static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {}
static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
#endif
+static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv)
+{
+ return (MLX5_ESWITCH_MANAGER(priv->mdev) && priv->ppriv);
+}
#endif /* __MLX5E_REP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 0b5ef6d4e815..c3b3002ff62f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -52,40 +52,45 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
return config->rx_filter == HWTSTAMP_FILTER_ALL;
}
-static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
- void *data)
+static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq,
+ u32 cqcc, void *data)
{
- u32 ci = mlx5_cqwq_ctr2ix(&cq->wq, cqcc);
+ u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc);
- memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
+ memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64));
}
static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
- struct mlx5e_cq *cq, u32 cqcc)
+ struct mlx5_cqwq *wq,
+ u32 cqcc)
{
- mlx5e_read_cqe_slot(cq, cqcc, &cq->title);
- cq->decmprs_left = be32_to_cpu(cq->title.byte_cnt);
- cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter);
+ struct mlx5e_cq_decomp *cqd = &rq->cqd;
+ struct mlx5_cqe64 *title = &cqd->title;
+
+ mlx5e_read_cqe_slot(wq, cqcc, title);
+ cqd->left = be32_to_cpu(title->byte_cnt);
+ cqd->wqe_counter = be16_to_cpu(title->wqe_counter);
rq->stats->cqe_compress_blks++;
}
-static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
+static inline void mlx5e_read_mini_arr_slot(struct mlx5_cqwq *wq,
+ struct mlx5e_cq_decomp *cqd,
+ u32 cqcc)
{
- mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr);
- cq->mini_arr_idx = 0;
+ mlx5e_read_cqe_slot(wq, cqcc, cqd->mini_arr);
+ cqd->mini_arr_idx = 0;
}
-static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
+static inline void mlx5e_cqes_update_owner(struct mlx5_cqwq *wq, int n)
{
- struct mlx5_cqwq *wq = &cq->wq;
-
+ u32 cqcc = wq->cc;
u8 op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1;
u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc);
u32 wq_sz = mlx5_cqwq_get_size(wq);
u32 ci_top = min_t(u32, wq_sz, ci + n);
for (; ci < ci_top; ci++, n--) {
- struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
cqe->op_own = op_own;
}
@@ -93,7 +98,7 @@ static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
if (unlikely(ci == wq_sz)) {
op_own = !op_own;
for (ci = 0; ci < n; ci++) {
- struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
cqe->op_own = op_own;
}
@@ -101,68 +106,79 @@ static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
}
static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
- struct mlx5e_cq *cq, u32 cqcc)
+ struct mlx5_cqwq *wq,
+ u32 cqcc)
{
- cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
- cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum;
- cq->title.op_own &= 0xf0;
- cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz);
- cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter);
+ struct mlx5e_cq_decomp *cqd = &rq->cqd;
+ struct mlx5_mini_cqe8 *mini_cqe = &cqd->mini_arr[cqd->mini_arr_idx];
+ struct mlx5_cqe64 *title = &cqd->title;
+
+ title->byte_cnt = mini_cqe->byte_cnt;
+ title->check_sum = mini_cqe->checksum;
+ title->op_own &= 0xf0;
+ title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz);
+ title->wqe_counter = cpu_to_be16(cqd->wqe_counter);
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
- cq->decmprs_wqe_counter +=
- mpwrq_get_cqe_consumed_strides(&cq->title);
+ cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title);
else
- cq->decmprs_wqe_counter =
- mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1);
+ cqd->wqe_counter =
+ mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cqd->wqe_counter + 1);
}
static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
- struct mlx5e_cq *cq, u32 cqcc)
+ struct mlx5_cqwq *wq,
+ u32 cqcc)
{
- mlx5e_decompress_cqe(rq, cq, cqcc);
- cq->title.rss_hash_type = 0;
- cq->title.rss_hash_result = 0;
+ struct mlx5e_cq_decomp *cqd = &rq->cqd;
+
+ mlx5e_decompress_cqe(rq, wq, cqcc);
+ cqd->title.rss_hash_type = 0;
+ cqd->title.rss_hash_result = 0;
}
static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
- struct mlx5e_cq *cq,
+ struct mlx5_cqwq *wq,
int update_owner_only,
int budget_rem)
{
- u32 cqcc = cq->wq.cc + update_owner_only;
+ struct mlx5e_cq_decomp *cqd = &rq->cqd;
+ u32 cqcc = wq->cc + update_owner_only;
u32 cqe_count;
u32 i;
- cqe_count = min_t(u32, cq->decmprs_left, budget_rem);
+ cqe_count = min_t(u32, cqd->left, budget_rem);
for (i = update_owner_only; i < cqe_count;
- i++, cq->mini_arr_idx++, cqcc++) {
- if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
- mlx5e_read_mini_arr_slot(cq, cqcc);
+ i++, cqd->mini_arr_idx++, cqcc++) {
+ if (cqd->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
+ mlx5e_read_mini_arr_slot(wq, cqd, cqcc);
- mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
- rq->handle_rx_cqe(rq, &cq->title);
+ mlx5e_decompress_cqe_no_hash(rq, wq, cqcc);
+ rq->handle_rx_cqe(rq, &cqd->title);
}
- mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc);
- cq->wq.cc = cqcc;
- cq->decmprs_left -= cqe_count;
+ mlx5e_cqes_update_owner(wq, cqcc - wq->cc);
+ wq->cc = cqcc;
+ cqd->left -= cqe_count;
rq->stats->cqe_compress_pkts += cqe_count;
return cqe_count;
}
static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
- struct mlx5e_cq *cq,
+ struct mlx5_cqwq *wq,
int budget_rem)
{
- mlx5e_read_title_slot(rq, cq, cq->wq.cc);
- mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1);
- mlx5e_decompress_cqe(rq, cq, cq->wq.cc);
- rq->handle_rx_cqe(rq, &cq->title);
- cq->mini_arr_idx++;
+ struct mlx5e_cq_decomp *cqd = &rq->cqd;
+ u32 cc = wq->cc;
- return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1;
+ mlx5e_read_title_slot(rq, wq, cc);
+ mlx5e_read_mini_arr_slot(wq, cqd, cc + 1);
+ mlx5e_decompress_cqe(rq, wq, cc);
+ rq->handle_rx_cqe(rq, &cqd->title);
+ cqd->mini_arr_idx++;
+
+ return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
}
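/* For orientation (an assumed sketch, not part of this patch): the per-RQ
 * decompression state that replaces the old mlx5e_cq fields is expected to
 * look roughly like this; the real struct mlx5e_cq_decomp is defined in en.h.
 */
struct mlx5e_cq_decomp_sketch {
	struct mlx5_cqe64	title;		/* expanded template CQE */
	struct mlx5_mini_cqe8	mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
	u8			mini_arr_idx;	/* next mini CQE to expand */
	u16			left;		/* CQEs left in the session */
	u16			wqe_counter;
};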
static inline bool mlx5e_page_is_reserved(struct page *page)
@@ -369,7 +385,7 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
static inline void
mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
struct mlx5e_dma_info *dma_info,
- int offset_from, int offset_to, u32 headlen)
+ int offset_from, u32 headlen)
{
const void *from = page_address(dma_info->page) + offset_from;
/* Aligning len to sizeof(long) optimizes memcpy performance */
@@ -377,24 +393,7 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len,
DMA_FROM_DEVICE);
- skb_copy_to_linear_data_offset(skb, offset_to, from, len);
-}
-
-static inline void
-mlx5e_copy_skb_header_mpwqe(struct device *pdev,
- struct sk_buff *skb,
- struct mlx5e_dma_info *dma_info,
- u32 offset, u32 headlen)
-{
- u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);
-
- mlx5e_copy_skb_header(pdev, skb, dma_info, offset, 0, headlen_pg);
-
- if (unlikely(offset + headlen > PAGE_SIZE)) {
- dma_info++;
- mlx5e_copy_skb_header(pdev, skb, dma_info, 0, headlen_pg,
- headlen - headlen_pg);
- }
+ skb_copy_to_linear_data(skb, from, len);
}
static void
@@ -554,9 +553,9 @@ static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
mlx5_cqwq_pop(&cq->wq);
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
netdev_WARN_ONCE(cq->channel->netdev,
- "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own);
+ "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe));
return;
}
@@ -693,7 +692,14 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
{
*proto = ((struct ethhdr *)skb->data)->h_proto;
*proto = __vlan_get_protocol(skb, *proto, network_depth);
- return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6));
+
+ if (*proto == htons(ETH_P_IP))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));
+
+ if (*proto == htons(ETH_P_IPV6))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));
+
+ return false;
}
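/* Note (illustrative, not part of this patch): the added pskb_may_pull()
 * calls guarantee the full IPv4/IPv6 header sits in the linear area before
 * get_ip_proto() below dereferences skb->data + network_depth directly;
 * without the pull, those bytes could still live in a paged fragment.
 */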
static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
@@ -713,23 +719,76 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
rq->stats->ecn_mark += !!rc;
}
-static u32 mlx5e_get_fcs(const struct sk_buff *skb)
+static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
{
- const void *fcs_bytes;
- u32 _fcs_bytes;
+ void *ip_p = skb->data + network_depth;
+
+ return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
+ ((struct ipv6hdr *)ip_p)->nexthdr;
+}
+
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
- fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
- ETH_FCS_LEN, &_fcs_bytes);
+#define MAX_PADDING 8
- return __get_unaligned_cpu32(fcs_bytes);
+static void
+tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
+ struct mlx5e_rq_stats *stats)
+{
+ stats->csum_complete_tail_slow++;
+ skb->csum = csum_block_add(skb->csum,
+ skb_checksum(skb, offset, len, 0),
+ offset);
}
-static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
+static void
+tail_padding_csum(struct sk_buff *skb, int offset,
+ struct mlx5e_rq_stats *stats)
{
- void *ip_p = skb->data + network_depth;
+ u8 tail_padding[MAX_PADDING];
+ int len = skb->len - offset;
+ void *tail;
- return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
- ((struct ipv6hdr *)ip_p)->nexthdr;
+ if (unlikely(len > MAX_PADDING)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ tail = skb_header_pointer(skb, offset, len, tail_padding);
+ if (unlikely(!tail)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ stats->csum_complete_tail++;
+ skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
+}
+
+static void
+mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto,
+ struct mlx5e_rq_stats *stats)
+{
+ struct ipv6hdr *ip6;
+ struct iphdr *ip4;
+ int pkt_len;
+
+ switch (proto) {
+ case htons(ETH_P_IP):
+ ip4 = (struct iphdr *)(skb->data + network_depth);
+ pkt_len = network_depth + ntohs(ip4->tot_len);
+ break;
+ case htons(ETH_P_IPV6):
+ ip6 = (struct ipv6hdr *)(skb->data + network_depth);
+ pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
+ break;
+ default:
+ return;
+ }
+
+ if (likely(pkt_len >= skb->len))
+ return;
+
+ tail_padding_csum(skb, pkt_len, stats);
}
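/* Worked example (illustrative): with network_depth = ETH_HLEN = 14 and an
 * IPv4 tot_len of 80, pkt_len = 94. Should such a frame arrive padded to
 * skb->len = 100, the 6 trailing octets at offset 94 are folded into
 * skb->csum by the fast path above (6 <= MAX_PADDING).
 */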
static inline void mlx5e_handle_csum(struct net_device *netdev,
@@ -751,7 +810,19 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
return;
}
- if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)))
+ /* True when explicitly set via a priv flag, or when an XDP prog is loaded */
+ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
+ goto csum_unnecessary;
+
+ /* CQE csum doesn't cover padding octets in short ethernet
+ * frames, and the pad field is appended before the FCS is
+ * calculated and appended.
+ *
+ * Detecting these padded frames requires verifying and parsing
+ * the IP headers, so we simply force all such small frames to
+ * be CHECKSUM_UNNECESSARY even if they are not padded.
+ */
+ if (short_frame(skb->len))
goto csum_unnecessary;
if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
@@ -768,18 +839,15 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
skb->csum = csum_partial(skb->data + ETH_HLEN,
network_depth - ETH_HLEN,
skb->csum);
- if (unlikely(netdev->features & NETIF_F_RXFCS))
- skb->csum = csum_block_add(skb->csum,
- (__force __wsum)mlx5e_get_fcs(skb),
- skb->len - ETH_FCS_LEN);
+
+ mlx5e_skb_padding_csum(skb, network_depth, proto, stats);
stats->csum_complete++;
return;
}
csum_unnecessary:
if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
- ((cqe->hds_ip_ext & CQE_L4_OK) ||
- (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) {
+ (cqe->hds_ip_ext & CQE_L4_OK))) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (cqe_is_tunneled(cqe)) {
skb->csum_level = 1;
@@ -898,7 +966,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
prefetchw(va); /* xdp_frame data area */
prefetch(data);
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
return NULL;
}
@@ -930,7 +998,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
u16 byte_cnt = cqe_bcnt - headlen;
struct sk_buff *skb;
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
return NULL;
}
@@ -960,8 +1028,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
}
/* copy header */
- mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset,
- 0, headlen);
+ mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, headlen);
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += headlen;
skb->len += headlen;
@@ -1083,8 +1150,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
di++;
}
/* copy header */
- mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, head_di,
- head_offset, headlen);
+ mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, headlen);
/* skb linear part was allocated with headlen and aligned to long */
skb->tail += headlen;
skb->len += headlen;
@@ -1154,7 +1220,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
wi->consumed_strides += cstrides;
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;
goto mpwrq_cqe_out;
}
@@ -1190,17 +1256,17 @@ mpwrq_cqe_out:
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
- struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
+ struct mlx5_cqwq *cqwq = &cq->wq;
struct mlx5_cqe64 *cqe;
int work_done = 0;
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
return 0;
- if (cq->decmprs_left)
- work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
+ if (rq->cqd.left)
+ work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
- cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ cqe = mlx5_cqwq_get_cqe(cqwq);
if (!cqe) {
if (unlikely(work_done))
goto out;
@@ -1210,28 +1276,21 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
do {
if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
work_done +=
- mlx5e_decompress_cqes_start(rq, cq,
+ mlx5e_decompress_cqes_start(rq, cqwq,
budget - work_done);
continue;
}
- mlx5_cqwq_pop(&cq->wq);
+ mlx5_cqwq_pop(cqwq);
rq->handle_rx_cqe(rq, cqe);
- } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+ } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
out:
- if (xdpsq->doorbell) {
- mlx5e_xmit_xdp_doorbell(xdpsq);
- xdpsq->doorbell = false;
- }
-
- if (xdpsq->redirect_flush) {
- xdp_do_flush_map();
- xdpsq->redirect_flush = false;
- }
+ if (rq->xdp_prog)
+ mlx5e_xdp_rx_poll_complete(rq);
- mlx5_cqwq_update_db_record(&cq->wq);
+ mlx5_cqwq_update_db_record(cqwq);
/* ensure cq space is freed before enabling more cqes */
wmb();
@@ -1292,8 +1351,14 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
skb->protocol = *((__be16 *)(skb->data));
- skb->ip_summed = CHECKSUM_COMPLETE;
- skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+ if (netdev->features & NETIF_F_RXCSUM) {
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+ stats->csum_complete++;
+ } else {
+ skb->ip_summed = CHECKSUM_NONE;
+ stats->csum_none++;
+ }
if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
skb_hwtstamps(skb)->hwtstamp =
@@ -1312,7 +1377,6 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
skb->dev = netdev;
- stats->csum_complete++;
stats->packets++;
stats->bytes += cqe_bcnt;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 4337afd610d7..b75aa8b8bf04 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
+#include "lib/mlx5.h"
#include "en.h"
#include "en_accel/ipsec.h"
#include "en_accel/tls.h"
@@ -58,6 +59,8 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) },
@@ -126,9 +129,9 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
return idx;
}
-void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
+static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
{
- struct mlx5e_sw_stats temp, *s = &temp;
+ struct mlx5e_sw_stats *s = &priv->stats.sw;
int i;
memset(s, 0, sizeof(*s));
@@ -150,6 +153,8 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
s->rx_csum_none += rq_stats->csum_none;
s->rx_csum_complete += rq_stats->csum_complete;
+ s->rx_csum_complete_tail += rq_stats->csum_complete_tail;
+ s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow;
s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
s->rx_xdp_drop += rq_stats->xdp_drop;
@@ -211,8 +216,6 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->tx_cqes += sq_stats->cqes;
}
}
-
- memcpy(&priv->stats.sw, s, sizeof(*s));
}
static const struct counter_desc q_stats_desc[] = {
@@ -480,7 +483,10 @@ static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data,
return idx;
}
-static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
+#define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \
+ (MLX5_CAP_GEN(mdev, pcam_reg) ? MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1)
+
+void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
{
struct mlx5e_pport_stats *pstats = &priv->stats.pport;
struct mlx5_core_dev *mdev = priv->mdev;
@@ -488,6 +494,9 @@ static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
void *out;
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
MLX5_SET(ppcnt_reg, in, local_port, 1);
out = pstats->IEEE_802_3_counters;
MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
@@ -600,6 +609,9 @@ static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv)
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
void *out;
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
MLX5_SET(ppcnt_reg, in, local_port, 1);
out = pstats->RFC_2819_counters;
MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
@@ -934,7 +946,7 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
};
static const struct counter_desc pport_pfc_stall_stats_desc[] = {
- { "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
+ { "tx_pause_storm_warning_events", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
{ "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) },
};
@@ -1075,6 +1087,9 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv)
int prio;
void *out;
+ if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
+ return;
+
MLX5_SET(ppcnt_reg, in, local_port, 1);
MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
@@ -1086,13 +1101,13 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv)
}
static const struct counter_desc mlx5e_pme_status_desc[] = {
- { "module_unplug", 8 },
+ { "module_unplug", sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED },
};
static const struct counter_desc mlx5e_pme_error_desc[] = {
- { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */
- { "module_high_temp", 48 }, /* high temperature */
- { "module_bad_shorted", 56 }, /* bad or shorted cable/module */
+ { "module_bus_stuck", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK },
+ { "module_high_temp", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE },
+ { "module_bad_shorted", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE },
};
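/* Editorial note (illustrative): counter_desc.offset indexes an array of
 * u64 counters, so deriving it as sizeof(u64) * <event enum value> ties
 * each string to its enum constant instead of hand-maintaining raw byte
 * offsets such as 16/48/56.
 */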
#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc)
@@ -1120,15 +1135,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data,
static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data,
int idx)
{
- struct mlx5_priv *mlx5_priv = &priv->mdev->priv;
+ struct mlx5_pme_stats pme_stats;
int i;
+ mlx5_get_pme_stats(priv->mdev, &pme_stats);
+
for (i = 0; i < NUM_PME_STATUS_STATS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+ data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters,
mlx5e_pme_status_desc, i);
for (i = 0; i < NUM_PME_ERR_STATS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+ data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters,
mlx5e_pme_error_desc, i);
return idx;
@@ -1177,6 +1194,8 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 3ff69ddae2d3..16c3b785f282 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -71,6 +71,8 @@ struct mlx5e_sw_stats {
u64 rx_csum_unnecessary;
u64 rx_csum_none;
u64 rx_csum_complete;
+ u64 rx_csum_complete_tail;
+ u64 rx_csum_complete_tail_slow;
u64 rx_csum_unnecessary_inner;
u64 rx_xdp_drop;
u64 rx_xdp_redirect;
@@ -181,6 +183,8 @@ struct mlx5e_rq_stats {
u64 packets;
u64 bytes;
u64 csum_complete;
+ u64 csum_complete_tail;
+ u64 csum_complete_tail_slow;
u64 csum_unnecessary;
u64 csum_unnecessary_inner;
u64 csum_none;
@@ -277,6 +281,6 @@ struct mlx5e_stats_grp {
extern const struct mlx5e_stats_grp mlx5e_stats_grps[];
extern const int mlx5e_num_stats_grps;
-void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv);
+void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv);
#endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9dabe9d4b279..d75dc44eb2ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -38,21 +38,20 @@
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
-#include <net/switchdev.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
-#include <net/vxlan.h>
#include <net/arp.h>
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
#include "eswitch.h"
-#include "lib/vxlan.h"
#include "fs_core.h"
#include "en/port.h"
+#include "en/tc_tun.h"
+#include "lib/devcom.h"
struct mlx5_nic_flow_attr {
u32 action;
@@ -69,25 +68,56 @@ struct mlx5_nic_flow_attr {
enum {
MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS,
MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS,
- MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE),
- MLX5E_TC_FLOW_NIC = BIT(MLX5E_TC_FLOW_BASE + 1),
- MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2),
- MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
- MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
- MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5),
+ MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD,
+ MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD,
+ MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE),
+ MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1),
+ MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2),
+ MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3),
+ MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4),
+ MLX5E_TC_FLOW_NOT_READY = BIT(MLX5E_TC_FLOW_BASE + 5),
};
#define MLX5E_TC_MAX_SPLITS 1
+/* Helper struct for accessing a struct containing a list_head array.
+ * Containing struct
+ * |- Helper array
+ * [0] Helper item 0
+ * |- list_head item 0
+ * |- index (0)
+ * [1] Helper item 1
+ * |- list_head item 1
+ * |- index (1)
+ * To access the containing struct from one of the list_head items:
+ * 1. Get the helper item from the list_head item using
+ * helper item =
+ * container_of(list_head item, helper struct type, list_head field)
+ * 2. Get the containing struct from the helper item and its index in the array:
+ * containing struct =
+ * container_of(helper item, containing struct type, helper field[index])
+ */
+struct encap_flow_item {
+ struct list_head list;
+ int index;
+};
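/* A minimal sketch (not part of this patch) of the two-step lookup the
 * comment above describes, matching how the driver recovers a flow from an
 * encap entry's list below:
 */
static struct mlx5e_tc_flow *
flow_from_encap_list_head(struct list_head *lh)
{
	/* step 1: list_head item -> helper item */
	struct encap_flow_item *efi =
		container_of(lh, struct encap_flow_item, list);

	/* step 2: helper item + stored index -> containing struct */
	return container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
}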
+
struct mlx5e_tc_flow {
struct rhash_head node;
struct mlx5e_priv *priv;
u64 cookie;
u16 flags;
struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
- struct list_head encap; /* flows sharing the same encap ID */
+ /* Flow can be associated with multiple encap IDs.
+ * The number of encaps is bounded by the number of supported
+ * destinations.
+ */
+ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_flow *peer_flow;
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
struct list_head hairpin; /* flows sharing the same hairpin */
+ struct list_head peer; /* flows with peer flow */
+ struct list_head unready; /* flows not ready to be offloaded (e.g. due to a missing route) */
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
@@ -95,11 +125,13 @@ struct mlx5e_tc_flow {
};
struct mlx5e_tc_flow_parse_attr {
- struct ip_tunnel_info tun_info;
+ struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct net_device *filter_dev;
struct mlx5_flow_spec spec;
int num_mod_hdr_actions;
+ int max_mod_hdr_actions;
void *mod_hdr_actions;
- int mirred_ifindex;
+ int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
};
#define MLX5E_TC_TABLE_NUM_GROUPS 4
@@ -316,7 +348,7 @@ static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
for (i = 0; i < sz; i++) {
ix = i;
- if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR)
+ if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
ix = mlx5e_bits_invert(i, ilog2(sz));
ix = indirection_rqt[ix];
rqn = hp->pair->rqn[ix];
@@ -360,13 +392,15 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
void *tirc;
for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
+
memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+ mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
err = mlx5_core_create_tir(hp->func_mdev, in,
MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
@@ -569,7 +603,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct netlink_ext_ack *extack)
{
- int peer_ifindex = parse_attr->mirred_ifindex;
+ int peer_ifindex = parse_attr->mirred_ifindex[0];
struct mlx5_hairpin_params params;
struct mlx5_core_dev *peer_mdev;
struct mlx5e_hairpin_entry *hpe;
@@ -802,7 +836,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
mlx5_del_flow_rules(flow->rule[0]);
mlx5_fc_destroy(priv->mdev, counter);
- if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) {
+ if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) {
mlx5_destroy_flow_table(priv->fs.tc.t);
priv->fs.tc.t = NULL;
}
@@ -815,14 +849,15 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow);
+ struct mlx5e_tc_flow *flow, int out_index);
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
- struct ip_tunnel_info *tun_info,
+ struct mlx5e_tc_flow *flow,
struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
struct net_device **encap_dev,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack);
+ bool *encap_valid);
static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
@@ -836,7 +871,7 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
if (IS_ERR(rule))
return rule;
- if (attr->mirror_count) {
+ if (attr->split_count) {
flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
if (IS_ERR(flow->rule[1])) {
mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
@@ -855,7 +890,7 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
{
flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
- if (attr->mirror_count)
+ if (attr->split_count)
mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
@@ -871,7 +906,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- slow_attr->mirror_count = 0;
+ slow_attr->split_count = 0;
slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
@@ -888,27 +923,49 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
{
memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- slow_attr->mirror_count = 0;
+ slow_attr->split_count = 0;
slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
flow->flags &= ~MLX5E_TC_FLOW_SLOW;
}
+static void add_unready_flow(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5_eswitch *esw;
+
+ esw = flow->priv->mdev->priv.eswitch;
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &rpriv->uplink_priv;
+
+ flow->flags |= MLX5E_TC_FLOW_NOT_READY;
+ list_add_tail(&flow->unready, &uplink_priv->unready_flows);
+}
+
+static void remove_unready_flow(struct mlx5e_tc_flow *flow)
+{
+ list_del(&flow->unready);
+ flow->flags &= ~MLX5E_TC_FLOW_NOT_READY;
+}
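/* A hedged sketch (not part of this patch; the real worker is
 * mlx5e_tc_reoffload_flows_work) of how the unready list is expected to be
 * drained when a port-affinity event fires:
 */
static void reoffload_unready_flows_sketch(struct mlx5_rep_uplink_priv *uplink_priv)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, &uplink_priv->unready_flows, unready) {
		/* retry mlx5e_tc_add_fdb_flow(); once it succeeds: */
		remove_unready_flow(flow);
	}
}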
+
static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
u32 max_chain = mlx5_eswitch_get_chain_range(esw);
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
u16 max_prio = mlx5_eswitch_get_prio_range(esw);
struct net_device *out_dev, *encap_dev = NULL;
struct mlx5_fc *counter = NULL;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
- int err = 0, encap_err = 0;
+ bool encap_valid = true;
+ int err = 0;
+ int out_index;
if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) {
NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
@@ -927,20 +984,24 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
goto err_max_prio_chain;
}
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ int mirred_ifindex;
+
+ if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ mirred_ifindex = parse_attr->mirred_ifindex[out_index];
out_dev = __dev_get_by_index(dev_net(priv->netdev),
- attr->parse_attr->mirred_ifindex);
- encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
- out_dev, &encap_dev, flow,
- extack);
- if (encap_err && encap_err != -EAGAIN) {
- err = encap_err;
+ mirred_ifindex);
+ err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
+ extack, &encap_dev, &encap_valid);
+ if (err)
goto err_attach_encap;
- }
+
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
- attr->out_rep[attr->out_count] = rpriv->rep;
- attr->out_mdev[attr->out_count++] = out_priv->mdev;
+ attr->dests[out_index].rep = rpriv->rep;
+ attr->dests[out_index].mdev = out_priv->mdev;
}
err = mlx5_eswitch_add_vlan_action(esw, attr);
@@ -955,7 +1016,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(esw->dev, true);
+ counter = mlx5_fc_create(attr->counter_dev, true);
if (IS_ERR(counter)) {
err = PTR_ERR(counter);
goto err_create_counter;
@@ -964,10 +1025,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
attr->counter = counter;
}
- /* we get here if (1) there's no error or when
- * (2) there's an encap action and we're on -EAGAIN (no valid neigh)
+ /* we get here if one of the following takes place:
+ * (1) there's no error
+ * (2) there's an encap action and we don't have a valid neigh
*/
- if (encap_err == -EAGAIN) {
+ if (!encap_valid) {
/* continue with goto slow path rule instead */
struct mlx5_esw_flow_attr slow_attr;
@@ -984,15 +1046,16 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
return 0;
err_add_rule:
- mlx5_fc_destroy(esw->dev, counter);
+ mlx5_fc_destroy(attr->counter_dev, counter);
err_create_counter:
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
err_mod_hdr:
mlx5_eswitch_del_vlan_action(esw, attr);
err_add_vlan:
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
- mlx5e_detach_encap(priv, flow);
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+ if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
+ mlx5e_detach_encap(priv, flow, out_index);
err_attach_encap:
err_max_prio_chain:
return err;
@@ -1004,6 +1067,13 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5_esw_flow_attr slow_attr;
+ int out_index;
+
+ if (flow->flags & MLX5E_TC_FLOW_NOT_READY) {
+ remove_unready_flow(flow);
+ kvfree(attr->parse_attr);
+ return;
+ }
if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
if (flow->flags & MLX5E_TC_FLOW_SLOW)
@@ -1014,16 +1084,16 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5_eswitch_del_vlan_action(esw, attr);
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
- mlx5e_detach_encap(priv, flow);
- kvfree(attr->parse_attr);
- }
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+ if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
+ mlx5e_detach_encap(priv, flow, out_index);
+ kvfree(attr->parse_attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- mlx5_fc_destroy(esw->dev, attr->counter);
+ mlx5_fc_destroy(attr->counter_dev, attr->counter);
}
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
@@ -1033,10 +1103,12 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
struct mlx5_esw_flow_attr slow_attr, *esw_attr;
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
+ struct encap_flow_item *efi;
struct mlx5e_tc_flow *flow;
int err;
- err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+ err = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
e->encap_size, e->encap_header,
MLX5_FLOW_NAMESPACE_FDB,
&e->encap_id);
@@ -1048,11 +1120,31 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(priv);
- list_for_each_entry(flow, &e->flows, encap) {
+ list_for_each_entry(efi, &e->flows, list) {
+ bool all_flow_encaps_valid = true;
+ int i;
+
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
esw_attr = flow->esw_attr;
- esw_attr->encap_id = e->encap_id;
spec = &esw_attr->parse_attr->spec;
+ esw_attr->dests[efi->index].encap_id = e->encap_id;
+ esw_attr->dests[efi->index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ /* Flow can be associated with multiple encap entries.
+ * Before offloading the flow, verify that all of them have
+ * a valid neighbour.
+ */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
+ all_flow_encaps_valid = false;
+ break;
+ }
+ }
+ /* Do not offload flows with unresolved neighbors */
+ if (!all_flow_encaps_valid)
+ continue;
/* update from slow path rule to encap rule */
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
if (IS_ERR(rule)) {
@@ -1075,14 +1167,18 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
struct mlx5_esw_flow_attr slow_attr;
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
+ struct encap_flow_item *efi;
struct mlx5e_tc_flow *flow;
int err;
- list_for_each_entry(flow, &e->flows, encap) {
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
spec = &flow->esw_attr->parse_attr->spec;
/* update from encap rule to slow path rule */
rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
+ /* mark the flow's encap dest as non-valid */
+ flow->esw_attr->dests[efi->index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -1130,9 +1226,12 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
return;
list_for_each_entry(e, &nhe->encap_list, encap_list) {
+ struct encap_flow_item *efi;
if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
continue;
- list_for_each_entry(flow, &e->flows, encap) {
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow,
+ encaps[efi->index]);
if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
counter = mlx5e_tc_get_counter(flow);
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
@@ -1162,11 +1261,11 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
+ struct mlx5e_tc_flow *flow, int out_index)
{
- struct list_head *next = flow->encap.next;
+ struct list_head *next = flow->encaps[out_index].list.next;
- list_del(&flow->encap);
+ list_del(&flow->encaps[out_index].list);
if (list_empty(next)) {
struct mlx5e_encap_entry *e;
@@ -1182,177 +1281,137 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
}
}
-static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow)
+static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
- mlx5e_tc_del_fdb_flow(priv, flow);
- else
- mlx5e_tc_del_nic_flow(priv, flow);
+ struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+
+ if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
+ !(flow->flags & MLX5E_TC_FLOW_DUP))
+ return;
+
+ mutex_lock(&esw->offloads.peer_mutex);
+ list_del(&flow->peer);
+ mutex_unlock(&esw->offloads.peer_mutex);
+
+ flow->flags &= ~MLX5E_TC_FLOW_DUP;
+
+ mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
+ kvfree(flow->peer_flow);
+ flow->peer_flow = NULL;
}
-static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f)
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
- void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- outer_headers);
- void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- outer_headers);
- void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
+ struct mlx5_core_dev *dev = flow->priv->mdev;
+ struct mlx5_devcom *devcom = dev->priv.devcom;
+ struct mlx5_eswitch *peer_esw;
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return;
+
+ __mlx5e_tc_del_fdb_peer_flow(flow);
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
- struct flow_dissector_key_keyid *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_KEYID,
- f->key);
- struct flow_dissector_key_keyid *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_KEYID,
- f->mask);
- MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
- be32_to_cpu(mask->keyid));
- MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
- be32_to_cpu(key->keyid));
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+ mlx5e_tc_del_fdb_peer_flow(flow);
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ } else {
+ mlx5e_tc_del_nic_flow(priv, flow);
}
}
+
static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f)
+ struct tc_cls_flower_offload *f,
+ struct net_device *filter_dev, u8 *match_level)
{
struct netlink_ext_ack *extack = f->common.extack;
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
outer_headers);
void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers);
+ struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ struct flow_match_control enc_control;
+ int err;
- struct flow_dissector_key_control *enc_control =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_CONTROL,
- f->key);
-
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
- struct flow_dissector_key_ports *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_PORTS,
- f->key);
- struct flow_dissector_key_ports *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_PORTS,
- f->mask);
-
- /* Full udp dst port must be given */
- if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
- goto vxlan_match_offload_err;
-
- if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) &&
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
- parse_vxlan_attr(spec, f);
- else {
- NL_SET_ERR_MSG_MOD(extack,
- "port isn't an offloaded vxlan udp dport");
- netdev_warn(priv->netdev,
- "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
- return -EOPNOTSUPP;
- }
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- udp_dport, ntohs(mask->dst));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- udp_dport, ntohs(key->dst));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- udp_sport, ntohs(mask->src));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- udp_sport, ntohs(key->src));
- } else { /* udp dst port must be given */
-vxlan_match_offload_err:
+ err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
+ headers_c, headers_v, match_level);
+ if (err) {
NL_SET_ERR_MSG_MOD(extack,
- "IP tunnel decap offload supported only for vxlan, must set UDP dport");
- netdev_warn(priv->netdev,
- "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
- return -EOPNOTSUPP;
+ "failed to parse tunnel attributes");
+ return err;
}
- if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
- struct flow_dissector_key_ipv4_addrs *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
- f->key);
- struct flow_dissector_key_ipv4_addrs *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
- f->mask);
+ flow_rule_match_enc_control(rule, &enc_control);
+
+ if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_enc_ipv4_addrs(rule, &match);
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4,
- ntohl(mask->src));
+ ntohl(match.mask->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv4_layout.ipv4,
- ntohl(key->src));
+ ntohl(match.key->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
- ntohl(mask->dst));
+ ntohl(match.mask->dst));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
- ntohl(key->dst));
+ ntohl(match.key->dst));
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
- } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
- struct flow_dissector_key_ipv6_addrs *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
- f->key);
- struct flow_dissector_key_ipv6_addrs *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
- f->mask);
+ } else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+ flow_rule_match_enc_ipv6_addrs(rule, &match);
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
- struct flow_dissector_key_ip *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_IP,
- f->key);
- struct flow_dissector_key_ip *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_IP,
- f->mask);
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ struct flow_match_ip match;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
+ flow_rule_match_enc_ip(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+ match.mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+ match.key->tos & 0x3);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
+ match.mask->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
+ match.key->tos >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
+ match.mask->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
+ match.key->ttl);
- if (mask->ttl &&
+ if (match.mask->ttl &&
!MLX5_CAP_ESW_FLOWTABLE_FDB
(priv->mdev,
ft_field_support.outer_ipv4_ttl)) {
@@ -1381,7 +1440,8 @@ vxlan_match_offload_err:
static int __parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct tc_cls_flower_offload *f,
- u8 *match_level)
+ struct net_device *filter_dev,
+ u8 *match_level, u8 *tunnel_match_level)
{
struct netlink_ext_ack *extack = f->common.extack;
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -1392,12 +1452,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
misc_parameters);
void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
misc_parameters);
+ struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ struct flow_dissector *dissector = rule->match.dissector;
u16 addr_type = 0;
u8 ip_proto = 0;
*match_level = MLX5_MATCH_NONE;
- if (f->dissector->used_keys &
+ if (dissector->used_keys &
~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
BIT(FLOW_DISSECTOR_KEY_BASIC) |
BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
@@ -1416,23 +1478,21 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
BIT(FLOW_DISSECTOR_KEY_ENC_IP))) {
NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
- f->dissector->used_keys);
+ dissector->used_keys);
return -EOPNOTSUPP;
}
- if ((dissector_uses_key(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
- dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
- dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
- dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
- struct flow_dissector_key_control *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ENC_CONTROL,
- f->key);
- switch (key->addr_type) {
+ if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
+ flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
+ flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
+ flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+ struct flow_match_control match;
+
+ flow_rule_match_enc_control(rule, &match);
+ switch (match.key->addr_type) {
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
- if (parse_tunnel_attr(priv, spec, f))
+ if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level))
return -EOPNOTSUPP;
break;
default:
@@ -1448,35 +1508,27 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
inner_headers);
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
- struct flow_dissector_key_basic *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_BASIC,
- f->key);
- struct flow_dissector_key_basic *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_BASIC,
- f->mask);
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
- ntohs(mask->n_proto));
+ ntohs(match.mask->n_proto));
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
- ntohs(key->n_proto));
+ ntohs(match.key->n_proto));
- if (mask->n_proto)
+ if (match.mask->n_proto)
*match_level = MLX5_MATCH_L2;
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
- struct flow_dissector_key_vlan *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_VLAN,
- f->key);
- struct flow_dissector_key_vlan *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_VLAN,
- f->mask);
- if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
- if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_match_vlan match;
+
+ flow_rule_match_vlan(rule, &match);
+ if (match.mask->vlan_id ||
+ match.mask->vlan_priority ||
+ match.mask->vlan_tpid) {
+ if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
svlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
@@ -1488,11 +1540,15 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
cvlan_tag, 1);
}
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
+ match.mask->vlan_id);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
+ match.key->vlan_id);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
+ match.mask->vlan_priority);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
+ match.key->vlan_priority);
*match_level = MLX5_MATCH_L2;
}
@@ -1502,17 +1558,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
*match_level = MLX5_MATCH_L2;
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) {
- struct flow_dissector_key_vlan *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_CVLAN,
- f->key);
- struct flow_dissector_key_vlan *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_CVLAN,
- f->mask);
- if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
- if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
+ struct flow_match_vlan match;
+
+ flow_rule_match_vlan(rule, &match);
+ if (match.mask->vlan_id ||
+ match.mask->vlan_priority ||
+ match.mask->vlan_tpid) {
+ if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
MLX5_SET(fte_match_set_misc, misc_c,
outer_second_svlan_tag, 1);
MLX5_SET(fte_match_set_misc, misc_v,
@@ -1525,69 +1578,58 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
}
MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
- mask->vlan_id);
+ match.mask->vlan_id);
MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
- key->vlan_id);
+ match.key->vlan_id);
MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
- mask->vlan_priority);
+ match.mask->vlan_priority);
MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
- key->vlan_priority);
+ match.key->vlan_priority);
*match_level = MLX5_MATCH_L2;
}
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
- struct flow_dissector_key_eth_addrs *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ETH_ADDRS,
- f->key);
- struct flow_dissector_key_eth_addrs *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_ETH_ADDRS,
- f->mask);
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_match_eth_addrs match;
+ flow_rule_match_eth_addrs(rule, &match);
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dmac_47_16),
- mask->dst);
+ match.mask->dst);
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dmac_47_16),
- key->dst);
+ match.key->dst);
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
smac_47_16),
- mask->src);
+ match.mask->src);
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
smac_47_16),
- key->src);
+ match.key->src);
- if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst))
+ if (!is_zero_ether_addr(match.mask->src) ||
+ !is_zero_ether_addr(match.mask->dst))
*match_level = MLX5_MATCH_L2;
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
- struct flow_dissector_key_control *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_CONTROL,
- f->key);
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_match_control match;
- struct flow_dissector_key_control *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_CONTROL,
- f->mask);
- addr_type = key->addr_type;
+ flow_rule_match_control(rule, &match);
+ addr_type = match.key->addr_type;
/* the HW doesn't support frag first/later */
- if (mask->flags & FLOW_DIS_FIRST_FRAG)
+ if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
return -EOPNOTSUPP;
- if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
+ if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
- key->flags & FLOW_DIS_IS_FRAGMENT);
+ match.key->flags & FLOW_DIS_IS_FRAGMENT);
/* the HW doesn't need L3 inline to match on frag=no */
- if (!(key->flags & FLOW_DIS_IS_FRAGMENT))
+ if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
*match_level = MLX5_MATCH_L2;
/* *** L2 attributes parsing up to here *** */
else
@@ -1595,102 +1637,85 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
}
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
- struct flow_dissector_key_basic *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_BASIC,
- f->key);
- struct flow_dissector_key_basic *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_BASIC,
- f->mask);
- ip_proto = key->ip_proto;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+ ip_proto = match.key->ip_proto;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- mask->ip_proto);
+ match.mask->ip_proto);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- key->ip_proto);
+ match.key->ip_proto);
- if (mask->ip_proto)
+ if (match.mask->ip_proto)
*match_level = MLX5_MATCH_L3;
}
if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
- struct flow_dissector_key_ipv4_addrs *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_IPV4_ADDRS,
- f->key);
- struct flow_dissector_key_ipv4_addrs *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_IPV4_ADDRS,
- f->mask);
+ struct flow_match_ipv4_addrs match;
+ flow_rule_match_ipv4_addrs(rule, &match);
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &mask->src, sizeof(mask->src));
+ &match.mask->src, sizeof(match.mask->src));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &key->src, sizeof(key->src));
+ &match.key->src, sizeof(match.key->src));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &mask->dst, sizeof(mask->dst));
+ &match.mask->dst, sizeof(match.mask->dst));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &key->dst, sizeof(key->dst));
+ &match.key->dst, sizeof(match.key->dst));
- if (mask->src || mask->dst)
+ if (match.mask->src || match.mask->dst)
*match_level = MLX5_MATCH_L3;
}
if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
- struct flow_dissector_key_ipv6_addrs *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_IPV6_ADDRS,
- f->key);
- struct flow_dissector_key_ipv6_addrs *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_IPV6_ADDRS,
- f->mask);
+ struct flow_match_ipv6_addrs match;
+ flow_rule_match_ipv6_addrs(rule, &match);
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &mask->src, sizeof(mask->src));
+ &match.mask->src, sizeof(match.mask->src));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &key->src, sizeof(key->src));
+ &match.key->src, sizeof(match.key->src));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &mask->dst, sizeof(mask->dst));
+ &match.mask->dst, sizeof(match.mask->dst));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &key->dst, sizeof(key->dst));
+ &match.key->dst, sizeof(match.key->dst));
- if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
- ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
+ if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
+ ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
*match_level = MLX5_MATCH_L3;
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) {
- struct flow_dissector_key_ip *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_IP,
- f->key);
- struct flow_dissector_key_ip *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_IP,
- f->mask);
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+ struct flow_match_ip match;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
+ flow_rule_match_ip(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+ match.mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+ match.key->tos & 0x3);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
+ match.mask->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
+ match.key->tos >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
+ match.mask->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
+ match.key->ttl);
- if (mask->ttl &&
+ if (match.mask->ttl &&
!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
ft_field_support.outer_ipv4_ttl)) {
NL_SET_ERR_MSG_MOD(extack,
@@ -1698,44 +1723,39 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
- if (mask->tos || mask->ttl)
+ if (match.mask->tos || match.mask->ttl)
*match_level = MLX5_MATCH_L3;
}
/* *** L3 attributes parsing up to here *** */
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
- struct flow_dissector_key_ports *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_PORTS,
- f->key);
- struct flow_dissector_key_ports *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_PORTS,
- f->mask);
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
switch (ip_proto) {
case IPPROTO_TCP:
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- tcp_sport, ntohs(mask->src));
+ tcp_sport, ntohs(match.mask->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- tcp_sport, ntohs(key->src));
+ tcp_sport, ntohs(match.key->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- tcp_dport, ntohs(mask->dst));
+ tcp_dport, ntohs(match.mask->dst));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- tcp_dport, ntohs(key->dst));
+ tcp_dport, ntohs(match.key->dst));
break;
case IPPROTO_UDP:
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- udp_sport, ntohs(mask->src));
+ udp_sport, ntohs(match.mask->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- udp_sport, ntohs(key->src));
+ udp_sport, ntohs(match.key->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- udp_dport, ntohs(mask->dst));
+ udp_dport, ntohs(match.mask->dst));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- udp_dport, ntohs(key->dst));
+ udp_dport, ntohs(match.key->dst));
break;
default:
NL_SET_ERR_MSG_MOD(extack,
@@ -1745,26 +1765,20 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
return -EINVAL;
}
- if (mask->src || mask->dst)
+ if (match.mask->src || match.mask->dst)
*match_level = MLX5_MATCH_L4;
}
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) {
- struct flow_dissector_key_tcp *key =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_TCP,
- f->key);
- struct flow_dissector_key_tcp *mask =
- skb_flow_dissector_target(f->dissector,
- FLOW_DISSECTOR_KEY_TCP,
- f->mask);
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_match_tcp match;
+ flow_rule_match_tcp(rule, &match);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
- ntohs(mask->flags));
+ ntohs(match.mask->flags));
MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
- ntohs(key->flags));
+ ntohs(match.key->flags));
- if (mask->flags)
+ if (match.mask->flags)
*match_level = MLX5_MATCH_L4;
}
@@ -1774,21 +1788,22 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
static int parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f)
+ struct tc_cls_flower_offload *f,
+ struct net_device *filter_dev)
{
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5_core_dev *dev = priv->mdev;
struct mlx5_eswitch *esw = dev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ u8 match_level, tunnel_match_level = MLX5_MATCH_NONE;
struct mlx5_eswitch_rep *rep;
- u8 match_level;
int err;
- err = __parse_cls_flower(priv, spec, f, &match_level);
+ err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level);
if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
rep = rpriv->rep;
- if (rep->vport != FDB_UPLINK_VPORT &&
+ if (rep->vport != MLX5_VPORT_UPLINK &&
(esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
esw->offloads.inline_mode < match_level)) {
NL_SET_ERR_MSG_MOD(extack,
@@ -1800,10 +1815,12 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
}
}
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
flow->esw_attr->match_level = match_level;
- else
+ flow->esw_attr->tunnel_match_level = tunnel_match_level;
+ } else {
flow->nic_attr->match_level = match_level;
+ }
return err;
}
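
The rewrite of parse_cls_flower() and its helpers applies one mechanical transformation throughout: each pair of skb_flow_dissector_target() lookups (one for the key, one for the mask) becomes a single flow_rule_match_*() call that fills a struct carrying both pointers. Condensed from the hunks above, for the BASIC key:

	/* before: two lookups per dissector key */
	struct flow_dissector_key_basic *key =
		skb_flow_dissector_target(f->dissector,
					  FLOW_DISSECTOR_KEY_BASIC, f->key);
	struct flow_dissector_key_basic *mask =
		skb_flow_dissector_target(f->dissector,
					  FLOW_DISSECTOR_KEY_BASIC, f->mask);

	/* after: one helper fills key and mask together */
	struct flow_match_basic match;

	flow_rule_match_basic(rule, &match);
	/* use match.key->n_proto and match.mask->n_proto */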
@@ -1816,27 +1833,29 @@ struct pedit_headers {
struct udphdr udp;
};
+struct pedit_headers_action {
+ struct pedit_headers vals;
+ struct pedit_headers masks;
+ u32 pedits;
+};
+
static int pedit_header_offsets[] = {
- [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
- [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
- [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
- [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
- [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
+ [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
+ [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
+ [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
};
#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
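
pedit_header() above dispatches into struct pedit_headers by indexing an offsetof() table with the header type, and set_pedit_val() below layers an overlap check on top so two pedit keys can never rewrite the same bits twice. A minimal, self-contained user-space illustration of both ideas (all names here are invented for the demo):

#include <stddef.h>
#include <stdio.h>

/* stand-ins for the per-header mask copies kept in struct pedit_headers */
struct masks {
	unsigned int eth[4];
	unsigned int ip4[5];
};

static const size_t mask_offsets[] = {
	[0] = offsetof(struct masks, eth),	/* "ETH" header type */
	[1] = offsetof(struct masks, ip4),	/* "IP4" header type */
};

#define mask_ptr(_m, _type, _off) \
	((unsigned int *)((char *)(_m) + mask_offsets[_type] + (_off)))

/* mirrors set_pedit_val(): refuse to act twice on the same bits */
static int set_val(struct masks *m, int type, size_t off, unsigned int mask)
{
	unsigned int *curr = mask_ptr(m, type, off);

	if (*curr & mask)
		return -1;	/* overlapping pedit keys */
	*curr |= mask;
	return 0;
}

int main(void)
{
	struct masks m = { { 0 } };

	printf("first  set: %d\n", set_val(&m, 1, 8, 0xff));	/* 0  */
	printf("second set: %d\n", set_val(&m, 1, 8, 0xf0));	/* -1 */
	return 0;
}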
static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
- struct pedit_headers *masks,
- struct pedit_headers *vals)
+ struct pedit_headers_action *hdrs)
{
u32 *curr_pmask, *curr_pval;
- if (hdr_type >= __PEDIT_HDR_TYPE_MAX)
- goto out_err;
-
- curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset);
- curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset);
+ curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
+ curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
if (*curr_pmask & mask) /* disallow acting twice on the same location */
goto out_err;
@@ -1888,12 +1907,11 @@ static struct mlx5_fields fields[] = {
OFFLOAD(UDP_DPORT, 2, udp.dest, 0),
};
-/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at
- * max from the SW pedit action. On success, it says how many HW actions were
- * actually parsed.
+/* On input, attr->max_mod_hdr_actions gives the maximum number of HW actions
+ * that can be parsed from the SW pedit action. On success,
+ * attr->num_mod_hdr_actions says how many HW actions were actually parsed.
*/
-static int offload_pedit_fields(struct pedit_headers *masks,
- struct pedit_headers *vals,
+static int offload_pedit_fields(struct pedit_headers_action *hdrs,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct netlink_ext_ack *extack)
{
@@ -1908,15 +1926,17 @@ static int offload_pedit_fields(struct pedit_headers *masks,
__be16 mask_be16;
void *action;
- set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET];
- add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD];
- set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET];
- add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];
+ set_masks = &hdrs[0].masks;
+ add_masks = &hdrs[1].masks;
+ set_vals = &hdrs[0].vals;
+ add_vals = &hdrs[1].vals;
action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
- action = parse_attr->mod_hdr_actions;
- max_actions = parse_attr->num_mod_hdr_actions;
- nactions = 0;
+ action = parse_attr->mod_hdr_actions +
+ parse_attr->num_mod_hdr_actions * action_size;
+
+ max_actions = parse_attr->max_mod_hdr_actions;
+ nactions = parse_attr->num_mod_hdr_actions;
for (i = 0; i < ARRAY_SIZE(fields); i++) {
f = &fields[i];
@@ -2007,12 +2027,14 @@ static int offload_pedit_fields(struct pedit_headers *masks,
}
static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
- const struct tc_action *a, int namespace,
+ struct pedit_headers_action *hdrs,
+ int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr)
{
int nkeys, action_size, max_actions;
- nkeys = tcf_pedit_nkeys(a);
+ nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits +
+ hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
@@ -2027,62 +2049,67 @@ static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
if (!parse_attr->mod_hdr_actions)
return -ENOMEM;
- parse_attr->num_mod_hdr_actions = max_actions;
+ parse_attr->max_mod_hdr_actions = max_actions;
return 0;
}
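
With the rename, max_mod_hdr_actions is the capacity fixed at allocation time and num_mod_hdr_actions is a running length, so successive pedit actions append into one buffer instead of each sizing it anew. A small user-space sketch of that capacity/length bookkeeping (names invented for the demo):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ACTION_SIZE 8	/* stand-in for the HW action entry size */

struct mod_hdr_buf {
	char *actions;
	int max_actions;	/* capacity: set once when allocating */
	int num_actions;	/* length: advanced by every parsed pedit */
};

static int append_action(struct mod_hdr_buf *b, const char *act)
{
	if (b->num_actions == b->max_actions)
		return -1;	/* out of room */
	memcpy(b->actions + b->num_actions * ACTION_SIZE, act, ACTION_SIZE);
	b->num_actions++;
	return 0;
}

int main(void)
{
	struct mod_hdr_buf b = { .max_actions = 2, .num_actions = 0 };

	b.actions = calloc(b.max_actions, ACTION_SIZE);
	if (!b.actions)
		return 1;
	printf("append 1: %d\n", append_action(&b, "SET_DMAC"));
	printf("append 2: %d\n", append_action(&b, "SET_TTL0"));
	printf("append 3: %d\n", append_action(&b, "SET_SMAC"));	/* full */
	printf("used %d of %d slots\n", b.num_actions, b.max_actions);
	free(b.actions);
	return 0;
}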
static const struct pedit_headers zero_masks = {};
static int parse_tc_pedit_action(struct mlx5e_priv *priv,
- const struct tc_action *a, int namespace,
+ const struct flow_action_entry *act, int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct pedit_headers_action *hdrs,
struct netlink_ext_ack *extack)
{
- struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
- int nkeys, i, err = -EOPNOTSUPP;
+ u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
+ int err = -EOPNOTSUPP;
u32 mask, val, offset;
- u8 cmd, htype;
+ u8 htype;
- nkeys = tcf_pedit_nkeys(a);
+ htype = act->mangle.htype;
+ err = -EOPNOTSUPP; /* can't be all optimistic */
- memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
- memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
+ if (htype == FLOW_ACT_MANGLE_UNSPEC) {
+ NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
+ goto out_err;
+ }
- for (i = 0; i < nkeys; i++) {
- htype = tcf_pedit_htype(a, i);
- cmd = tcf_pedit_cmd(a, i);
- err = -EOPNOTSUPP; /* can't be all optimistic */
+ mask = act->mangle.mask;
+ val = act->mangle.val;
+ offset = act->mangle.offset;
- if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
- NL_SET_ERR_MSG_MOD(extack,
- "legacy pedit isn't offloaded");
- goto out_err;
- }
+ err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
+ if (err)
+ goto out_err;
- if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
- NL_SET_ERR_MSG_MOD(extack, "pedit cmd isn't offloaded");
- goto out_err;
- }
+ hdrs[cmd].pedits++;
+
+ return 0;
+out_err:
+ return err;
+}
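
After this change parse_tc_pedit_action() only records a single mangle/add entry into hdrs[]; allocating the HW action buffer and translating the accumulated fields happen once, after the whole action list has been walked. Condensed from parse_tc_nic_actions()/parse_tc_fdb_actions() further down:

	struct pedit_headers_action hdrs[2] = {};
	const struct flow_action_entry *act;
	int err, i;

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_MANGLE:
		case FLOW_ACTION_ADD:
			err = parse_tc_pedit_action(priv, act, namespace,
						    parse_attr, hdrs, extack);
			if (err)
				return err;
			break;
		}
	}

	if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
	    hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
		err = alloc_tc_pedit_action(priv, namespace, parse_attr,
					    hdrs, extack);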
- mask = tcf_pedit_mask(a, i);
- val = tcf_pedit_val(a, i);
- offset = tcf_pedit_offset(a, i);
+static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
+{
+ struct pedit_headers *cmd_masks;
+ int err;
+ u8 cmd;
- err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]);
+ if (!parse_attr->mod_hdr_actions) {
+ err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
if (err)
goto out_err;
}
- err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr);
- if (err)
- goto out_err;
-
- err = offload_pedit_fields(masks, vals, parse_attr, extack);
+ err = offload_pedit_fields(hdrs, parse_attr, extack);
if (err < 0)
goto out_dealloc_parsed_actions;
for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
- cmd_masks = &masks[cmd];
+ cmd_masks = &hdrs[cmd].masks;
if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
NL_SET_ERR_MSG_MOD(extack,
"attempt to offload an unsupported field");
@@ -2131,19 +2158,69 @@ static bool csum_offload_supported(struct mlx5e_priv *priv,
return true;
}
+struct ip_ttl_word {
+ __u8 ttl;
+ __u8 protocol;
+ __sum16 check;
+};
+
+struct ipv6_hoplimit_word {
+ __be16 payload_len;
+ __u8 nexthdr;
+ __u8 hop_limit;
+};
+
+static bool is_action_keys_supported(const struct flow_action_entry *act)
+{
+ u32 mask, offset;
+ u8 htype;
+
+ htype = act->mangle.htype;
+ offset = act->mangle.offset;
+ mask = ~act->mangle.mask;
+	/* For the IPv4 & IPv6 headers, check the full 4-byte word to
+	 * determine whether the modified fields are limited to
+	 * ttl & hop_limit.
+ */
+ if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
+ struct ip_ttl_word *ttl_word =
+ (struct ip_ttl_word *)&mask;
+
+ if (offset != offsetof(struct iphdr, ttl) ||
+ ttl_word->protocol ||
+ ttl_word->check) {
+ return true;
+ }
+ } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
+ struct ipv6_hoplimit_word *hoplimit_word =
+ (struct ipv6_hoplimit_word *)&mask;
+
+ if (offset != offsetof(struct ipv6hdr, payload_len) ||
+ hoplimit_word->payload_len ||
+ hoplimit_word->nexthdr) {
+ return true;
+ }
+ }
+ return false;
+}
+
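is_action_keys_supported() leans on pedit handing the driver an aligned 32-bit mask per key: overlaying a small struct on that word answers, byte by byte, whether anything besides ttl (or hop_limit) is being rewritten (the driver first inverts act->mangle.mask so that set bits mean "modified"). A self-contained user-space version of the IPv4 case (invented names; memcpy stands in for the driver's direct cast):

#include <stdio.h>
#include <string.h>

/* bytes 8..11 of an IPv4 header: ttl, protocol, header checksum */
struct ip_ttl_word {
	unsigned char  ttl;
	unsigned char  protocol;
	unsigned short check;
};

/* does a 32-bit pedit mask at 'offset' touch more than the ttl byte? */
static int touches_more_than_ttl(unsigned int offset, unsigned int mask)
{
	struct ip_ttl_word w;

	if (offset != 8)	/* 8 == offsetof(struct iphdr, ttl) & ~3 */
		return 1;
	memcpy(&w, &mask, sizeof(w));	/* overlay the mask word */
	return w.protocol || w.check;
}

int main(void)
{
	unsigned int ttl_only = 0;
	struct ip_ttl_word m = { .ttl = 0xff };

	memcpy(&ttl_only, &m, sizeof(ttl_only));
	printf("ttl-only mask: %d\n", touches_more_than_ttl(8, ttl_only));
	printf("wider mask   : %d\n", touches_more_than_ttl(8, ~0u));
	return 0;
}
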
static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
- struct tcf_exts *exts,
+ struct flow_action *flow_action,
+ u32 actions,
struct netlink_ext_ack *extack)
{
- const struct tc_action *a;
+ const struct flow_action_entry *act;
bool modify_ip_header;
- LIST_HEAD(actions);
- u8 htype, ip_proto;
void *headers_v;
u16 ethertype;
- int nkeys, i;
+ u8 ip_proto;
+ int i;
+
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
+ else
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
/* for non-IP we only re-write MACs, so we're okay */
@@ -2151,20 +2228,14 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
goto out_ok;
modify_ip_header = false;
- tcf_exts_for_each_action(i, a, exts) {
- int k;
-
- if (!is_tcf_pedit(a))
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id != FLOW_ACTION_MANGLE &&
+ act->id != FLOW_ACTION_ADD)
continue;
- nkeys = tcf_pedit_nkeys(a);
- for (k = 0; k < nkeys; k++) {
- htype = tcf_pedit_htype(a, k);
- if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 ||
- htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) {
- modify_ip_header = true;
- break;
- }
+ if (is_action_keys_supported(act)) {
+ modify_ip_header = true;
+ break;
}
}
@@ -2182,7 +2253,7 @@ out_ok:
}
static bool actions_match_supported(struct mlx5e_priv *priv,
- struct tcf_exts *exts,
+ struct flow_action *flow_action,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
@@ -2199,7 +2270,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
return false;
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- return modify_header_match_supported(&parse_attr->spec, exts,
+ return modify_header_match_supported(&parse_attr->spec,
+ flow_action, actions,
extack);
return true;
@@ -2219,57 +2291,54 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
return (fsystem_guid == psystem_guid);
}
-static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+static int parse_tc_nic_actions(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
- const struct tc_action *a;
- LIST_HEAD(actions);
+ struct pedit_headers_action hdrs[2] = {};
+ const struct flow_action_entry *act;
u32 action = 0;
int err, i;
- if (!tcf_exts_has_actions(exts))
+ if (!flow_action_has_entries(flow_action))
return -EINVAL;
attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
- tcf_exts_for_each_action(i, a, exts) {
- if (is_tcf_gact_shot(a)) {
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_DROP:
action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
if (MLX5_CAP_FLOWTABLE(priv->mdev,
flow_table_properties_nic_receive.flow_counter))
action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- continue;
- }
-
- if (is_tcf_pedit(a)) {
- err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL,
- parse_attr, extack);
+ break;
+ case FLOW_ACTION_MANGLE:
+ case FLOW_ACTION_ADD:
+ err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
+ parse_attr, hdrs, extack);
if (err)
return err;
action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- continue;
- }
-
- if (is_tcf_csum(a)) {
+ break;
+ case FLOW_ACTION_CSUM:
if (csum_offload_supported(priv, action,
- tcf_csum_update_flags(a),
+ act->csum_flags,
extack))
- continue;
+ break;
return -EOPNOTSUPP;
- }
-
- if (is_tcf_mirred_egress_redirect(a)) {
- struct net_device *peer_dev = tcf_mirred_dev(a);
+ case FLOW_ACTION_REDIRECT: {
+ struct net_device *peer_dev = act->dev;
if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
same_hw_devs(priv, netdev_priv(peer_dev))) {
- parse_attr->mirred_ifindex = peer_dev->ifindex;
+ parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
@@ -2280,11 +2349,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
peer_dev->name);
return -EINVAL;
}
- continue;
- }
-
- if (is_tcf_skbedit_mark(a)) {
- u32 mark = tcf_skbedit_mark(a);
+ }
+ break;
+ case FLOW_ACTION_MARK: {
+ u32 mark = act->mark;
if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
NL_SET_ERR_MSG_MOD(extack,
@@ -2294,70 +2362,47 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
attr->flow_tag = mark;
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- continue;
+ }
+ break;
+ default:
+ return -EINVAL;
}
+ }
- return -EINVAL;
+ if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
+ hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
+ err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
+ parse_attr, hdrs, extack);
+ if (err)
+ return err;
}
attr->action = action;
- if (!actions_match_supported(priv, exts, parse_attr, flow, extack))
+ if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
return -EOPNOTSUPP;
return 0;
}
-static inline int cmp_encap_info(struct ip_tunnel_key *a,
- struct ip_tunnel_key *b)
-{
- return memcmp(a, b, sizeof(*a));
-}
+struct encap_key {
+ struct ip_tunnel_key *ip_tun_key;
+ int tunnel_type;
+};
-static inline int hash_encap_info(struct ip_tunnel_key *key)
+static inline int cmp_encap_info(struct encap_key *a,
+ struct encap_key *b)
{
- return jhash(key, sizeof(*key), 0);
+ return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
+ a->tunnel_type != b->tunnel_type;
}
-static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct flowi4 *fl4,
- struct neighbour **out_n,
- u8 *out_ttl)
+static inline int hash_encap_info(struct encap_key *key)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5e_rep_priv *uplink_rpriv;
- struct rtable *rt;
- struct neighbour *n = NULL;
-
-#if IS_ENABLED(CONFIG_INET)
- int ret;
-
- rt = ip_route_output_key(dev_net(mirred_dev), fl4);
- ret = PTR_ERR_OR_ZERO(rt);
- if (ret)
- return ret;
-#else
- return -EOPNOTSUPP;
-#endif
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- /* if the egress device isn't on the same HW e-switch, we use the uplink */
- if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
- *out_dev = uplink_rpriv->netdev;
- else
- *out_dev = rt->dst.dev;
-
- if (!(*out_ttl))
- *out_ttl = ip4_dst_hoplimit(&rt->dst);
- n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
- ip_rt_put(rt);
- if (!n)
- return -ENOMEM;
-
- *out_n = n;
- return 0;
+ return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
+ key->tunnel_type);
}
+
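The encap table key grows from the bare ip_tunnel_key to an {ip_tun_key, tunnel_type} pair, so two tunnel types with identical outer parameters no longer compare (or hash) as the same entry: equality checks both members, and the hash folds tunnel_type in as the jhash seed. A user-space sketch of the same composite-key idea (stand-in hash function, invented names):

#include <stdio.h>
#include <string.h>

struct tun_key {
	unsigned int dst_ip;
	unsigned short dst_port;
};

struct encap_key {
	struct tun_key *k;
	int tunnel_type;
};

/* nonzero iff the keys differ -- mirrors cmp_encap_info() */
static int cmp_key(const struct encap_key *a, const struct encap_key *b)
{
	return memcmp(a->k, b->k, sizeof(*a->k)) ||
	       a->tunnel_type != b->tunnel_type;
}

/* FNV-1a over the key bytes, seeded with tunnel_type (jhash stand-in) */
static unsigned int hash_key(const struct encap_key *key)
{
	const unsigned char *p = (const unsigned char *)key->k;
	unsigned int h = 2166136261u ^ (unsigned int)key->tunnel_type;
	size_t i;

	for (i = 0; i < sizeof(*key->k); i++)
		h = (h ^ p[i]) * 16777619u;
	return h;
}

int main(void)
{
	struct tun_key t;
	struct encap_key vxlan, gre;

	memset(&t, 0, sizeof(t));	/* keep padding deterministic */
	t.dst_ip = 0x0a000001;
	t.dst_port = 4789;

	vxlan.k = &t; vxlan.tunnel_type = 1;
	gre.k = &t;   gre.tunnel_type = 2;

	printf("same key? %s\n", cmp_key(&vxlan, &gre) ? "no" : "yes");
	printf("hashes: %u vs %u\n", hash_key(&vxlan), hash_key(&gre));
	return 0;
}
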
static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
struct net_device *peer_netdev)
{
@@ -2366,389 +2411,45 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
peer_priv = netdev_priv(peer_netdev);
return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
- (priv->netdev->netdev_ops == peer_netdev->netdev_ops) &&
- same_hw_devs(priv, peer_priv) &&
- MLX5_VPORT_MANAGER(peer_priv->mdev) &&
- (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS));
-}
-
-static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct net_device **out_dev,
- struct flowi6 *fl6,
- struct neighbour **out_n,
- u8 *out_ttl)
-{
- struct neighbour *n = NULL;
- struct dst_entry *dst;
-
-#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
- struct mlx5e_rep_priv *uplink_rpriv;
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- int ret;
-
- ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
- fl6);
- if (ret < 0)
- return ret;
-
- if (!(*out_ttl))
- *out_ttl = ip6_dst_hoplimit(dst);
-
- uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- /* if the egress device isn't on the same HW e-switch, we use the uplink */
- if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
- *out_dev = uplink_rpriv->netdev;
- else
- *out_dev = dst->dev;
-#else
- return -EOPNOTSUPP;
-#endif
-
- n = dst_neigh_lookup(dst, &fl6->daddr);
- dst_release(dst);
- if (!n)
- return -ENOMEM;
-
- *out_n = n;
- return 0;
-}
-
-static void gen_vxlan_header_ipv4(struct net_device *out_dev,
- char buf[], int encap_size,
- unsigned char h_dest[ETH_ALEN],
- u8 tos, u8 ttl,
- __be32 daddr,
- __be32 saddr,
- __be16 udp_dst_port,
- __be32 vx_vni)
-{
- struct ethhdr *eth = (struct ethhdr *)buf;
- struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
- struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
- struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
-
- memset(buf, 0, encap_size);
-
- ether_addr_copy(eth->h_dest, h_dest);
- ether_addr_copy(eth->h_source, out_dev->dev_addr);
- eth->h_proto = htons(ETH_P_IP);
-
- ip->daddr = daddr;
- ip->saddr = saddr;
-
- ip->tos = tos;
- ip->ttl = ttl;
- ip->protocol = IPPROTO_UDP;
- ip->version = 0x4;
- ip->ihl = 0x5;
-
- udp->dest = udp_dst_port;
- vxh->vx_flags = VXLAN_HF_VNI;
- vxh->vx_vni = vxlan_vni_field(vx_vni);
-}
-
-static void gen_vxlan_header_ipv6(struct net_device *out_dev,
- char buf[], int encap_size,
- unsigned char h_dest[ETH_ALEN],
- u8 tos, u8 ttl,
- struct in6_addr *daddr,
- struct in6_addr *saddr,
- __be16 udp_dst_port,
- __be32 vx_vni)
-{
- struct ethhdr *eth = (struct ethhdr *)buf;
- struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
- struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
- struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
-
- memset(buf, 0, encap_size);
-
- ether_addr_copy(eth->h_dest, h_dest);
- ether_addr_copy(eth->h_source, out_dev->dev_addr);
- eth->h_proto = htons(ETH_P_IPV6);
-
- ip6_flow_hdr(ip6h, tos, 0);
- /* the HW fills up ipv6 payload len */
- ip6h->nexthdr = IPPROTO_UDP;
- ip6h->hop_limit = ttl;
- ip6h->daddr = *daddr;
- ip6h->saddr = *saddr;
-
- udp->dest = udp_dst_port;
- vxh->vx_flags = VXLAN_HF_VNI;
- vxh->vx_vni = vxlan_vni_field(vx_vni);
-}
-
-static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct mlx5e_encap_entry *e)
-{
- int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
- int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
- struct net_device *out_dev;
- struct neighbour *n = NULL;
- struct flowi4 fl4 = {};
- u8 nud_state, tos, ttl;
- char *encap_header;
- int err;
-
- if (max_encap_size < ipv4_encap_size) {
- mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
- ipv4_encap_size, max_encap_size);
- return -EOPNOTSUPP;
- }
-
- encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
- if (!encap_header)
- return -ENOMEM;
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- fl4.flowi4_proto = IPPROTO_UDP;
- fl4.fl4_dport = tun_key->tp_dst;
- break;
- default:
- err = -EOPNOTSUPP;
- goto free_encap;
- }
-
- tos = tun_key->tos;
- ttl = tun_key->ttl;
-
- fl4.flowi4_tos = tun_key->tos;
- fl4.daddr = tun_key->u.ipv4.dst;
- fl4.saddr = tun_key->u.ipv4.src;
-
- err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
- &fl4, &n, &ttl);
- if (err)
- goto free_encap;
-
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
-
-	/* It's important to add the neigh to the hash table before checking
-	 * its validity state, so that if we get a notification when the
-	 * neigh changes its validity state, we can find the relevant neigh
-	 * in the hash.
-	 */
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
- if (err)
- goto free_encap;
-
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- gen_vxlan_header_ipv4(out_dev, encap_header,
- ipv4_encap_size, e->h_dest, tos, ttl,
- fl4.daddr,
- fl4.saddr, tun_key->tp_dst,
- tunnel_id_to_key32(tun_key->tun_id));
- break;
- default:
- err = -EOPNOTSUPP;
- goto destroy_neigh_entry;
- }
- e->encap_size = ipv4_encap_size;
- e->encap_header = encap_header;
-
- if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
- err = -EAGAIN;
- goto out;
- }
-
- err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
- ipv4_encap_size, encap_header,
- MLX5_FLOW_NAMESPACE_FDB,
- &e->encap_id);
- if (err)
- goto destroy_neigh_entry;
-
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- neigh_release(n);
- return err;
-
-destroy_neigh_entry:
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-free_encap:
- kfree(encap_header);
-out:
- if (n)
- neigh_release(n);
- return err;
+ mlx5e_eswitch_rep(priv->netdev) &&
+ mlx5e_eswitch_rep(peer_netdev) &&
+ same_hw_devs(priv, peer_priv));
}
-static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
- struct mlx5e_encap_entry *e)
-{
- int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
- int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
- struct net_device *out_dev;
- struct neighbour *n = NULL;
- struct flowi6 fl6 = {};
- u8 nud_state, tos, ttl;
- char *encap_header;
- int err;
-
- if (max_encap_size < ipv6_encap_size) {
- mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
- ipv6_encap_size, max_encap_size);
- return -EOPNOTSUPP;
- }
-
- encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
- if (!encap_header)
- return -ENOMEM;
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- fl6.flowi6_proto = IPPROTO_UDP;
- fl6.fl6_dport = tun_key->tp_dst;
- break;
- default:
- err = -EOPNOTSUPP;
- goto free_encap;
- }
-
- tos = tun_key->tos;
- ttl = tun_key->ttl;
-
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
- fl6.daddr = tun_key->u.ipv6.dst;
- fl6.saddr = tun_key->u.ipv6.src;
-
- err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
- &fl6, &n, &ttl);
- if (err)
- goto free_encap;
-
- /* used by mlx5e_detach_encap to lookup a neigh hash table
- * entry in the neigh hash table when a user deletes a rule
- */
- e->m_neigh.dev = n->dev;
- e->m_neigh.family = n->ops->family;
- memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
- e->out_dev = out_dev;
-
-	/* It's important to add the neigh to the hash table before checking
-	 * its validity state, so that if we get a notification when the
-	 * neigh changes its validity state, we can find the relevant neigh
-	 * in the hash.
-	 */
- err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
- if (err)
- goto free_encap;
-
- read_lock_bh(&n->lock);
- nud_state = n->nud_state;
- ether_addr_copy(e->h_dest, n->ha);
- read_unlock_bh(&n->lock);
-
- switch (e->tunnel_type) {
- case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
- gen_vxlan_header_ipv6(out_dev, encap_header,
- ipv6_encap_size, e->h_dest, tos, ttl,
- &fl6.daddr,
- &fl6.saddr, tun_key->tp_dst,
- tunnel_id_to_key32(tun_key->tun_id));
- break;
- default:
- err = -EOPNOTSUPP;
- goto destroy_neigh_entry;
- }
-
- e->encap_size = ipv6_encap_size;
- e->encap_header = encap_header;
-
- if (!(nud_state & NUD_VALID)) {
- neigh_event_send(n, NULL);
- err = -EAGAIN;
- goto out;
- }
-
- err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
- ipv6_encap_size, encap_header,
- MLX5_FLOW_NAMESPACE_FDB,
- &e->encap_id);
- if (err)
- goto destroy_neigh_entry;
- e->flags |= MLX5_ENCAP_ENTRY_VALID;
- mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
- neigh_release(n);
- return err;
-
-destroy_neigh_entry:
- mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-free_encap:
- kfree(encap_header);
-out:
- if (n)
- neigh_release(n);
- return err;
-}
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
- struct ip_tunnel_info *tun_info,
+ struct mlx5e_tc_flow *flow,
struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
struct net_device **encap_dev,
- struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack)
+ bool *encap_valid)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- unsigned short family = ip_tunnel_info_af(tun_info);
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
- struct ip_tunnel_key *key = &tun_info->key;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct ip_tunnel_info *tun_info;
+ struct encap_key key, e_key;
struct mlx5e_encap_entry *e;
- int tunnel_type, err = 0;
+ unsigned short family;
uintptr_t hash_key;
bool found = false;
+ int err = 0;
- /* udp dst port must be set */
- if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
- goto vxlan_encap_offload_err;
-
- /* setting udp src port isn't supported */
- if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
-vxlan_encap_offload_err:
- NL_SET_ERR_MSG_MOD(extack,
- "must set udp dst port and not set udp src port");
- netdev_warn(priv->netdev,
- "must set udp dst port and not set udp src port\n");
- return -EOPNOTSUPP;
- }
-
- if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
- tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
- } else {
- NL_SET_ERR_MSG_MOD(extack,
- "port isn't an offloaded vxlan udp dport");
- netdev_warn(priv->netdev,
- "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
- return -EOPNOTSUPP;
- }
+ parse_attr = attr->parse_attr;
+ tun_info = &parse_attr->tun_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+ key.ip_tun_key = &tun_info->key;
+ key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev);
- hash_key = hash_encap_info(key);
+ hash_key = hash_encap_info(&key);
hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
encap_hlist, hash_key) {
- if (!cmp_encap_info(&e->tun_info.key, key)) {
+ e_key.ip_tun_key = &e->tun_info.key;
+ e_key.tunnel_type = e->tunnel_type;
+ if (!cmp_encap_info(&e_key, &key)) {
found = true;
break;
}
@@ -2763,26 +2464,33 @@ vxlan_encap_offload_err:
return -ENOMEM;
e->tun_info = *tun_info;
- e->tunnel_type = tunnel_type;
+ err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
+ if (err)
+ goto out_err;
+
INIT_LIST_HEAD(&e->flows);
if (family == AF_INET)
- err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e);
+ err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
else if (family == AF_INET6)
- err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e);
+ err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
- if (err && err != -EAGAIN)
+ if (err)
goto out_err;
hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
attach_flow:
- list_add(&flow->encap, &e->flows);
+ list_add(&flow->encaps[out_index].list, &e->flows);
+ flow->encaps[out_index].index = out_index;
*encap_dev = e->out_dev;
- if (e->flags & MLX5_ENCAP_ENTRY_VALID)
- attr->encap_id = e->encap_id;
- else
- err = -EAGAIN;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ attr->dests[out_index].encap_id = e->encap_id;
+ attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ *encap_valid = true;
+ } else {
+ *encap_valid = false;
+ }
return err;
@@ -2792,7 +2500,7 @@ out_err:
}
static int parse_tc_vlan_action(struct mlx5e_priv *priv,
- const struct tc_action *a,
+ const struct flow_action_entry *act,
struct mlx5_esw_flow_attr *attr,
u32 *action)
{
@@ -2801,7 +2509,8 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv,
if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
return -EOPNOTSUPP;
- if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
+ switch (act->id) {
+ case FLOW_ACTION_VLAN_POP:
if (vlan_idx) {
if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
MLX5_FS_VLAN_DEPTH))
@@ -2811,10 +2520,11 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv,
} else {
*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
}
- } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
- attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a);
- attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a);
- attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a);
+ break;
+ case FLOW_ACTION_VLAN_PUSH:
+ attr->vlan_vid[vlan_idx] = act->vlan.vid;
+ attr->vlan_prio[vlan_idx] = act->vlan.prio;
+ attr->vlan_proto[vlan_idx] = act->vlan.proto;
if (!attr->vlan_proto[vlan_idx])
attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
@@ -2826,13 +2536,15 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv,
*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
} else {
if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
- (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
- tcf_vlan_push_prio(a)))
+ (act->vlan.proto != htons(ETH_P_8021Q) ||
+ act->vlan.prio))
return -EOPNOTSUPP;
*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
}
- } else { /* action is TCA_VLAN_ACT_MODIFY */
+ break;
+ default:
+		/* action is FLOW_ACTION_VLAN_MANGLE */
return -EOPNOTSUPP;
}
@@ -2841,59 +2553,63 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv,
return 0;
}
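
parse_tc_vlan_action() keeps the two-level bookkeeping: the first push sets the plain VLAN_PUSH bit, while a second (QinQ) push sets VLAN_PUSH_2 and is gated on firmware support for MLX5_FS_VLAN_DEPTH. Roughly, for a filter pushing an 802.1ad outer tag plus an 802.1q inner tag (a sketch of the resulting state, not driver code):

	/* first FLOW_ACTION_VLAN_PUSH: vlan_idx 0, outer 802.1ad */
	attr->vlan_proto[0] = htons(ETH_P_8021AD);
	*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;

	/* second FLOW_ACTION_VLAN_PUSH: vlan_idx 1, inner 802.1q */
	if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
						 MLX5_FS_VLAN_DEPTH))
		return -EOPNOTSUPP;	/* FW can't rewrite two tags */
	attr->vlan_proto[1] = htons(ETH_P_8021Q);
	*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;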
-static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
+ struct pedit_headers_action hdrs[2] = {};
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct ip_tunnel_info *info = NULL;
- const struct tc_action *a;
- LIST_HEAD(actions);
+ const struct ip_tunnel_info *info = NULL;
+ const struct flow_action_entry *act;
bool encap = false;
u32 action = 0;
int err, i;
- if (!tcf_exts_has_actions(exts))
+ if (!flow_action_has_entries(flow_action))
return -EINVAL;
attr->in_rep = rpriv->rep;
attr->in_mdev = priv->mdev;
- tcf_exts_for_each_action(i, a, exts) {
- if (is_tcf_gact_shot(a)) {
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_DROP:
action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
- continue;
- }
-
- if (is_tcf_pedit(a)) {
- err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
- parse_attr, extack);
+ break;
+ case FLOW_ACTION_MANGLE:
+ case FLOW_ACTION_ADD:
+ err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
+ parse_attr, hdrs, extack);
if (err)
return err;
action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- attr->mirror_count = attr->out_count;
- continue;
- }
-
- if (is_tcf_csum(a)) {
+ attr->split_count = attr->out_count;
+ break;
+ case FLOW_ACTION_CSUM:
if (csum_offload_supported(priv, action,
- tcf_csum_update_flags(a),
- extack))
- continue;
+ act->csum_flags, extack))
+ break;
return -EOPNOTSUPP;
- }
-
- if (is_tcf_mirred_egress_redirect(a) || is_tcf_mirred_egress_mirror(a)) {
+ case FLOW_ACTION_REDIRECT:
+ case FLOW_ACTION_MIRRED: {
struct mlx5e_priv *out_priv;
struct net_device *out_dev;
- out_dev = tcf_mirred_dev(a);
+ out_dev = act->dev;
+ if (!out_dev) {
+ /* out_dev is NULL when filters with
+ * non-existing mirred device are replayed to
+ * the driver.
+ */
+ return -EINVAL;
+ }
if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
NL_SET_ERR_MSG_MOD(extack,
@@ -2903,23 +2619,47 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
return -EOPNOTSUPP;
}
- if (switchdev_port_same_parent_id(priv->netdev,
- out_dev) ||
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ if (netdev_port_same_parent_id(priv->netdev,
+ out_dev) ||
is_merged_eswitch_dev(priv, out_dev)) {
- action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev);
+
+ if (uplink_upper &&
+ netif_is_lag_master(uplink_upper) &&
+ uplink_upper == out_dev)
+ out_dev = uplink_dev;
+
+ if (!mlx5e_eswitch_rep(out_dev))
+ return -EOPNOTSUPP;
+
out_priv = netdev_priv(out_dev);
rpriv = out_priv->ppriv;
- attr->out_rep[attr->out_count] = rpriv->rep;
- attr->out_mdev[attr->out_count++] = out_priv->mdev;
+ attr->dests[attr->out_count].rep = rpriv->rep;
+ attr->dests[attr->out_count].mdev = out_priv->mdev;
+ attr->out_count++;
} else if (encap) {
- parse_attr->mirred_ifindex = out_dev->ifindex;
- parse_attr->tun_info = *info;
+ parse_attr->mirred_ifindex[attr->out_count] =
+ out_dev->ifindex;
+ parse_attr->tun_info[attr->out_count] = *info;
+ encap = false;
attr->parse_attr = parse_attr;
- action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- /* attr->out_rep is resolved when we handle encap */
+ attr->dests[attr->out_count].flags |=
+ MLX5_ESW_DEST_ENCAP;
+ attr->out_count++;
+ /* attr->dests[].rep is resolved when we
+ * handle encap
+ */
+ } else if (parse_attr->filter_dev != priv->netdev) {
+				/* All mlx5 devices are called to configure
+				 * high-level device filters. Therefore, the
+				 * *attempt* to install a filter on an invalid
+				 * eswitch should not trigger an explicit error
+ */
+ return -EINVAL;
} else {
NL_SET_ERR_MSG_MOD(extack,
"devices are not on same switch HW, can't offload forwarding");
@@ -2927,36 +2667,29 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
priv->netdev->name, out_dev->name);
return -EINVAL;
}
- continue;
- }
-
- if (is_tcf_tunnel_set(a)) {
- info = tcf_tunnel_info(a);
+ }
+ break;
+ case FLOW_ACTION_TUNNEL_ENCAP:
+ info = act->tunnel;
if (info)
encap = true;
else
return -EOPNOTSUPP;
- attr->mirror_count = attr->out_count;
- continue;
- }
-
- if (is_tcf_vlan(a)) {
- err = parse_tc_vlan_action(priv, a, attr, &action);
+ break;
+ case FLOW_ACTION_VLAN_PUSH:
+ case FLOW_ACTION_VLAN_POP:
+ err = parse_tc_vlan_action(priv, act, attr, &action);
if (err)
return err;
- attr->mirror_count = attr->out_count;
- continue;
- }
-
- if (is_tcf_tunnel_release(a)) {
+ attr->split_count = attr->out_count;
+ break;
+ case FLOW_ACTION_TUNNEL_DECAP:
action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
- continue;
- }
-
- if (is_tcf_gact_goto_chain(a)) {
- u32 dest_chain = tcf_gact_goto_chain_index(a);
+ break;
+ case FLOW_ACTION_GOTO: {
+ u32 dest_chain = act->chain_index;
u32 max_chain = mlx5_eswitch_get_chain_range(esw);
if (dest_chain <= attr->chain) {
@@ -2969,15 +2702,23 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
}
action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
attr->dest_chain = dest_chain;
-
- continue;
+ break;
+ }
+ default:
+ return -EINVAL;
}
+ }
- return -EINVAL;
+ if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
+ hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
+ err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
+ parse_attr, hdrs, extack);
+ if (err)
+ return err;
}
attr->action = action;
- if (!actions_match_supported(priv, exts, parse_attr, flow, extack))
+ if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
return -EOPNOTSUPP;
if (attr->dest_chain) {
@@ -2988,7 +2729,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
}
- if (attr->mirror_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+ if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
NL_SET_ERR_MSG_MOD(extack,
"current firmware doesn't support split rule for port mirroring");
netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
@@ -3007,6 +2748,11 @@ static void get_flags(int flags, u16 *flow_flags)
if (flags & MLX5E_TC_EGRESS)
__flow_flags |= MLX5E_TC_FLOW_EGRESS;
+ if (flags & MLX5E_TC_ESW_OFFLOAD)
+ __flow_flags |= MLX5E_TC_FLOW_ESWITCH;
+ if (flags & MLX5E_TC_NIC_OFFLOAD)
+ __flow_flags |= MLX5E_TC_FLOW_NIC;
+
*flow_flags = __flow_flags;
}
@@ -3017,18 +2763,39 @@ static const struct rhashtable_params tc_ht_params = {
.automatic_shrinking = true,
};
-static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
+static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *uplink_rpriv;
- if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) {
+ if (flags & MLX5E_TC_ESW_OFFLOAD) {
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
- return &uplink_rpriv->tc_ht;
- } else
+ return &uplink_rpriv->uplink_priv.tc_ht;
+ } else /* NIC offload */
return &priv->fs.tc.ht;
}
+static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
+ flow->flags & MLX5E_TC_FLOW_INGRESS;
+ bool act_is_encap = !!(attr->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
+ bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS);
+
+ if (!esw_paired)
+ return false;
+
+ if ((mlx5_lag_is_sriov(attr->in_mdev) ||
+ mlx5_lag_is_multipath(attr->in_mdev)) &&
+ (is_rep_ingress || act_is_encap))
+ return true;
+
+ return false;
+}
+
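Concretely: with the two eswitches devcom-paired, a flow is duplicated to the peer only when LAG is active in SR-IOV or multipath mode and the rule is either installed on the ingress of a non-uplink representor or carries an encap (packet reformat) action; anything else remains a single-eswitch flow.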
static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
struct tc_cls_flower_offload *f, u16 flow_flags,
@@ -3050,10 +2817,6 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
flow->flags = flow_flags;
flow->priv = priv;
- err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
- if (err)
- goto err_free;
-
*__flow = flow;
*__parse_attr = parse_attr;
@@ -3065,12 +2828,39 @@ err_free:
return err;
}
-static int
-mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f,
- u16 flow_flags,
- struct mlx5e_tc_flow **__flow)
+static void
+mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
+ struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct tc_cls_flower_offload *f,
+ struct mlx5_eswitch_rep *in_rep,
+ struct mlx5_core_dev *in_mdev)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ esw_attr->parse_attr = parse_attr;
+ esw_attr->chain = f->common.chain_index;
+ esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
+
+ esw_attr->in_rep = in_rep;
+ esw_attr->in_mdev = in_mdev;
+
+ if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
+ MLX5_COUNTER_SOURCE_ESWITCH)
+ esw_attr->counter_dev = in_mdev;
+ else
+ esw_attr->counter_dev = priv->mdev;
+}
+
+static struct mlx5e_tc_flow *
+__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct net_device *filter_dev,
+ struct mlx5_eswitch_rep *in_rep,
+ struct mlx5_core_dev *in_mdev)
{
+ struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_tc_flow *flow;
@@ -3083,28 +2873,118 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto out;
- flow->esw_attr->chain = f->common.chain_index;
- flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
- err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, extack);
+ parse_attr->filter_dev = filter_dev;
+ mlx5e_flow_esw_attr_init(flow->esw_attr,
+ priv, parse_attr,
+ f, in_rep, in_mdev);
+
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
+ f, filter_dev);
if (err)
goto err_free;
- err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack);
+ err = parse_tc_fdb_actions(priv, &rule->action, parse_attr, flow, extack);
if (err)
goto err_free;
- if (!(flow->esw_attr->action &
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT))
- kvfree(parse_attr);
+ err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
+ if (err) {
+ if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
+ goto err_free;
- *__flow = flow;
+ add_unready_flow(flow);
+ }
- return 0;
+ return flow;
err_free:
kfree(flow);
kvfree(parse_attr);
out:
+ return ERR_PTR(err);
+}
+
+static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f,
+ struct mlx5e_tc_flow *flow,
+ u16 flow_flags)
+{
+ struct mlx5e_priv *priv = flow->priv, *peer_priv;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_rep_priv *peer_urpriv;
+ struct mlx5e_tc_flow *peer_flow;
+ struct mlx5_core_dev *in_mdev;
+ int err = 0;
+
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return -ENODEV;
+
+ peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
+ peer_priv = netdev_priv(peer_urpriv->netdev);
+
+	/* in_mdev is assigned to the mdev from which the packet
+	 * originated: packets redirected to the uplink use the same
+	 * mdev as the original flow, while packets redirected from
+	 * the uplink use the peer mdev.
+	 */
+ if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
+ in_mdev = peer_priv->mdev;
+ else
+ in_mdev = priv->mdev;
+
+ parse_attr = flow->esw_attr->parse_attr;
+ peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
+ parse_attr->filter_dev,
+ flow->esw_attr->in_rep, in_mdev);
+ if (IS_ERR(peer_flow)) {
+ err = PTR_ERR(peer_flow);
+ goto out;
+ }
+
+ flow->peer_flow = peer_flow;
+ flow->flags |= MLX5E_TC_FLOW_DUP;
+ mutex_lock(&esw->offloads.peer_mutex);
+ list_add_tail(&flow->peer, &esw->offloads.peer_flows);
+ mutex_unlock(&esw->offloads.peer_mutex);
+
+out:
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ return err;
+}
+
+static int
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct net_device *filter_dev,
+ struct mlx5e_tc_flow **__flow)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *in_rep = rpriv->rep;
+ struct mlx5_core_dev *in_mdev = priv->mdev;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
+ in_mdev);
+ if (IS_ERR(flow))
+ return PTR_ERR(flow);
+
+ if (is_peer_flow_needed(flow)) {
+ err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
+ if (err) {
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ goto out;
+ }
+ }
+
+ *__flow = flow;
+
+ return 0;
+
+out:
return err;
}
@@ -3112,8 +2992,10 @@ static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
u16 flow_flags,
+ struct net_device *filter_dev,
struct mlx5e_tc_flow **__flow)
{
+ struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_tc_flow *flow;
@@ -3130,7 +3012,13 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
if (err)
goto out;
- err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack);
+ parse_attr->filter_dev = filter_dev;
+ err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
+ f, filter_dev);
+ if (err)
+ goto err_free;
+
+ err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
if (err)
goto err_free;
@@ -3155,6 +3043,7 @@ static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
int flags,
+ struct net_device *filter_dev,
struct mlx5e_tc_flow **flow)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -3167,18 +3056,20 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
if (esw && esw->mode == SRIOV_OFFLOADS)
- err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow);
+ err = mlx5e_add_fdb_flow(priv, f, flow_flags,
+ filter_dev, flow);
else
- err = mlx5e_add_nic_flow(priv, f, flow_flags, flow);
+ err = mlx5e_add_nic_flow(priv, f, flow_flags,
+ filter_dev, flow);
return err;
}
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags)
{
struct netlink_ext_ack *extack = f->common.extack;
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
struct mlx5e_tc_flow *flow;
int err = 0;
@@ -3192,7 +3083,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
goto out;
}
- err = mlx5e_tc_add_flow(priv, f, flags, &flow);
+ err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
if (err)
goto out;
@@ -3220,10 +3111,10 @@ static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
return false;
}
-int mlx5e_delete_flower(struct mlx5e_priv *priv,
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags)
{
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
struct mlx5e_tc_flow *flow;
flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
@@ -3239,30 +3130,57 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv,
return 0;
}
-int mlx5e_stats_flower(struct mlx5e_priv *priv,
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags)
{
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+ struct mlx5_eswitch *peer_esw;
struct mlx5e_tc_flow *flow;
struct mlx5_fc *counter;
- u64 bytes;
- u64 packets;
- u64 lastuse;
+ u64 lastuse = 0;
+ u64 packets = 0;
+ u64 bytes = 0;
flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
if (!flow || !same_flow_direction(flow, flags))
return -EINVAL;
- if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
- return 0;
+ if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
+ counter = mlx5e_tc_get_counter(flow);
+ if (!counter)
+ return 0;
- counter = mlx5e_tc_get_counter(flow);
- if (!counter)
- return 0;
+ mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+ }
+
+ /* Under multipath it's possible for one rule to be currently
+ * un-offloaded while the other rule is offloaded.
+ */
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ goto out;
- mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+ if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
+ (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
+ u64 bytes2;
+ u64 packets2;
+ u64 lastuse2;
- tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
+ counter = mlx5e_tc_get_counter(flow->peer_flow);
+ if (!counter)
+ goto no_peer_counter;
+ mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
+
+ bytes += bytes2;
+ packets += packets2;
+ lastuse = max_t(u64, lastuse, lastuse2);
+ }
+
+no_peer_counter:
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+out:
+ flow_stats_update(&f->stats, bytes, packets, lastuse);
return 0;
}
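For reference, the merge applied above to duplicated (peer) flows reduces to a simple rule: byte and packet counters add up, and lastuse takes the more recent of the two timestamps. A minimal sketch, assuming a hypothetical helper name:

/* Hypothetical helper illustrating the peer-stats merge above */
static void merge_flow_stats(u64 *bytes, u64 *packets, u64 *lastuse,
			     u64 bytes2, u64 packets2, u64 lastuse2)
{
	*bytes   += bytes2;
	*packets += packets2;
	*lastuse  = max_t(u64, *lastuse, lastuse2);
}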
@@ -3350,7 +3268,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
if (tc->netdevice_nb.notifier_call)
unregister_netdevice_notifier(&tc->netdevice_nb);
- rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
+ rhashtable_destroy(&tc->ht);
if (!IS_ERR_OR_NULL(tc->t)) {
mlx5_destroy_flow_table(tc->t);
@@ -3368,9 +3286,32 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags)
{
- struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct rhashtable *tc_ht = get_tc_ht(priv, flags);
return atomic_read(&tc_ht->nelems);
}
+
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
+{
+ struct mlx5e_tc_flow *flow, *tmp;
+
+ list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
+ __mlx5e_tc_del_fdb_peer_flow(flow);
+}
+
+void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
+{
+ struct mlx5_rep_uplink_priv *rpriv =
+ container_of(work, struct mlx5_rep_uplink_priv,
+ reoffload_flows_work);
+ struct mlx5e_tc_flow *flow, *tmp;
+
+ rtnl_lock();
+ list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
+ if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
+ remove_unready_flow(flow);
+ }
+ rtnl_unlock();
+}
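mlx5e_tc_reoffload_flows_work() walks rpriv->unready_flows, so the uplink representor presumably initializes that list and work item elsewhere. A sketch of the expected wiring; the init path and the workqueue (priv->wq) are assumptions, not shown in this patch:

	/* Assumed wiring on the uplink rep (init path not in this patch) */
	INIT_LIST_HEAD(&uplink_priv->unready_flows);
	INIT_WORK(&uplink_priv->reoffload_flows_work,
		  mlx5e_tc_reoffload_flows_work);

	/* later, e.g. when the uplink becomes reachable again: */
	queue_work(priv->wq, &uplink_priv->reoffload_flows_work);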
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 49436bf3b80a..f62e81902d27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -42,7 +42,9 @@
enum {
MLX5E_TC_INGRESS = BIT(0),
MLX5E_TC_EGRESS = BIT(1),
- MLX5E_TC_LAST_EXPORTED_BIT = 1,
+ MLX5E_TC_NIC_OFFLOAD = BIT(2),
+ MLX5E_TC_ESW_OFFLOAD = BIT(3),
+ MLX5E_TC_LAST_EXPORTED_BIT = 3,
};
int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
@@ -51,12 +53,12 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
+int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
-int mlx5e_delete_flower(struct mlx5e_priv *priv,
+int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
-int mlx5e_stats_flower(struct mlx5e_priv *priv,
+int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags);
struct mlx5e_encap_entry;
@@ -68,12 +70,14 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
struct mlx5e_neigh_hash_entry;
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
-int mlx5e_tc_num_filters(struct mlx5e_priv *priv);
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags);
+
+void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
-static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; }
+static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) { return 0; }
#endif
#endif /* __MLX5_EN_TC_H__ */
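Callers of the three entry points above now pass the net_device the filter was attached to. A minimal sketch of a flower block callback using the new signatures; the callback shape and flags are illustrative, not taken from this patch:

/* Illustrative flower dispatch using the dev-aware signatures above */
static int sketch_setup_tc_cls_flower(struct net_device *dev,
				      struct mlx5e_priv *priv,
				      struct tc_cls_flower_offload *f,
				      int flags)
{
	switch (f->command) {
	case TC_CLSFLOWER_REPLACE:
		return mlx5e_configure_flower(dev, priv, f, flags);
	case TC_CLSFLOWER_DESTROY:
		return mlx5e_delete_flower(dev, priv, f, flags);
	case TC_CLSFLOWER_STATS:
		return mlx5e_stats_flower(dev, priv, f, flags);
	default:
		return -EOPNOTSUPP;
	}
}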
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 6dacaeba2fbf..25a8f8260c14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -127,7 +127,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
else
#endif
if (skb_vlan_tag_present(skb))
- up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+ up = skb_vlan_tag_get_prio(skb);
/* channel_ix can be larger than num_channels since
* dev->num_real_tx_queues = num_channels * num_tc
@@ -148,12 +148,8 @@ static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb)
{
- struct flow_keys keys;
-
if (skb_transport_header_was_set(skb))
return skb_transport_offset(skb);
- else if (skb_flow_dissect_flow_keys(skb, &keys, 0))
- return keys.control.thoff;
else
return mlx5e_skb_l2_header_offset(skb);
}
@@ -172,15 +168,8 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
hlen += VLAN_HLEN;
break;
case MLX5_INLINE_MODE_IP:
- /* When transport header is set to zero, it means no transport
- * header. When transport header is set to 0xff's, it means
- * transport header wasn't set.
- */
- if (skb_transport_offset(skb)) {
- hlen = mlx5e_skb_l3_header_offset(skb);
- break;
- }
- /* fall through */
+ hlen = mlx5e_skb_l3_header_offset(skb);
+ break;
case MLX5_INLINE_MODE_L2:
default:
hlen = mlx5e_skb_l2_header_offset(skb);
@@ -387,8 +376,14 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+#ifdef CONFIG_MLX5_EN_IPSEC
+ struct mlx5_wqe_eth_seg cur_eth = wqe->eth;
+#endif
mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+#ifdef CONFIG_MLX5_EN_IPSEC
+ wqe->eth = cur_eth;
+#endif
}
/* fill wqe */
@@ -459,9 +454,10 @@ static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq);
netdev_err(sq->channel->netdev,
- "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
- sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome,
- err_cqe->vendor_err_synd);
+ "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+ sq->cq.mcq.cqn, ci, sq->sqn,
+ get_cqe_opcode((struct mlx5_cqe64 *)err_cqe),
+ err_cqe->syndrome, err_cqe->vendor_err_synd);
mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
}
@@ -507,13 +503,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
- if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
+ if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
&sq->state)) {
mlx5e_dump_error_cqe(sq,
(struct mlx5_err_cqe *)cqe);
queue_work(cq->channel->priv->wq,
- &sq->recover.recover_work);
+ &sq->recover_work);
}
stats->cqe_err++;
}
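The en_tx.c hunks above switch from the open-coded cqe->op_own >> 4 to get_cqe_opcode(). The helper is defined in a header outside this diff; judging from the expression it replaces, its body is presumably equivalent to:

/* Presumed body of get_cqe_opcode(), inferred from the open-coded
 * expression it replaces above (defined in a header not in this hunk).
 */
static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe)
{
	return cqe->op_own >> 4;
}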
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 85d517360157..b4af5e19f6ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -76,6 +76,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
napi);
struct mlx5e_ch_stats *ch_stats = c->stats;
+ struct mlx5e_rq *rq = &c->rq;
bool busy = false;
int work_done = 0;
int i;
@@ -85,17 +86,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
for (i = 0; i < c->num_tc; i++)
busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
- busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
+ busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL);
if (c->xdp)
- busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq);
+ busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq);
if (likely(budget)) { /* budget=0 means: don't poll rx rings */
- work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
+ work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
busy |= work_done == budget;
}
- busy |= c->rq.post_wqes(&c->rq);
+ busy |= c->rq.post_wqes(rq);
if (busy) {
if (likely(mlx5e_channel_no_affinity_change(c)))
@@ -115,9 +116,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_cq_arm(&c->sq[i].cq);
}
- mlx5e_handle_rx_dim(&c->rq);
+ mlx5e_handle_rx_dim(rq);
- mlx5e_cq_arm(&c->rq.cq);
+ mlx5e_cq_arm(&rq->cq);
mlx5e_cq_arm(&c->icosq.cq);
mlx5e_cq_arm(&c->xdpsq.cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c1e1a16a9b07..bb6e5b5d9681 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -31,20 +31,23 @@
*/
#include <linux/interrupt.h>
+#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/eq.h>
#include <linux/mlx5/cmd.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "fpga/core.h"
#include "eswitch.h"
#include "lib/clock.h"
#include "diag/fw_tracer.h"
enum {
- MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
MLX5_EQE_OWNER_INIT_VAL = 0x1,
};
@@ -55,14 +58,32 @@ enum {
};
enum {
- MLX5_NUM_SPARE_EQE = 0x80,
- MLX5_NUM_ASYNC_EQE = 0x1000,
- MLX5_NUM_CMD_EQE = 32,
- MLX5_NUM_PF_DRAIN = 64,
+ MLX5_EQ_DOORBEL_OFFSET = 0x40,
};
-enum {
- MLX5_EQ_DOORBEL_OFFSET = 0x40,
+struct mlx5_irq_info {
+ cpumask_var_t mask;
+ char name[MLX5_MAX_IRQ_NAME];
+ void *context; /* dev_id provided to request_irq */
+};
+
+struct mlx5_eq_table {
+ struct list_head comp_eqs_list;
+ struct mlx5_eq pages_eq;
+ struct mlx5_eq cmd_eq;
+ struct mlx5_eq async_eq;
+
+ struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
+
+	/* CQ error events are delivered on the async EQ, so its notifier lives here */
+ struct mlx5_nb cq_err_nb;
+
+ struct mutex lock; /* sync async eqs creations */
+ int num_comp_vectors;
+ struct mlx5_irq_info *irq_info;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -78,17 +99,6 @@ enum {
(1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \
(1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
-struct map_eq_in {
- u64 mask;
- u32 reserved;
- u32 unmap_eqn;
-};
-
-struct cre_des_eq {
- u8 reserved[15];
- u8 eqn;
-};
-
static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
{
u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
@@ -99,213 +109,56 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
-{
- return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
-}
-
-static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
-{
- struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
-
- return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
-}
-
-static const char *eqe_type_str(u8 type)
-{
- switch (type) {
- case MLX5_EVENT_TYPE_COMP:
- return "MLX5_EVENT_TYPE_COMP";
- case MLX5_EVENT_TYPE_PATH_MIG:
- return "MLX5_EVENT_TYPE_PATH_MIG";
- case MLX5_EVENT_TYPE_COMM_EST:
- return "MLX5_EVENT_TYPE_COMM_EST";
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- return "MLX5_EVENT_TYPE_SQ_DRAINED";
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
- case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
- return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
- case MLX5_EVENT_TYPE_CQ_ERROR:
- return "MLX5_EVENT_TYPE_CQ_ERROR";
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
- case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
- return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
- case MLX5_EVENT_TYPE_INTERNAL_ERROR:
- return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
- case MLX5_EVENT_TYPE_PORT_CHANGE:
- return "MLX5_EVENT_TYPE_PORT_CHANGE";
- case MLX5_EVENT_TYPE_GPIO_EVENT:
- return "MLX5_EVENT_TYPE_GPIO_EVENT";
- case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
- return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
- case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
- return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
- case MLX5_EVENT_TYPE_REMOTE_CONFIG:
- return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
- case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
- return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
- case MLX5_EVENT_TYPE_STALL_EVENT:
- return "MLX5_EVENT_TYPE_STALL_EVENT";
- case MLX5_EVENT_TYPE_CMD:
- return "MLX5_EVENT_TYPE_CMD";
- case MLX5_EVENT_TYPE_PAGE_REQUEST:
- return "MLX5_EVENT_TYPE_PAGE_REQUEST";
- case MLX5_EVENT_TYPE_PAGE_FAULT:
- return "MLX5_EVENT_TYPE_PAGE_FAULT";
- case MLX5_EVENT_TYPE_PPS_EVENT:
- return "MLX5_EVENT_TYPE_PPS_EVENT";
- case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
- return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
- case MLX5_EVENT_TYPE_FPGA_ERROR:
- return "MLX5_EVENT_TYPE_FPGA_ERROR";
- case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
- return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
- case MLX5_EVENT_TYPE_GENERAL_EVENT:
- return "MLX5_EVENT_TYPE_GENERAL_EVENT";
- case MLX5_EVENT_TYPE_DEVICE_TRACER:
- return "MLX5_EVENT_TYPE_DEVICE_TRACER";
- default:
- return "Unrecognized event";
- }
-}
-
-static enum mlx5_dev_event port_subtype_event(u8 subtype)
-{
- switch (subtype) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- return MLX5_DEV_EVENT_PORT_DOWN;
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- return MLX5_DEV_EVENT_PORT_UP;
- case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
- return MLX5_DEV_EVENT_PORT_INITIALIZED;
- case MLX5_PORT_CHANGE_SUBTYPE_LID:
- return MLX5_DEV_EVENT_LID_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
- return MLX5_DEV_EVENT_PKEY_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_GUID:
- return MLX5_DEV_EVENT_GUID_CHANGE;
- case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
- return MLX5_DEV_EVENT_CLIENT_REREG;
- }
- return -1;
-}
-
-static void eq_update_ci(struct mlx5_eq *eq, int arm)
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
{
- __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
- u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
-
- __raw_writel((__force u32)cpu_to_be32(val), addr);
- /* We still want ordering, just not swabbing, so add a barrier */
- mb();
-}
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *cq = NULL;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static void eqe_pf_action(struct work_struct *work)
-{
- struct mlx5_pagefault *pfault = container_of(work,
- struct mlx5_pagefault,
- work);
- struct mlx5_eq *eq = pfault->eq;
+ rcu_read_lock();
+ cq = radix_tree_lookup(&table->tree, cqn);
+ if (likely(cq))
+ mlx5_cq_hold(cq);
+ rcu_read_unlock();
- mlx5_core_page_fault(eq->dev, pfault);
- mempool_free(pfault, eq->pf_ctx.pool);
+ return cq;
}
-static void eq_pf_process(struct mlx5_eq *eq)
+static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
{
- struct mlx5_core_dev *dev = eq->dev;
- struct mlx5_eqe_page_fault *pf_eqe;
- struct mlx5_pagefault *pfault;
+ struct mlx5_eq_comp *eq_comp = eq_ptr;
+ struct mlx5_eq *eq = eq_ptr;
struct mlx5_eqe *eqe;
int set_ci = 0;
+ u32 cqn = -1;
while ((eqe = next_eqe_sw(eq))) {
- pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
- if (!pfault) {
- schedule_work(&eq->pf_ctx.work);
- break;
- }
-
+ struct mlx5_core_cq *cq;
+ /* Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
dma_rmb();
- pf_eqe = &eqe->data.page_fault;
- pfault->event_subtype = eqe->sub_type;
- pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
-
- mlx5_core_dbg(dev,
- "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
- eqe->sub_type, pfault->bytes_committed);
-
- switch (eqe->sub_type) {
- case MLX5_PFAULT_SUBTYPE_RDMA:
- /* RDMA based event */
- pfault->type =
- be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
- pfault->token =
- be32_to_cpu(pf_eqe->rdma.pftype_token) &
- MLX5_24BIT_MASK;
- pfault->rdma.r_key =
- be32_to_cpu(pf_eqe->rdma.r_key);
- pfault->rdma.packet_size =
- be16_to_cpu(pf_eqe->rdma.packet_length);
- pfault->rdma.rdma_op_len =
- be32_to_cpu(pf_eqe->rdma.rdma_op_len);
- pfault->rdma.rdma_va =
- be64_to_cpu(pf_eqe->rdma.rdma_va);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
- pfault->type, pfault->token,
- pfault->rdma.r_key);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
- pfault->rdma.rdma_op_len,
- pfault->rdma.rdma_va);
- break;
-
- case MLX5_PFAULT_SUBTYPE_WQE:
- /* WQE based event */
- pfault->type =
- (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
- pfault->token =
- be32_to_cpu(pf_eqe->wqe.token);
- pfault->wqe.wq_num =
- be32_to_cpu(pf_eqe->wqe.pftype_wq) &
- MLX5_24BIT_MASK;
- pfault->wqe.wqe_index =
- be16_to_cpu(pf_eqe->wqe.wqe_index);
- pfault->wqe.packet_size =
- be16_to_cpu(pf_eqe->wqe.packet_length);
- mlx5_core_dbg(dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
- pfault->type, pfault->token,
- pfault->wqe.wq_num,
- pfault->wqe.wqe_index);
- break;
-
- default:
- mlx5_core_warn(dev,
- "Unsupported page fault event sub-type: 0x%02hhx\n",
- eqe->sub_type);
- /* Unsupported page faults should still be
- * resolved by the page fault handler
- */
+ /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
+ cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+
+ cq = mlx5_eq_cq_get(eq, cqn);
+ if (likely(cq)) {
+ ++cq->arm_sn;
+ cq->comp(cq);
+ mlx5_cq_put(cq);
+ } else {
+ mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
}
- pfault->eq = eq;
- INIT_WORK(&pfault->work, eqe_pf_action);
- queue_work(eq->pf_ctx.wq, &pfault->work);
-
++eq->cons_index;
++set_ci;
+ /* The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events. We
+ * create our EQs with MLX5_NUM_SPARE_EQE extra
+ * entries, so we must update our consumer index at
+ * least that often.
+ */
if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
eq_update_ci(eq, 0);
set_ci = 0;
@@ -313,165 +166,41 @@ static void eq_pf_process(struct mlx5_eq *eq)
}
eq_update_ci(eq, 1);
-}
-
-static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
-{
- struct mlx5_eq *eq = eq_ptr;
- unsigned long flags;
- if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
- eq_pf_process(eq);
- spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
- } else {
- schedule_work(&eq->pf_ctx.work);
- }
+ if (cqn != -1)
+ tasklet_schedule(&eq_comp->tasklet_ctx.task);
return IRQ_HANDLED;
}
-/* mempool_refill() was proposed but unfortunately wasn't accepted
- * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
- * Chip workaround.
+/* Some architectures don't latch interrupts when they are disabled, so using
+ * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
+ * avoid losing them. It is not recommended to use it, unless this is the last
+ * resort.
*/
-static void mempool_refill(mempool_t *pool)
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
{
- while (pool->curr_nr < pool->min_nr)
- mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
-}
-
-static void eq_pf_action(struct work_struct *work)
-{
- struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
-
- mempool_refill(eq->pf_ctx.pool);
-
- spin_lock_irq(&eq->pf_ctx.lock);
- eq_pf_process(eq);
- spin_unlock_irq(&eq->pf_ctx.lock);
-}
-
-static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
-{
- spin_lock_init(&pf_ctx->lock);
- INIT_WORK(&pf_ctx->work, eq_pf_action);
-
- pf_ctx->wq = alloc_ordered_workqueue(name,
- WQ_MEM_RECLAIM);
- if (!pf_ctx->wq)
- return -ENOMEM;
-
- pf_ctx->pool = mempool_create_kmalloc_pool
- (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
- if (!pf_ctx->pool)
- goto err_wq;
-
- return 0;
-err_wq:
- destroy_workqueue(pf_ctx->wq);
- return -ENOMEM;
-}
-
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
- u32 wq_num, u8 type, int error)
-{
- u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
-
- MLX5_SET(page_fault_resume_in, in, opcode,
- MLX5_CMD_OP_PAGE_FAULT_RESUME);
- MLX5_SET(page_fault_resume_in, in, error, !!error);
- MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
- MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
- MLX5_SET(page_fault_resume_in, in, token, token);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
-#endif
-
-static void general_event_handler(struct mlx5_core_dev *dev,
- struct mlx5_eqe *eqe)
-{
- switch (eqe->sub_type) {
- case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
- if (dev->event)
- dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
- break;
- default:
- mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
- eqe->sub_type);
- }
-}
-
-static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
- struct mlx5_eqe *eqe)
-{
- u64 value_lsb;
- u64 value_msb;
-
- value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
- value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
-
- mlx5_core_warn(dev,
- "High temperature on sensors with bit set %llx %llx",
- value_msb, value_lsb);
-}
-
-/* caller must eventually call mlx5_cq_put on the returned cq */
-static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
-{
- struct mlx5_cq_table *table = &eq->cq_table;
- struct mlx5_core_cq *cq = NULL;
-
- spin_lock(&table->lock);
- cq = radix_tree_lookup(&table->tree, cqn);
- if (likely(cq))
- mlx5_cq_hold(cq);
- spin_unlock(&table->lock);
-
- return cq;
-}
-
-static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
-{
- struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
- if (unlikely(!cq)) {
- mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
- return;
- }
-
- ++cq->arm_sn;
-
- cq->comp(cq);
-
- mlx5_cq_put(cq);
-}
-
-static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
-{
- struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
- if (unlikely(!cq)) {
- mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
- return;
- }
+ u32 count_eqe;
- cq->event(cq, event_type);
+ disable_irq(eq->core.irqn);
+ count_eqe = eq->core.cons_index;
+ mlx5_eq_comp_int(eq->core.irqn, eq);
+ count_eqe = eq->core.cons_index - count_eqe;
+ enable_irq(eq->core.irqn);
- mlx5_cq_put(cq);
+ return count_eqe;
}
-static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
+static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
{
struct mlx5_eq *eq = eq_ptr;
- struct mlx5_core_dev *dev = eq->dev;
+ struct mlx5_eq_table *eqt;
+ struct mlx5_core_dev *dev;
struct mlx5_eqe *eqe;
int set_ci = 0;
- u32 cqn = -1;
- u32 rsn;
- u8 port;
+
+ dev = eq->dev;
+ eqt = dev->priv.eq_table;
while ((eqe = next_eqe_sw(eq))) {
/*
@@ -480,116 +209,12 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
*/
dma_rmb();
- mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
- eq->eqn, eqe_type_str(eqe->type));
- switch (eqe->type) {
- case MLX5_EVENT_TYPE_COMP:
- cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
- mlx5_eq_cq_completion(eq, cqn);
- break;
- case MLX5_EVENT_TYPE_DCT_DRAINED:
- rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
- rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
- mlx5_rsc_event(dev, rsn, eqe->type);
- break;
- case MLX5_EVENT_TYPE_PATH_MIG:
- case MLX5_EVENT_TYPE_COMM_EST:
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
- rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
- mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
- eqe_type_str(eqe->type), eqe->type, rsn);
- mlx5_rsc_event(dev, rsn, eqe->type);
- break;
-
- case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
- case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
- rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
- mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
- eqe_type_str(eqe->type), eqe->type, rsn);
- mlx5_srq_event(dev, rsn, eqe->type);
- break;
-
- case MLX5_EVENT_TYPE_CMD:
- mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
- break;
-
- case MLX5_EVENT_TYPE_PORT_CHANGE:
- port = (eqe->data.port.port >> 4) & 0xf;
- switch (eqe->sub_type) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- case MLX5_PORT_CHANGE_SUBTYPE_LID:
- case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
- case MLX5_PORT_CHANGE_SUBTYPE_GUID:
- case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
- case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
- if (dev->event)
- dev->event(dev, port_subtype_event(eqe->sub_type),
- (unsigned long)port);
- break;
- default:
- mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
- port, eqe->sub_type);
- }
- break;
- case MLX5_EVENT_TYPE_CQ_ERROR:
- cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
- mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
- cqn, eqe->data.cq_err.syndrome);
- mlx5_eq_cq_event(eq, cqn, eqe->type);
- break;
+ if (likely(eqe->type < MLX5_EVENT_TYPE_MAX))
+ atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
+ else
+			mlx5_core_warn_once(dev, "notifier_call_chain is not set up for eqe: %d\n", eqe->type);
- case MLX5_EVENT_TYPE_PAGE_REQUEST:
- {
- u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
- s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
-
- mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
- func_id, npages);
- mlx5_core_req_pages_handler(dev, func_id, npages);
- }
- break;
-
- case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
- mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
- break;
-
- case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
- mlx5_port_module_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_PPS_EVENT:
- mlx5_pps_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_FPGA_ERROR:
- case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
- mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
- break;
-
- case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
- mlx5_temp_warning_event(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_GENERAL_EVENT:
- general_event_handler(dev, eqe);
- break;
-
- case MLX5_EVENT_TYPE_DEVICE_TRACER:
- mlx5_fw_tracer_event(dev, eqe);
- break;
-
- default:
- mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
- eqe->type, eq->eqn);
- break;
- }
+ atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
++eq->cons_index;
++set_ci;
@@ -608,30 +233,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
eq_update_ci(eq, 1);
- if (cqn != -1)
- tasklet_schedule(&eq->tasklet_ctx.task);
-
return IRQ_HANDLED;
}
-/* Some architectures don't latch interrupts when they are disabled, so using
- * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
- * avoid losing them. It is not recommended to use it, unless this is the last
- * resort.
- */
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
-{
- u32 count_eqe;
-
- disable_irq(eq->irqn);
- count_eqe = eq->cons_index;
- mlx5_eq_int(eq->irqn, eq);
- count_eqe = eq->cons_index - count_eqe;
- enable_irq(eq->irqn);
-
- return count_eqe;
-}
-
static void init_eq_buf(struct mlx5_eq *eq)
{
struct mlx5_eqe *eqe;
@@ -643,39 +247,35 @@ static void init_eq_buf(struct mlx5_eq *eq)
}
}
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type)
+static int
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
+ struct mlx5_eq_param *param)
{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
- irq_handler_t handler;
+ u8 vecidx = param->index;
__be64 *pas;
void *eqc;
int inlen;
u32 *in;
int err;
+ if (eq_table->irq_info[vecidx].context)
+ return -EEXIST;
+
/* Init CQ table */
memset(cq_table, 0, sizeof(*cq_table));
spin_lock_init(&cq_table->lock);
INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
- eq->type = type;
- eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+ eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
if (err)
return err;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (type == MLX5_EQ_TYPE_PF)
- handler = mlx5_eq_pf_int;
- else
-#endif
- handler = mlx5_eq_int;
-
init_eq_buf(eq);
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
@@ -691,7 +291,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
mlx5_fill_page_array(&eq->buf, pas);
MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
- MLX5_SET64(create_eq_in, in, event_bitmask, mask);
+ MLX5_SET64(create_eq_in, in, event_bitmask, param->mask);
eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
@@ -704,15 +304,17 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
if (err)
goto err_in;
- snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
+ snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
name, pci_name(dev->pdev));
+ eq_table->irq_info[vecidx].context = param->context;
+ eq->vecidx = vecidx;
eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
eq->irqn = pci_irq_vector(dev->pdev, vecidx);
eq->dev = dev;
eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
- err = request_irq(eq->irqn, handler, 0,
- priv->irq_info[vecidx].name, eq);
+ err = request_irq(eq->irqn, param->handler, 0,
+ eq_table->irq_info[vecidx].name, param->context);
if (err)
goto err_eq;
@@ -720,21 +322,6 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
if (err)
goto err_irq;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (type == MLX5_EQ_TYPE_PF) {
- err = init_pf_ctx(&eq->pf_ctx, name);
- if (err)
- goto err_irq;
- } else
-#endif
- {
- INIT_LIST_HEAD(&eq->tasklet_ctx.list);
- INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
- spin_lock_init(&eq->tasklet_ctx.lock);
- tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
- (unsigned long)&eq->tasklet_ctx);
- }
-
/* EQs are created in ARMED state
*/
eq_update_ci(eq, 1);
@@ -756,27 +343,25 @@ err_buf:
return err;
}
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ struct mlx5_irq_info *irq_info;
int err;
+ irq_info = &eq_table->irq_info[eq->vecidx];
+
mlx5_debug_eq_remove(dev, eq);
- free_irq(eq->irqn, eq);
+
+ free_irq(eq->irqn, irq_info->context);
+ irq_info->context = NULL;
+
err = mlx5_cmd_destroy_eq(dev, eq->eqn);
if (err)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
synchronize_irq(eq->irqn);
- if (eq->type == MLX5_EQ_TYPE_COMP) {
- tasklet_disable(&eq->tasklet_ctx.task);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- } else if (eq->type == MLX5_EQ_TYPE_PF) {
- cancel_work_sync(&eq->pf_ctx.work);
- destroy_workqueue(eq->pf_ctx.wq);
- mempool_destroy(eq->pf_ctx.pool);
-#endif
- }
mlx5_buf_free(dev, &eq->buf);
return err;
@@ -787,9 +372,9 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
struct mlx5_cq_table *table = &eq->cq_table;
int err;
- spin_lock_irq(&table->lock);
+ spin_lock(&table->lock);
err = radix_tree_insert(&table->tree, cq->cqn, cq);
- spin_unlock_irq(&table->lock);
+ spin_unlock(&table->lock);
return err;
}
@@ -799,9 +384,9 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
struct mlx5_cq_table *table = &eq->cq_table;
struct mlx5_core_cq *tmp;
- spin_lock_irq(&table->lock);
+ spin_lock(&table->lock);
tmp = radix_tree_delete(&table->tree, cq->cqn);
- spin_unlock_irq(&table->lock);
+ spin_unlock(&table->lock);
if (!tmp) {
 		mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", cq->cqn, eq->eqn);
@@ -816,28 +401,106 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
return 0;
}
-int mlx5_eq_init(struct mlx5_core_dev *dev)
+int mlx5_eq_table_init(struct mlx5_core_dev *dev)
{
- int err;
+ struct mlx5_eq_table *eq_table;
+ int i, err;
- spin_lock_init(&dev->priv.eq_table.lock);
+ eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
+ if (!eq_table)
+ return -ENOMEM;
+
+ dev->priv.eq_table = eq_table;
err = mlx5_eq_debugfs_init(dev);
+ if (err)
+ goto kvfree_eq_table;
+ mutex_init(&eq_table->lock);
+ for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
+ ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
+
+ return 0;
+
+kvfree_eq_table:
+ kvfree(eq_table);
+ dev->priv.eq_table = NULL;
return err;
}
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
{
mlx5_eq_debugfs_cleanup(dev);
+ kvfree(dev->priv.eq_table);
}
-int mlx5_start_eqs(struct mlx5_core_dev *dev)
+/* Async EQs */
+
+static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
+ struct mlx5_eq *eq, struct mlx5_eq_param *param)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ int err;
+
+ mutex_lock(&eq_table->lock);
+ if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
+ err = -ENOSPC;
+ goto unlock;
+ }
+
+ err = create_map_eq(dev, eq, name, param);
+unlock:
+ mutex_unlock(&eq_table->lock);
+ return err;
+}
+
+static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
+ mutex_lock(&eq_table->lock);
+ err = destroy_unmap_eq(dev, eq);
+ mutex_unlock(&eq_table->lock);
+ return err;
+}
+
+static int cq_err_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_eq_table *eqt;
+ struct mlx5_core_cq *cq;
+ struct mlx5_eqe *eqe;
+ struct mlx5_eq *eq;
+ u32 cqn;
+
+ /* type == MLX5_EVENT_TYPE_CQ_ERROR */
+
+ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
+ eq = &eqt->async_eq;
+ eqe = data;
+
+ cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
+ cqn, eqe->data.cq_err.syndrome);
+
+ cq = mlx5_eq_cq_get(eq, cqn);
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+ return NOTIFY_OK;
+ }
+
+ cq->event(cq, type);
+
+ mlx5_cq_put(cq);
+
+ return NOTIFY_OK;
+}
+
+static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
+{
+ u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+
if (MLX5_VPORT_MANAGER(dev))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
@@ -865,127 +528,524 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
- err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
- MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
- "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
+ if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
+
+ if (mlx5_core_is_ecpf_esw_manager(dev))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE);
+
+ return async_event_mask;
+}
+
+static int create_async_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_param param = {};
+ int err;
+
+ MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
+ mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_CMD_IDX,
+ .mask = 1ull << MLX5_EVENT_TYPE_CMD,
+ .nent = MLX5_NUM_CMD_EQE,
+ .context = &table->cmd_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
- return err;
+ goto err0;
}
mlx5_cmd_use_events(dev);
- err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
- MLX5_NUM_ASYNC_EQE, async_event_mask,
- "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_ASYNC_IDX,
+ .mask = gather_async_events_mask(dev),
+ .nent = MLX5_NUM_ASYNC_EQE,
+ .context = &table->async_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
goto err1;
}
- err = mlx5_create_map_eq(dev, &table->pages_eq,
- MLX5_EQ_VEC_PAGES,
- /* TODO: sriov max_vf + */ 1,
- 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
- MLX5_EQ_TYPE_ASYNC);
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_PAGEREQ_IDX,
+ .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
+ .nent = /* TODO: sriov max_vf + */ 1,
+ .context = &table->pages_eq,
+ .handler = mlx5_eq_async_int,
+ };
+ err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
if (err) {
mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
goto err2;
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg)) {
- err = mlx5_create_map_eq(dev, &table->pfault_eq,
- MLX5_EQ_VEC_PFAULT,
- MLX5_NUM_ASYNC_EQE,
- 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
- "mlx5_page_fault_eq",
- MLX5_EQ_TYPE_PF);
- if (err) {
- mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
- err);
- goto err3;
- }
- }
-
return err;
-err3:
- mlx5_destroy_unmap_eq(dev, &table->pages_eq);
-#else
- return err;
-#endif
err2:
- mlx5_destroy_unmap_eq(dev, &table->async_eq);
+ destroy_async_eq(dev, &table->async_eq);
err1:
mlx5_cmd_use_polling(dev);
- mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ destroy_async_eq(dev, &table->cmd_eq);
+err0:
+ mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
return err;
}
-void mlx5_stop_eqs(struct mlx5_core_dev *dev)
+static void destroy_async_eqs(struct mlx5_core_dev *dev)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq_table *table = dev->priv.eq_table;
int err;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg)) {
- err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
- if (err)
- mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
- err);
- }
-#endif
-
- err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+ err = destroy_async_eq(dev, &table->pages_eq);
if (err)
mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
err);
- err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+ err = destroy_async_eq(dev, &table->async_eq);
if (err)
mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
err);
+
mlx5_cmd_use_polling(dev);
- err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ err = destroy_async_eq(dev, &table->cmd_eq);
if (err)
mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
err);
+
+ mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+}
+
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
+{
+ return &dev->priv.eq_table->async_eq;
+}
+
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
+{
+ synchronize_irq(dev->priv.eq_table->async_eq.irqn);
+}
+
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
+{
+ synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
+}
+
+/* Generic EQ API for mlx5_core consumers
+ * Needed For RDMA ODP EQ for now
+ */
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+ struct mlx5_eq_param *param)
+{
+ struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
+ int err;
+
+ if (!eq)
+ return ERR_PTR(-ENOMEM);
+
+ err = create_async_eq(dev, name, eq, param);
+ if (err) {
+ kvfree(eq);
+ eq = ERR_PTR(err);
+ }
+
+ return eq;
+}
+EXPORT_SYMBOL(mlx5_eq_create_generic);
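mlx5_eq_create_generic() takes the same mlx5_eq_param used for the async EQs earlier in this file. A hedged sketch of a consumer; the index constant, context and handler names are placeholders, only the param fields and the create/destroy calls come from this patch:

/* Sketch of a generic-EQ consumer (placeholder names marked below) */
struct mlx5_eq_param param = {
	.index = MLX5_EQ_PFAULT_IDX,		/* hypothetical index */
	.mask = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT,
	.nent = MLX5_NUM_ASYNC_EQE,
	.context = my_ctx,			/* dev_id handed to request_irq */
	.handler = my_eq_irq_handler,		/* irq_handler_t, placeholder */
};
struct mlx5_eq *eq;

eq = mlx5_eq_create_generic(dev, "my_odp_eq", &param);
if (IS_ERR(eq))
	return PTR_ERR(eq);
/* later, on teardown: */
mlx5_eq_destroy_generic(dev, eq);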
+
+int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ if (IS_ERR(eq))
+ return -EINVAL;
+
+ err = destroy_async_eq(dev, eq);
+ if (err)
+ goto out;
+
+ kvfree(eq);
+out:
+ return err;
}
+EXPORT_SYMBOL(mlx5_eq_destroy_generic);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen)
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
{
- u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
+ u32 ci = eq->cons_index + cc;
+ struct mlx5_eqe *eqe;
+
+ eqe = get_eqe(eq, ci & (eq->nent - 1));
+ eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
+ /* Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ if (eqe)
+ dma_rmb();
- MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
- MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+ return eqe;
+}
+EXPORT_SYMBOL(mlx5_eq_get_eqe);
+
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val;
+
+ eq->cons_index += cc;
+ val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+EXPORT_SYMBOL(mlx5_eq_update_ci);
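mlx5_eq_get_eqe() and mlx5_eq_update_ci() together let an out-of-driver consumer drain its EQ. A minimal sketch, with process_eqe() standing in for the consumer's handler:

/* Minimal drain loop built from the two exports above;
 * process_eqe() is a placeholder for the consumer's handler.
 */
static void sketch_drain_eq(struct mlx5_eq *eq)
{
	struct mlx5_eqe *eqe;
	u32 cc = 0;

	while ((eqe = mlx5_eq_get_eqe(eq, cc))) {
		process_eqe(eqe);	/* hypothetical handler */
		cc++;
	}
	/* consume cc entries and re-arm the EQ */
	mlx5_eq_update_ci(eq, cc, true);
}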
+
+/* Completion EQs */
+
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ struct mlx5_priv *priv = &mdev->priv;
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+ struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+ if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
+ mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+ return -ENOMEM;
+ }
+
+ cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+ irq_info->mask);
+
+ if (IS_ENABLED(CONFIG_SMP) &&
+ irq_set_affinity_hint(irq, irq_info->mask))
+ mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+ return 0;
+}
+
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ struct mlx5_priv *priv = &mdev->priv;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+ struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+ irq_set_affinity_hint(irq, NULL);
+ free_cpumask_var(irq_info->mask);
+}
+
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
+ err = set_comp_irq_affinity_hint(mdev, i);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ for (i--; i >= 0; i--)
+ clear_comp_irq_affinity_hint(mdev, i);
+
+ return err;
+}
+
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
+ clear_comp_irq_affinity_hint(mdev, i);
+}
+
+static void destroy_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+
+ clear_comp_irqs_affinity_hints(dev);
+
+#ifdef CONFIG_RFS_ACCEL
+ if (table->rmap) {
+ free_irq_cpu_rmap(table->rmap);
+ table->rmap = NULL;
+ }
+#endif
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ list_del(&eq->list);
+ if (destroy_unmap_eq(dev, &eq->core))
+ mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
+ eq->core.eqn);
+ tasklet_disable(&eq->tasklet_ctx.task);
+ kfree(eq);
+ }
+}
+
+static int create_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ char name[MLX5_MAX_IRQ_NAME];
+ struct mlx5_eq_comp *eq;
+ int ncomp_vec;
+ int nent;
+ int err;
+ int i;
+
+ INIT_LIST_HEAD(&table->comp_eqs_list);
+ ncomp_vec = table->num_comp_vectors;
+ nent = MLX5_COMP_EQ_SIZE;
+#ifdef CONFIG_RFS_ACCEL
+ table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
+ if (!table->rmap)
+ return -ENOMEM;
+#endif
+ for (i = 0; i < ncomp_vec; i++) {
+ int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
+ struct mlx5_eq_param param = {};
+
+ eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+ if (!eq) {
+ err = -ENOMEM;
+ goto clean;
+ }
+
+ INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+ INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+ spin_lock_init(&eq->tasklet_ctx.lock);
+ tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
+ (unsigned long)&eq->tasklet_ctx);
+
+#ifdef CONFIG_RFS_ACCEL
+ irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
+#endif
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+ param = (struct mlx5_eq_param) {
+ .index = vecidx,
+ .mask = 0,
+ .nent = nent,
+ .context = &eq->core,
+ .handler = mlx5_eq_comp_int
+ };
+ err = create_map_eq(dev, &eq->core, name, &param);
+ if (err) {
+ kfree(eq);
+ goto clean;
+ }
+ mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
+		/* add to tail to keep the list ordered, for mlx5_vector2eqn to work */
+ list_add_tail(&eq->list, &table->comp_eqs_list);
+ }
+
+ err = set_comp_irq_affinity_hints(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+ goto clean;
+ }
+
+ return 0;
+
+clean:
+ destroy_comp_eqs(dev);
+ return err;
+}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+ unsigned int *irqn)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq, *n;
+ int err = -ENOENT;
+ int i = 0;
+
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ if (i++ == vector) {
+ *eqn = eq->core.eqn;
+ *irqn = eq->core.irqn;
+ err = 0;
+ break;
+ }
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
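A typical caller resolves a completion vector before binding a CQ to it; for example (the follow-up use of eqn is an assumption):

/* Sketch: resolve completion vector 0 to its EQN/IRQ before creating
 * a CQ on that vector.
 */
int eqn;
unsigned int irqn;
int err = mlx5_vector2eqn(dev, 0, &eqn, &irqn);

if (err)
	return err;	/* no such vector */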
+
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
+{
+ return dev->priv.eq_table->num_comp_vectors;
+}
+EXPORT_SYMBOL(mlx5_comp_vectors_count);
+
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
+{
+ /* TODO: consider irq_get_affinity_mask(irq) */
+ return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
+}
+EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
+
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ return dev->priv.eq_table->rmap;
+#else
+ return NULL;
+#endif
+}
+
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_eq_comp *eq;
+
+ list_for_each_entry(eq, &table->comp_eqs_list, list) {
+ if (eq->core.eqn == eqn)
+ return eq;
+ }
+
+ return ERR_PTR(-ENOENT);
}
/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq;
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int i, max_eqs;
+
+ clear_comp_irqs_affinity_hints(dev);
#ifdef CONFIG_RFS_ACCEL
- if (dev->rmap) {
- free_irq_cpu_rmap(dev->rmap);
- dev->rmap = NULL;
+ if (table->rmap) {
+ free_irq_cpu_rmap(table->rmap);
+ table->rmap = NULL;
}
#endif
- list_for_each_entry(eq, &table->comp_eqs_list, list)
- free_irq(eq->irqn, eq);
-
- free_irq(table->pages_eq.irqn, &table->pages_eq);
- free_irq(table->async_eq.irqn, &table->async_eq);
- free_irq(table->cmd_eq.irqn, &table->cmd_eq);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(dev, pg))
- free_irq(table->pfault_eq.irqn, &table->pfault_eq);
-#endif
+
+ mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+ max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
+ for (i = max_eqs - 1; i >= 0; i--) {
+ if (!table->irq_info[i].context)
+ continue;
+ free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
+ table->irq_info[i].context = NULL;
+ }
+ mutex_unlock(&table->lock);
+ pci_free_irq_vectors(dev->pdev);
+}
+
+static int alloc_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_eq_table *table = priv->eq_table;
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int nvec;
+ int err;
+
+ nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+ MLX5_EQ_VEC_COMP_BASE;
+ nvec = min_t(int, nvec, num_eqs);
+ if (nvec <= MLX5_EQ_VEC_COMP_BASE)
+ return -ENOMEM;
+
+ table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
+ if (!table->irq_info)
+ return -ENOMEM;
+
+ nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
+ nvec, PCI_IRQ_MSIX);
+ if (nvec < 0) {
+ err = nvec;
+ goto err_free_irq_info;
+ }
+
+ table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
+
+ return 0;
+
+err_free_irq_info:
+ kfree(table->irq_info);
+ return err;
+}
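+
+/* Worked example of the vector budget above (figures assumed): with
+ * num_ports == 1, eight online CPUs and MLX5_EQ_VEC_COMP_BASE == 4,
+ * nvec = 1 * 8 + 4 = 12, then clamped to the device's EQ capacity.
+ * pci_alloc_irq_vectors() is asked for at least MLX5_EQ_VEC_COMP_BASE + 1
+ * vectors, so at least one completion vector remains beyond the async
+ * EQs; whatever is granted beyond the base becomes num_comp_vectors.
+ */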
+
+static void free_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+
pci_free_irq_vectors(dev->pdev);
+ kfree(priv->eq_table->irq_info);
+}
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ err = alloc_irq_vectors(dev);
+ if (err) {
+ mlx5_core_err(dev, "alloc irq vectors failed\n");
+ return err;
+ }
+
+ err = create_async_eqs(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to create async EQs\n");
+ goto err_async_eqs;
+ }
+
+ err = create_comp_eqs(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to create completion EQs\n");
+ goto err_comp_eqs;
+ }
+
+ return 0;
+err_comp_eqs:
+ destroy_async_eqs(dev);
+err_async_eqs:
+ free_irq_vectors(dev);
+ return err;
+}
+
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
+{
+ destroy_comp_eqs(dev);
+ destroy_async_eqs(dev);
+ free_irq_vectors(dev);
+}
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+ struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+ if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
+}
+
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+ struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+ if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index d004957328f9..8a67fd197b79 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -36,10 +36,10 @@
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "eswitch.h"
#include "fs_core.h"
-
-#define UPLINK_VPORT 0xFFFF
+#include "ecpf.h"
enum {
MLX5_ACTION_NONE = 0,
@@ -51,7 +51,7 @@ enum {
struct vport_addr {
struct l2addr_node node;
u8 action;
- u32 vport;
+ u16 vport;
struct mlx5_flow_handle *flow_rule;
bool mpfs; /* UC MAC was added to MPFs */
/* A flag indicating that mac was added due to mc promiscuous vport */
@@ -64,11 +64,36 @@ enum {
PROMISC_CHANGE = BIT(3),
};
+static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw);
+static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw);
+
/* Vport context events */
#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \
MC_ADDR_CHANGE | \
PROMISC_CHANGE)
+/* The vport getter/iterator are only valid after esw->total_vports
+ * and vport->vport are initialized in mlx5_eswitch_init.
+ */
+#define mlx5_esw_for_all_vports(esw, i, vport) \
+ for ((i) = MLX5_VPORT_PF; \
+ (vport) = &(esw)->vports[i], \
+ (i) < (esw)->total_vports; (i)++)
+
+#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \
+ for ((i) = MLX5_VPORT_FIRST_VF; \
+ (vport) = &(esw)->vports[i], \
+ (i) <= (nvfs); (i)++)
+
+static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
+
+ WARN_ON(idx > esw->total_vports - 1);
+ return &esw->vports[idx];
+}
+
static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
u32 events_mask)
{
@@ -80,8 +105,7 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1);
MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
in, nic_vport_context);
@@ -109,12 +133,11 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
MLX5_SET(modify_esw_vport_context_in, in, opcode,
MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+ MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
-static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
+static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
u16 vlan, u8 qos, u8 set_flags)
{
u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
@@ -151,7 +174,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
/* E-Switch FDB */
static struct mlx5_flow_handle *
-__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
+__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule,
u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN])
{
int match_header = (is_zero_ether_addr(mac_c) ? 0 :
@@ -187,7 +210,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
misc_parameters);
mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
misc_parameters);
- MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT);
+ MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK);
MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port);
}
@@ -214,7 +237,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
}
static struct mlx5_flow_handle *
-esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
+esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport)
{
u8 mac_c[ETH_ALEN];
@@ -223,7 +246,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
}
static struct mlx5_flow_handle *
-esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
+esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport)
{
u8 mac_c[ETH_ALEN];
u8 mac_v[ETH_ALEN];
@@ -236,7 +259,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
}
static struct mlx5_flow_handle *
-esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport)
+esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport)
{
u8 mac_c[ETH_ALEN];
u8 mac_v[ETH_ALEN];
@@ -246,6 +269,37 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport)
return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v);
}
+enum {
+ LEGACY_VEPA_PRIO = 0,
+ LEGACY_FDB_PRIO,
+};
+
+static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb;
+ int err;
+
+ root_ns = mlx5_get_fdb_sub_ns(dev, 0);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* num FTE 2, num FG 2 */
+ fdb = mlx5_create_auto_grouped_flow_table(root_ns, LEGACY_VEPA_PRIO,
+ 2, 2, 0, 0);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create VEPA FDB err %d\n", err);
+ return err;
+ }
+ esw->fdb_table.legacy.vepa_fdb = fdb;
+
+ return 0;
+}
+
static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -274,8 +328,8 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
return -ENOMEM;
table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
-
ft_attr.max_fte = table_size;
+ ft_attr.prio = LEGACY_FDB_PRIO;
fdb = mlx5_create_flow_table(root_ns, &ft_attr);
if (IS_ERR(fdb)) {
err = PTR_ERR(fdb);
@@ -334,41 +388,67 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
esw->fdb_table.legacy.promisc_grp = g;
out:
- if (err) {
- if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) {
- mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
- esw->fdb_table.legacy.allmulti_grp = NULL;
- }
- if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) {
- mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
- esw->fdb_table.legacy.addr_grp = NULL;
- }
- if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.fdb)) {
- mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
- esw->fdb_table.legacy.fdb = NULL;
- }
- }
+ if (err)
+ esw_destroy_legacy_fdb_table(esw);
kvfree(flow_group_in);
return err;
}
+static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw)
+{
+ esw_debug(esw->dev, "Destroy VEPA Table\n");
+ if (!esw->fdb_table.legacy.vepa_fdb)
+ return;
+
+ mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb);
+ esw->fdb_table.legacy.vepa_fdb = NULL;
+}
+
static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
{
+ esw_debug(esw->dev, "Destroy FDB Table\n");
if (!esw->fdb_table.legacy.fdb)
return;
- esw_debug(esw->dev, "Destroy FDB Table\n");
- mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
- mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
- mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+ if (esw->fdb_table.legacy.promisc_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
+ if (esw->fdb_table.legacy.allmulti_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+ if (esw->fdb_table.legacy.addr_grp)
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
+
esw->fdb_table.legacy.fdb = NULL;
esw->fdb_table.legacy.addr_grp = NULL;
esw->fdb_table.legacy.allmulti_grp = NULL;
esw->fdb_table.legacy.promisc_grp = NULL;
}
+static int esw_create_legacy_table(struct mlx5_eswitch *esw)
+{
+ int err;
+
+ memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb));
+
+ err = esw_create_legacy_vepa_table(esw);
+ if (err)
+ return err;
+
+ err = esw_create_legacy_fdb_table(esw);
+ if (err)
+ esw_destroy_legacy_vepa_table(esw);
+
+ return err;
+}
+
+static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
+{
+ esw_cleanup_vepa_rules(esw);
+ esw_destroy_legacy_fdb_table(esw);
+ esw_destroy_legacy_vepa_table(esw);
+}
+
/* E-Switch vport UC/MC lists management */
typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
struct vport_addr *vaddr);
@@ -376,19 +456,19 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
u8 *mac = vaddr->node.addr;
- u32 vport = vaddr->vport;
+ u16 vport = vaddr->vport;
int err;
- /* Skip mlx5_mpfs_add_mac for PFs,
- * it is already done by the PF netdev in mlx5e_execute_l2_action
+ /* Skip mlx5_mpfs_add_mac for the eswitch manager;
+ * it is already done by its netdev in mlx5e_execute_l2_action
*/
- if (!vport)
+ if (esw->manager_vport == vport)
goto fdb_add;
err = mlx5_mpfs_add_mac(esw->dev, mac);
if (err) {
esw_warn(esw->dev,
- "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n",
+ "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n",
mac, vport, err);
return err;
}
@@ -408,13 +488,13 @@ fdb_add:
static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
u8 *mac = vaddr->node.addr;
- u32 vport = vaddr->vport;
+ u16 vport = vaddr->vport;
int err = 0;
- /* Skip mlx5_mpfs_del_mac for PFs,
- * it is already done by the PF netdev in mlx5e_execute_l2_action
+ /* Skip mlx5_mpfs_del_mac for the eswitch manager;
+ * it is already done by its netdev in mlx5e_execute_l2_action
*/
- if (!vport || !vaddr->mpfs)
+ if (!vaddr->mpfs || esw->manager_vport == vport)
goto fdb_del;
err = mlx5_mpfs_del_mac(esw->dev, mac);
@@ -437,17 +517,18 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
struct esw_mc_addr *esw_mc)
{
u8 *mac = vaddr->node.addr;
- u32 vport_idx = 0;
+ struct mlx5_vport *vport;
+ u16 i, vport_num;
- for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) {
- struct mlx5_vport *vport = &esw->vports[vport_idx];
+ mlx5_esw_for_all_vports(esw, i, vport) {
struct hlist_head *vport_hash = vport->mc_list;
struct vport_addr *iter_vaddr =
l2addr_hash_find(vport_hash,
mac,
struct vport_addr);
+ vport_num = vport->vport;
if (IS_ERR_OR_NULL(vport->allmulti_rule) ||
- vaddr->vport == vport_idx)
+ vaddr->vport == vport_num)
continue;
switch (vaddr->action) {
case MLX5_ACTION_ADD:
@@ -459,14 +540,14 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
if (!iter_vaddr) {
esw_warn(esw->dev,
"ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n",
- mac, vport_idx);
+ mac, vport_num);
continue;
}
- iter_vaddr->vport = vport_idx;
+ iter_vaddr->vport = vport_num;
iter_vaddr->flow_rule =
esw_fdb_set_vport_rule(esw,
mac,
- vport_idx);
+ vport_num);
iter_vaddr->mc_promisc = true;
break;
case MLX5_ACTION_DEL:
@@ -484,7 +565,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
struct hlist_head *hash = esw->mc_table;
struct esw_mc_addr *esw_mc;
u8 *mac = vaddr->node.addr;
- u32 vport = vaddr->vport;
+ u16 vport = vaddr->vport;
if (!esw->fdb_table.legacy.fdb)
return 0;
@@ -498,7 +579,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
return -ENOMEM;
esw_mc->uplink_rule = /* Forward MC MAC to Uplink */
- esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT);
+ esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK);
/* Add this multicast mac to all the mc promiscuous vports */
update_allmulti_vports(esw, vaddr, esw_mc);
@@ -524,7 +605,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
struct hlist_head *hash = esw->mc_table;
struct esw_mc_addr *esw_mc;
u8 *mac = vaddr->node.addr;
- u32 vport = vaddr->vport;
+ u16 vport = vaddr->vport;
if (!esw->fdb_table.legacy.fdb)
return 0;
@@ -563,9 +644,9 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
/* Apply vport UC/MC list to HW l2 table and FDB table */
static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
- u32 vport_num, int list_type)
+ u16 vport_num, int list_type)
{
- struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
vport_addr_action vport_addr_add;
vport_addr_action vport_addr_del;
@@ -598,9 +679,9 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
/* Sync vport UC/MC list from vport context */
static void esw_update_vport_addr_list(struct mlx5_eswitch *esw,
- u32 vport_num, int list_type)
+ u16 vport_num, int list_type)
{
- struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
u8 (*mac_list)[ETH_ALEN];
struct l2addr_node *node;
@@ -685,9 +766,9 @@ out:
/* Sync vport UC/MC list from vport context
* Must be called after esw_update_vport_addr_list
*/
-static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num)
+static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num)
{
- struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
struct l2addr_node *node;
struct vport_addr *addr;
struct hlist_head *hash;
@@ -720,11 +801,11 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num)
}
/* Apply vport rx mode to HW FDB table */
-static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num,
+static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num,
bool promisc, bool mc_promisc)
{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
struct esw_mc_addr *allmulti_addr = &esw->mc_promisc;
- struct mlx5_vport *vport = &esw->vports[vport_num];
if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc)
goto promisc;
@@ -735,7 +816,7 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num,
if (!allmulti_addr->uplink_rule)
allmulti_addr->uplink_rule =
esw_fdb_set_vport_allmulti_rule(esw,
- UPLINK_VPORT);
+ MLX5_VPORT_UPLINK);
allmulti_addr->refcnt++;
} else if (vport->allmulti_rule) {
mlx5_del_flow_rules(vport->allmulti_rule);
@@ -763,9 +844,9 @@ promisc:
}
/* Sync vport rx mode from vport context */
-static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num)
+static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num)
{
- struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
int promisc_all = 0;
int promisc_uc = 0;
int promisc_mc = 0;
@@ -1133,13 +1214,6 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
int err = 0;
u8 *smac_v;
- if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {
- mlx5_core_warn(esw->dev,
- "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
- vport->vport);
- return -EPERM;
- }
-
esw_vport_cleanup_ingress_rules(esw, vport);
if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
@@ -1349,8 +1423,8 @@ static void esw_destroy_tsar(struct mlx5_eswitch *esw)
static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num,
u32 initial_max_rate, u32 initial_bw_share)
{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
- struct mlx5_vport *vport = &esw->vports[vport_num];
struct mlx5_core_dev *dev = esw->dev;
void *vport_elem;
int err = 0;
@@ -1389,7 +1463,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num,
static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num)
{
- struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
int err = 0;
if (!vport->qos.enabled)
@@ -1408,8 +1482,8 @@ static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num)
static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num,
u32 max_rate, u32 bw_share)
{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
- struct mlx5_vport *vport = &esw->vports[vport_num];
struct mlx5_core_dev *dev = esw->dev;
void *vport_elem;
u32 bitmask = 0;
@@ -1465,15 +1539,22 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
{
int vport_num = vport->vport;
- if (!vport_num)
+ if (esw->manager_vport == vport_num)
return;
mlx5_modify_vport_admin_state(esw->dev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
- vport_num,
+ vport_num, 1,
vport->info.link_state);
- mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
- mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid);
+
+ /* Host PF has its own mac/guid. */
+ if (vport_num) {
+ mlx5_modify_nic_vport_mac_address(esw->dev, vport_num,
+ vport->info.mac);
+ mlx5_modify_nic_vport_node_guid(esw->dev, vport_num,
+ vport->info.node_guid);
+ }
+
modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
(vport->info.vlan || vport->info.qos));
@@ -1519,10 +1600,10 @@ static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport)
mlx5_fc_destroy(dev, vport->egress.drop_counter);
}
-static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
+static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
int enable_events)
{
- struct mlx5_vport *vport = &esw->vports[vport_num];
+ u16 vport_num = vport->vport;
mutex_lock(&esw->state_lock);
WARN_ON(vport->enabled);
@@ -1545,8 +1626,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
vport->enabled_events = enable_events;
vport->enabled = true;
- /* only PF is trusted by default */
- if (!vport_num)
+ /* The e-switch manager is trusted by default. The host PF (vport 0)
+ * is trusted as well on a SmartNIC, as it is a vport group manager.
+ */
+ if (esw->manager_vport == vport_num ||
+ (!vport_num && mlx5_core_is_ecpf(esw->dev)))
vport->info.trusted = true;
esw_vport_change_handle_locked(vport);
@@ -1556,9 +1640,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
mutex_unlock(&esw->state_lock);
}
-static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
+static void esw_disable_vport(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
{
- struct mlx5_vport *vport = &esw->vports[vport_num];
+ u16 vport_num = vport->vport;
if (!vport->enabled)
return;
@@ -1567,7 +1652,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
/* Mark this vport as disabled to discard new events */
vport->enabled = false;
- synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC));
/* Wait for current already scheduled events to complete */
flush_workqueue(esw->work_queue);
/* Disable events from this vport */
@@ -1580,10 +1664,11 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
esw_vport_change_handle_locked(vport);
vport->enabled_events = 0;
esw_vport_disable_qos(esw, vport_num);
- if (vport_num && esw->mode == SRIOV_LEGACY) {
+ if (esw->manager_vport != vport_num &&
+ esw->mode == SRIOV_LEGACY) {
mlx5_modify_vport_admin_state(esw->dev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
- vport_num,
+ vport_num, 1,
MLX5_VPORT_ADMIN_STATE_DOWN);
esw_vport_disable_egress_acl(esw, vport);
esw_vport_disable_ingress_acl(esw, vport);
@@ -1593,12 +1678,29 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
mutex_unlock(&esw->state_lock);
}
+static int eswitch_vport_event(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb);
+ struct mlx5_eqe *eqe = data;
+ struct mlx5_vport *vport;
+ u16 vport_num;
+
+ vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (vport->enabled)
+ queue_work(esw->work_queue, &vport->vport_change_handler);
+
+ return NOTIFY_OK;
+}
+
/* Public E-Switch API */
#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
-
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
{
+ int vf_nvports = 0, total_nvports = 0;
+ struct mlx5_vport *vport;
int err;
int i, enabled_events;
@@ -1615,14 +1717,31 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n");
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
+
+ if (mode == SRIOV_OFFLOADS) {
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports);
+ if (err)
+ return err;
+ total_nvports = esw->total_vports;
+ } else {
+ vf_nvports = nvfs;
+ total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev);
+ }
+ }
+
esw->mode = mode;
+ mlx5_lag_update(esw->dev);
+
if (mode == SRIOV_LEGACY) {
- err = esw_create_legacy_fdb_table(esw);
+ err = esw_create_legacy_table(esw);
+ if (err)
+ goto abort;
} else {
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-
- err = esw_offloads_init(esw, nvfs + 1);
+ err = esw_offloads_init(esw, vf_nvports, total_nvports);
}
if (err)
@@ -1637,8 +1756,25 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
* 2. FDB/Eswitch is programmed by user space tools
*/
enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0;
- for (i = 0; i <= nvfs; i++)
- esw_enable_vport(esw, i, enabled_events);
+
+ /* Enable PF vport */
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+ esw_enable_vport(esw, vport, enabled_events);
+
+ /* Enable ECPF vports */
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+ esw_enable_vport(esw, vport, enabled_events);
+ }
+
+ /* Enable VF vports */
+ mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)
+ esw_enable_vport(esw, vport, enabled_events);
+
+ if (mode == SRIOV_LEGACY) {
+ MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+ mlx5_eq_notifier_register(esw->dev, &esw->nb);
+ }
esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
esw->enabled_vports);
@@ -1647,8 +1783,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
abort:
esw->mode = SRIOV_NONE;
- if (mode == SRIOV_OFFLOADS)
+ if (mode == SRIOV_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
+ }
return err;
}
@@ -1656,8 +1794,8 @@ abort:
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
{
struct esw_mc_addr *mc_promisc;
+ struct mlx5_vport *vport;
int old_mode;
- int nvports;
int i;
if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE)
@@ -1667,10 +1805,12 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
esw->enabled_vports, esw->mode);
mc_promisc = &esw->mc_promisc;
- nvports = esw->enabled_vports;
- for (i = 0; i < esw->total_vports; i++)
- esw_disable_vport(esw, i);
+ if (esw->mode == SRIOV_LEGACY)
+ mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+
+ mlx5_esw_for_all_vports(esw, i, vport)
+ esw_disable_vport(esw, vport);
if (mc_promisc && mc_promisc->uplink_rule)
mlx5_del_flow_rules(mc_promisc->uplink_rule);
@@ -1678,25 +1818,29 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
esw_destroy_tsar(esw);
if (esw->mode == SRIOV_LEGACY)
- esw_destroy_legacy_fdb_table(esw);
+ esw_destroy_legacy_table(esw);
else if (esw->mode == SRIOV_OFFLOADS)
- esw_offloads_cleanup(esw, nvports);
+ esw_offloads_cleanup(esw);
old_mode = esw->mode;
esw->mode = SRIOV_NONE;
- if (old_mode == SRIOV_OFFLOADS)
+ mlx5_lag_update(esw->dev);
+
+ if (old_mode == SRIOV_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
+ }
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
{
int total_vports = MLX5_TOTAL_VPORTS(dev);
struct mlx5_eswitch *esw;
- int vport_num;
- int err;
+ struct mlx5_vport *vport;
+ int err, i;
- if (!MLX5_ESWITCH_MANAGER(dev))
+ if (!MLX5_VPORT_MANAGER(dev))
return 0;
esw_info(dev,
@@ -1710,6 +1854,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
return -ENOMEM;
esw->dev = dev;
+ esw->manager_vport = mlx5_eswitch_manager_vport(dev);
esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
if (!esw->work_queue) {
@@ -1724,6 +1869,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
goto abort;
}
+ esw->total_vports = total_vports;
+
err = esw_offloads_init_reps(esw);
if (err)
goto abort;
@@ -1732,17 +1879,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
hash_init(esw->offloads.mod_hdr_tbl);
mutex_init(&esw->state_lock);
- for (vport_num = 0; vport_num < total_vports; vport_num++) {
- struct mlx5_vport *vport = &esw->vports[vport_num];
-
- vport->vport = vport_num;
+ mlx5_esw_for_all_vports(esw, i, vport) {
+ vport->vport = mlx5_eswitch_index_to_vport_num(esw, i);
vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
vport->dev = dev;
INIT_WORK(&vport->vport_change_handler,
esw_vport_change_handler);
}
- esw->total_vports = total_vports;
esw->enabled_vports = 0;
esw->mode = SRIOV_NONE;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
@@ -1765,7 +1909,7 @@ abort:
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
{
- if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev))
+ if (!esw || !MLX5_VPORT_MANAGER(esw->dev))
return;
esw_info(esw->dev, "cleanup\n");
@@ -1777,23 +1921,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
kfree(esw);
}
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
-{
- struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change;
- u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
- struct mlx5_vport *vport;
-
- if (!esw) {
- pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
- vport_num);
- return;
- }
-
- vport = &esw->vports[vport_num];
- if (vport->enabled)
- queue_work(esw->work_queue, &vport->vport_change_handler);
-}
-
/* Vport Administration */
#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
@@ -1804,7 +1931,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
u64 node_guid;
int err = 0;
- if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
return -EPERM;
if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac))
return -EINVAL;
@@ -1812,13 +1939,10 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
mutex_lock(&esw->state_lock);
evport = &esw->vports[vport];
- if (evport->info.spoofchk && !is_valid_ether_addr(mac)) {
+ if (evport->info.spoofchk && !is_valid_ether_addr(mac))
mlx5_core_warn(esw->dev,
- "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n",
+ "Set invalid MAC while spoofchk is on, vport(%d)\n",
vport);
- err = -EPERM;
- goto unlock;
- }
err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
if (err) {
@@ -1861,7 +1985,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
err = mlx5_modify_vport_admin_state(esw->dev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
- vport, link_state);
+ vport, 1, link_state);
if (err) {
mlx5_core_warn(esw->dev,
"Failed to set vport %d link state, err = %d",
@@ -1881,7 +2005,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
{
struct mlx5_vport *evport;
- if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
return -EPERM;
if (!LEGAL_VPORT(esw, vport))
return -EINVAL;
@@ -1964,6 +2088,10 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
evport = &esw->vports[vport];
pschk = evport->info.spoofchk;
evport->info.spoofchk = spoofchk;
+ if (pschk && !is_valid_ether_addr(evport->info.mac))
+ mlx5_core_warn(esw->dev,
+ "Spoofchk in set while MAC is invalid, vport(%d)\n",
+ evport->vport);
if (evport->enabled && esw->mode == SRIOV_LEGACY)
err = esw_vport_ingress_config(esw, evport);
if (err)
@@ -1973,6 +2101,128 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
return err;
}
+static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw)
+{
+ if (esw->fdb_table.legacy.vepa_uplink_rule)
+ mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule);
+
+ if (esw->fdb_table.legacy.vepa_star_rule)
+ mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule);
+
+ esw->fdb_table.legacy.vepa_uplink_rule = NULL;
+ esw->fdb_table.legacy.vepa_star_rule = NULL;
+}
+
+static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw,
+ u8 setting)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ void *misc;
+
+ if (!setting) {
+ esw_cleanup_vepa_rules(esw);
+ return 0;
+ }
+
+ if (esw->fdb_table.legacy.vepa_uplink_rule)
+ return 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* Uplink rule forwards uplink traffic to the FDB */
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = esw->fdb_table.legacy.fdb;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto out;
+ } else {
+ esw->fdb_table.legacy.vepa_uplink_rule = flow_rule;
+ }
+
+ /* Star rule to forward all traffic to uplink vport */
+ memset(spec, 0, sizeof(*spec));
+ memset(&dest, 0, sizeof(dest));
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = MLX5_VPORT_UPLINK;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto out;
+ } else {
+ esw->fdb_table.legacy.vepa_star_rule = flow_rule;
+ }
+
+out:
+ kvfree(spec);
+ if (err)
+ esw_cleanup_vepa_rules(esw);
+ return err;
+}
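+
+/* Taken together, the two rules above implement VEPA: traffic arriving
+ * from the uplink matches the source-port rule and proceeds to the
+ * regular legacy FDB, while traffic from any other vport falls through
+ * to the match-all star rule and is sent back out the uplink, so
+ * VF-to-VF forwarding is delegated to the external bridge.
+ */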
+
+int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
+{
+ int err = 0;
+
+ if (!esw)
+ return -EOPNOTSUPP;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != SRIOV_LEGACY) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ err = _mlx5_eswitch_set_vepa_locked(esw, setting);
+
+out:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
+{
+ int err = 0;
+
+ if (!esw)
+ return -EOPNOTSUPP;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != SRIOV_LEGACY) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
+
+out:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
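+
+/* Minimal caller sketch (the calling path and names are assumptions,
+ * not part of this patch): a bridge-mode request from user space would
+ * typically reach these helpers via an ndo_bridge_setlink handler:
+ *
+ *	u8 setting = (mode == BRIDGE_MODE_VEPA);
+ *	int err;
+ *
+ *	err = mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting);
+ */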
+
int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
int vport, bool setting)
{
@@ -2000,8 +2250,7 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
u32 max_guarantee = 0;
int i;
- for (i = 0; i < esw->total_vports; i++) {
- evport = &esw->vports[i];
+ mlx5_esw_for_all_vports(esw, i, evport) {
if (!evport->enabled || evport->info.min_rate < max_guarantee)
continue;
max_guarantee = evport->info.min_rate;
@@ -2020,8 +2269,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider)
int err;
int i;
- for (i = 0; i < esw->total_vports; i++) {
- evport = &esw->vports[i];
+ mlx5_esw_for_all_vports(esw, i, evport) {
if (!evport->enabled)
continue;
vport_min_rate = evport->info.min_rate;
@@ -2036,7 +2284,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider)
if (bw_share == evport->qos.bw_share)
continue;
- err = esw_vport_qos_config(esw, i, vport_max_rate,
+ err = esw_vport_qos_config(esw, evport->vport, vport_max_rate,
bw_share);
if (!err)
evport->qos.bw_share = bw_share;
@@ -2050,19 +2298,24 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider)
int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport,
u32 max_rate, u32 min_rate)
{
- u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
- bool min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
- fw_max_bw_share >= MLX5_MIN_BW_SHARE;
- bool max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
struct mlx5_vport *evport;
+ u32 fw_max_bw_share;
u32 previous_min_rate;
u32 divider;
+ bool min_rate_supported;
+ bool max_rate_supported;
int err = 0;
if (!ESW_ALLOWED(esw))
return -EPERM;
if (!LEGAL_VPORT(esw, vport))
return -EINVAL;
+
+ fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
+ fw_max_bw_share >= MLX5_MIN_BW_SHARE;
+ max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
+
if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported))
return -EOPNOTSUPP;
@@ -2119,7 +2372,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
!MLX5_CAP_GEN(dev, transmit_discard_vport_down))
return 0;
- err = mlx5_query_vport_down_stats(dev, vport_idx,
+ err = mlx5_query_vport_down_stats(dev, vport_idx, 1,
&rx_discard_vport_down,
&tx_discard_vport_down);
if (err)
@@ -2156,8 +2409,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
MLX5_CMD_OP_QUERY_VPORT_COUNTER);
MLX5_SET(query_vport_counter_in, in, op_mod, 0);
MLX5_SET(query_vport_counter_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+ MLX5_SET(query_vport_counter_in, in, other_vport, 1);
memset(out, 0, outlen);
err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
@@ -2219,3 +2471,21 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
+
+bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
+{
+ if ((dev0->priv.eswitch->mode == SRIOV_NONE &&
+ dev1->priv.eswitch->mode == SRIOV_NONE) ||
+ (dev0->priv.eswitch->mode == SRIOV_OFFLOADS &&
+ dev1->priv.eswitch->mode == SRIOV_OFFLOADS))
+ return true;
+
+ return false;
+}
+
+bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1)
+{
+ return (dev0->priv.eswitch->mode == SRIOV_OFFLOADS &&
+ dev1->priv.eswitch->mode == SRIOV_OFFLOADS);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index aaafc9f17115..3f3cd32ae60a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -38,6 +38,7 @@
#include <net/devlink.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "lib/mpfs.h"
@@ -49,8 +50,6 @@
#define MLX5_MAX_MC_PER_VPORT(dev) \
(1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
-#define FDB_UPLINK_VPORT 0xffff
-
#define MLX5_MIN_BW_SHARE 1
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
@@ -138,11 +137,16 @@ struct mlx5_eswitch_fdb {
struct mlx5_flow_group *addr_grp;
struct mlx5_flow_group *allmulti_grp;
struct mlx5_flow_group *promisc_grp;
+ struct mlx5_flow_table *vepa_fdb;
+ struct mlx5_flow_handle *vepa_uplink_rule;
+ struct mlx5_flow_handle *vepa_star_rule;
} legacy;
struct offloads_fdb {
struct mlx5_flow_table *slow_fdb;
struct mlx5_flow_group *send_to_vport_grp;
+ struct mlx5_flow_group *peer_miss_grp;
+ struct mlx5_flow_handle **peer_miss_rules;
struct mlx5_flow_group *miss_grp;
struct mlx5_flow_handle *miss_rule_uni;
struct mlx5_flow_handle *miss_rule_multi;
@@ -165,6 +169,8 @@ struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
struct mlx5_eswitch_rep *vport_reps;
+ struct list_head peer_flows;
+ struct mutex peer_mutex;
DECLARE_HASHTABLE(encap_tbl, 8);
DECLARE_HASHTABLE(mod_hdr_tbl, 8);
u8 inline_mode;
@@ -179,8 +185,19 @@ struct esw_mc_addr { /* SRIOV only */
u32 refcnt;
};
+struct mlx5_host_work {
+ struct work_struct work;
+ struct mlx5_eswitch *esw;
+};
+
+struct mlx5_host_info {
+ struct mlx5_nb nb;
+ u16 num_vfs;
+};
+
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
+ struct mlx5_nb nb;
struct mlx5_eswitch_fdb fdb_table;
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
struct workqueue_struct *work_queue;
@@ -201,17 +218,19 @@ struct mlx5_eswitch {
struct mlx5_esw_offload offloads;
int mode;
int nvports;
+ u16 manager_vport;
+ struct mlx5_host_info host_info;
};
-void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
-int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
+void esw_offloads_cleanup(struct mlx5_eswitch *esw);
+int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
+ int total_nvports);
void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
int esw_offloads_init_reps(struct mlx5_eswitch *esw);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
@@ -226,6 +245,8 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
int vport_num, bool setting);
int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport,
u32 max_rate, u32 min_rate);
+int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
+int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting);
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
int vport, struct ifla_vf_info *ivi);
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
@@ -281,13 +302,17 @@ enum mlx5_flow_match_level {
/* current maximum for flow based vport multicasting */
#define MLX5_MAX_FLOW_FWD_VPORTS 2
+enum {
+ MLX5_ESW_DEST_ENCAP = BIT(0),
+ MLX5_ESW_DEST_ENCAP_VALID = BIT(1),
+};
+
struct mlx5_esw_flow_attr {
struct mlx5_eswitch_rep *in_rep;
- struct mlx5_eswitch_rep *out_rep[MLX5_MAX_FLOW_FWD_VPORTS];
- struct mlx5_core_dev *out_mdev[MLX5_MAX_FLOW_FWD_VPORTS];
struct mlx5_core_dev *in_mdev;
+ struct mlx5_core_dev *counter_dev;
- int mirror_count;
+ int split_count;
int out_count;
int action;
@@ -296,9 +321,15 @@ struct mlx5_esw_flow_attr {
u8 vlan_prio[MLX5_FS_VLAN_DEPTH];
u8 total_vlan;
bool vlan_handled;
- u32 encap_id;
+ struct {
+ u32 flags;
+ struct mlx5_eswitch_rep *rep;
+ struct mlx5_core_dev *mdev;
+ u32 encap_id;
+ } dests[MLX5_MAX_FLOW_FWD_VPORTS];
u32 mod_hdr_id;
u8 match_level;
+ u8 tunnel_match_level;
struct mlx5_fc *counter;
u32 chain;
u16 prio;
@@ -338,6 +369,11 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
}
+bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1);
+bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1);
+
#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
#define esw_info(dev, format, ...) \
@@ -348,13 +384,60 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
#define esw_debug(dev, format, ...) \
mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
+
+/* The returned number is valid only when the dev is an e-switch manager. */
+static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
+{
+ return mlx5_core_is_ecpf_esw_manager(dev) ?
+ MLX5_VPORT_ECPF : MLX5_VPORT_PF;
+}
+
+static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
+{
+ /* The uplink vport is always the last element of the array. */
+ return esw->total_vports - 1;
+}
+
+static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw)
+{
+ return esw->total_vports - 2;
+}
+
+static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ if (vport_num == MLX5_VPORT_ECPF) {
+ if (!mlx5_ecpf_vport_exists(esw->dev))
+ esw_warn(esw->dev, "ECPF vport doesn't exist!\n");
+ return mlx5_eswitch_ecpf_idx(esw);
+ }
+
+ if (vport_num == MLX5_VPORT_UPLINK)
+ return mlx5_eswitch_uplink_idx(esw);
+
+ return vport_num;
+}
+
+static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
+ int index)
+{
+ if (index == mlx5_eswitch_ecpf_idx(esw) &&
+ mlx5_ecpf_vport_exists(esw->dev))
+ return MLX5_VPORT_ECPF;
+
+ if (index == mlx5_eswitch_uplink_idx(esw))
+ return MLX5_VPORT_UPLINK;
+
+ return index;
+}
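+
+/* Resulting index <-> vport layout, for illustration (assuming two VFs
+ * and an existing ECPF vport, i.e. total_vports == 5):
+ *
+ *	index 0 <-> vport 0x0    (MLX5_VPORT_PF)
+ *	index 1 <-> vport 0x1    (VF 1)
+ *	index 2 <-> vport 0x2    (VF 2)
+ *	index 3 <-> vport 0xfffe (MLX5_VPORT_ECPF)
+ *	index 4 <-> vport 0xffff (MLX5_VPORT_UPLINK)
+ */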
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
-static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
#define FDB_MAX_CHAIN 1
#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 9eac137790f5..9b2d78ee22b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -39,15 +39,60 @@
#include "eswitch.h"
#include "en.h"
#include "fs_core.h"
+#include "lib/devcom.h"
+#include "ecpf.h"
+#include "lib/eq.h"
enum {
FDB_FAST_PATH = 0,
FDB_SLOW_PATH
};
+/* There are two match-all miss flows, one for unicast dst mac and
+ * one for multicast.
+ */
+#define MLX5_ESW_MISS_FLOWS (2)
+
#define fdb_prio_table(esw, chain, prio, level) \
(esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)]
+#define UPLINK_REP_INDEX 0
+
+/* The rep getter/iterator are only valid after esw->total_vports
+ * and vport->vport are initialized in mlx5_eswitch_init.
+ */
+#define mlx5_esw_for_all_reps(esw, i, rep) \
+ for ((i) = MLX5_VPORT_PF; \
+ (rep) = &(esw)->offloads.vport_reps[i], \
+ (i) < (esw)->total_vports; (i)++)
+
+#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \
+ for ((i) = MLX5_VPORT_FIRST_VF; \
+ (rep) = &(esw)->offloads.vport_reps[i], \
+ (i) <= (nvfs); (i)++)
+
+#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \
+ for ((i) = (nvfs); \
+ (rep) = &(esw)->offloads.vport_reps[i], \
+ (i) >= MLX5_VPORT_FIRST_VF; (i)--)
+
+#define mlx5_esw_for_each_vf_vport(esw, vport, nvfs) \
+ for ((vport) = MLX5_VPORT_FIRST_VF; \
+ (vport) <= (nvfs); (vport)++)
+
+#define mlx5_esw_for_each_vf_vport_reverse(esw, vport, nvfs) \
+ for ((vport) = (nvfs); \
+ (vport) >= MLX5_VPORT_FIRST_VF; (vport)--)
+
+static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
+
+ WARN_ON(idx > esw->total_vports - 1);
+ return &esw->offloads.vport_reps[idx];
+}
+
static struct mlx5_flow_table *
esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
static void
@@ -81,7 +126,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
{
struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
- bool mirror = !!(attr->mirror_count);
+ bool split = !!(attr->split_count);
struct mlx5_flow_handle *rule;
struct mlx5_flow_table *fdb;
int j, i = 0;
@@ -120,13 +165,21 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
dest[i].ft = ft;
i++;
} else {
- for (j = attr->mirror_count; j < attr->out_count; j++) {
+ for (j = attr->split_count; j < attr->out_count; j++) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->out_rep[j]->vport;
+ dest[i].vport.num = attr->dests[j].rep->vport;
dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
- dest[i].vport.vhca_id_valid =
- !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ dest[i].vport.flags |=
+ MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_act.reformat_id = attr->dests[j].encap_id;
+ dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+ dest[i].vport.reformat_id =
+ attr->dests[j].encap_id;
+ }
i++;
}
}
@@ -151,22 +204,20 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
MLX5_SET_TO_ONES(fte_match_set_misc, misc,
source_eswitch_owner_vhca_id);
- if (attr->match_level == MLX5_MATCH_NONE)
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
- else
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
- MLX5_MATCH_MISC_PARAMETERS;
-
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
- spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
+ if (attr->tunnel_match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+ if (attr->match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
+ } else if (attr->match_level != MLX5_MATCH_NONE) {
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+ }
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_id = attr->mod_hdr_id;
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
- flow_act.reformat_id = attr->encap_id;
-
- fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split);
if (IS_ERR(fdb)) {
rule = ERR_CAST(fdb);
goto err_esw_get;
@@ -181,7 +232,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
return rule;
err_add_rule:
- esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
err_esw_get:
if (attr->dest_chain)
esw_put_prio_table(esw, attr->dest_chain, 1, 0);
@@ -215,12 +266,17 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- for (i = 0; i < attr->mirror_count; i++) {
+ for (i = 0; i < attr->split_count; i++) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->out_rep[i]->vport;
+ dest[i].vport.num = attr->dests[i].rep->vport;
dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->out_mdev[i], vhca_id);
- dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
+ dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+ dest[i].vport.reformat_id = attr->dests[i].encap_id;
+ }
}
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest[i].ft = fwd_fdb,
@@ -268,7 +324,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
struct mlx5_esw_flow_attr *attr,
bool fwd_rule)
{
- bool mirror = (attr->mirror_count > 0);
+ bool split = (attr->split_count > 0);
mlx5_del_flow_rules(rule);
esw->offloads.num_flows--;
@@ -277,7 +333,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
esw_put_prio_table(esw, attr->chain, attr->prio, 1);
esw_put_prio_table(esw, attr->chain, attr->prio, 0);
} else {
- esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ esw_put_prio_table(esw, attr->chain, attr->prio, !!split);
if (attr->dest_chain)
esw_put_prio_table(esw, attr->dest_chain, 1, 0);
}
@@ -307,7 +363,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
rep = &esw->offloads.vport_reps[vf_vport];
- if (!rep->rep_if[REP_ETH].valid)
+ if (rep->rep_if[REP_ETH].state != REP_LOADED)
continue;
err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
@@ -325,7 +381,7 @@ esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop)
struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
in_rep = attr->in_rep;
- out_rep = attr->out_rep[0];
+ out_rep = attr->dests[0].rep;
if (push)
vport = in_rep;
@@ -346,17 +402,17 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
goto out_notsupp;
in_rep = attr->in_rep;
- out_rep = attr->out_rep[0];
+ out_rep = attr->dests[0].rep;
- if (push && in_rep->vport == FDB_UPLINK_VPORT)
+ if (push && in_rep->vport == MLX5_VPORT_UPLINK)
goto out_notsupp;
- if (pop && out_rep->vport == FDB_UPLINK_VPORT)
+ if (pop && out_rep->vport == MLX5_VPORT_UPLINK)
goto out_notsupp;
/* vport has vlan push configured, can't offload VF --> wire rules w.o it */
if (!push && !pop && fwd)
- if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT)
+ if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK)
goto out_notsupp;
/* protects against (1) setting rules with different vlans to push and
@@ -398,7 +454,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
if (!push && !pop && fwd) {
/* tracks VF --> wire rules without vlan push action */
- if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) {
+ if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
vport->vlan_refcount++;
attr->vlan_handled = true;
}
@@ -458,7 +514,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
if (!push && !pop && fwd) {
/* tracks VF --> wire rules without vlan push action */
- if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT)
+ if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
vport->vlan_refcount--;
return 0;
@@ -505,7 +561,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
- MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */
+ /* source vport is the esw manager */
+ MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport);
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
@@ -531,6 +588,134 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
+static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_destination *dest)
+{
+ void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(peer_dev, vhca_id));
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest->vport.num = peer_dev->priv.eswitch->manager_vport;
+ dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
+ dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+}
+
+static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
+ struct mlx5_core_dev *peer_dev)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_handle **flows;
+ struct mlx5_flow_handle *flow;
+ struct mlx5_flow_spec *spec;
+ /* total vports is the same for both e-switches */
+ int nvports = esw->total_vports;
+ void *misc;
+ int err, i;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ peer_miss_rules_setup(peer_dev, spec, &dest);
+
+ flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
+ if (!flows) {
+ err = -ENOMEM;
+ goto alloc_flows_err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF);
+ flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ goto add_pf_flow_err;
+ }
+ flows[MLX5_VPORT_PF] = flow;
+ }
+
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
+ flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ goto add_ecpf_flow_err;
+ }
+ flows[mlx5_eswitch_ecpf_idx(esw)] = flow;
+ }
+
+ mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) {
+ MLX5_SET(fte_match_set_misc, misc, source_port, i);
+ flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+ spec, &flow_act, &dest, 1);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ goto add_vf_flow_err;
+ }
+ flows[i] = flow;
+ }
+
+ esw->fdb_table.offloads.peer_miss_rules = flows;
+
+ kvfree(spec);
+ return 0;
+
+add_vf_flow_err:
+ nvports = --i;
+ mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports)
+ mlx5_del_flow_rules(flows[i]);
+
+ if (mlx5_ecpf_vport_exists(esw->dev))
+ mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+add_ecpf_flow_err:
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+ mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+add_pf_flow_err:
+ esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
+ kvfree(flows);
+alloc_flows_err:
+ kvfree(spec);
+ return err;
+}
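+
The error path above uses the reverse-order unwind idiom: `add_vf_flow_err` first rolls back only the VF rules that were actually installed (note `nvports = --i`, which excludes the rule that just failed), then falls through the labels that undo the ECPF and PF rules in the opposite order of their creation. A minimal standalone sketch of the idiom; `step_add()`/`step_del()` are illustrative placeholders, not mlx5 functions:

	/* Sketch of the reverse unwind used above. */
	static int setup_all(int n)
	{
		int err, i;

		for (i = 0; i < n; i++) {
			err = step_add(i);
			if (err)
				goto err_unwind;
		}
		return 0;

	err_unwind:
		while (i-- > 0)		/* undo only what succeeded */
			step_del(i);
		return err;
	}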
+
+static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_handle **flows;
+ int i;
+
+ flows = esw->fdb_table.offloads.peer_miss_rules;
+
+ mlx5_esw_for_each_vf_vport_reverse(esw, i, mlx5_core_max_vfs(esw->dev))
+ mlx5_del_flow_rules(flows[i]);
+
+ if (mlx5_ecpf_vport_exists(esw->dev))
+ mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+ mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+
+ kvfree(flows);
+}
+
static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
{
struct mlx5_flow_act flow_act = {0};
@@ -557,7 +742,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
dmac_c[0] = 0x01;
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest.vport.num = 0;
+ dest.vport.num = esw->manager_vport;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
@@ -801,7 +986,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
esw->fdb_table.offloads.fdb_left[i] =
ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
- table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
+ table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+ MLX5_ESW_MISS_FLOWS + esw->total_vports;
/* create the slow path fdb with encap set, so further table instances
* can be created at run time while VFs are probed if the FW allows that.
@@ -856,6 +1042,34 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
}
esw->fdb_table.offloads.send_to_vport_grp = g;
+ /* create peer esw miss group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ ix + esw->total_vports - 1);
+ ix += esw->total_vports;
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err);
+ goto peer_miss_err;
+ }
+ esw->fdb_table.offloads.peer_miss_grp = g;
+
/* create miss group */
memset(flow_group_in, 0, inlen);
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -867,7 +1081,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
dmac[0] = 0x01;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+ ix + MLX5_ESW_MISS_FLOWS);
g = mlx5_create_flow_group(fdb, flow_group_in);
if (IS_ERR(g)) {
@@ -888,6 +1103,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
miss_rule_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
miss_err:
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
+peer_miss_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
send_vport_err:
esw_destroy_offloads_fast_fdb_tables(esw);
@@ -907,13 +1124,14 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
esw_destroy_offloads_fast_fdb_tables(esw);
}
-static int esw_create_offloads_table(struct mlx5_eswitch *esw)
+static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_core_dev *dev = esw->dev;
@@ -927,7 +1145,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw)
return -EOPNOTSUPP;
}
- ft_attr.max_fte = dev->priv.sriov.num_vfs + 2;
+ ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS;
ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft_offloads)) {
@@ -947,16 +1165,15 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
mlx5_destroy_flow_table(offloads->ft_offloads);
}
-static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
+static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *g;
- struct mlx5_priv *priv = &esw->dev->priv;
u32 *flow_group_in;
void *match_criteria, *misc;
int err = 0;
- int nvports = priv->sriov.num_vfs + 2;
+ nvports = nvports + MLX5_ESW_MISS_FLOWS;
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (!flow_group_in)
return -ENOMEM;
@@ -1033,7 +1250,8 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
{
int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
- if (esw->mode != SRIOV_LEGACY) {
+ if (esw->mode != SRIOV_LEGACY &&
+ !mlx5_core_is_ecpf_esw_manager(esw->dev)) {
NL_SET_ERR_MSG_MOD(extack,
"Can't set offloads mode, SRIOV legacy not enabled");
return -EINVAL;
@@ -1071,9 +1289,8 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
{
int total_vfs = MLX5_TOTAL_VPORTS(esw->dev);
struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_esw_offload *offloads;
struct mlx5_eswitch_rep *rep;
- u8 hw_id[ETH_ALEN];
+ u8 hw_id[ETH_ALEN], rep_type;
int vport;
esw->offloads.vport_reps = kcalloc(total_vfs,
@@ -1082,75 +1299,203 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
if (!esw->offloads.vport_reps)
return -ENOMEM;
- offloads = &esw->offloads;
mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
- for (vport = 0; vport < total_vfs; vport++) {
- rep = &offloads->vport_reps[vport];
-
- rep->vport = vport;
+ mlx5_esw_for_all_reps(esw, vport, rep) {
+ rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport);
ether_addr_copy(rep->hw_id, hw_id);
- }
- offloads->vport_reps[0].vport = FDB_UPLINK_VPORT;
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
+ rep->rep_if[rep_type].state = REP_UNREGISTERED;
+ }
return 0;
}
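
This patch replaces the boolean `valid` flag with a small per-rep-type state machine: `REP_UNREGISTERED` after init, `REP_REGISTERED` once an interface supplies its `load`/`unload` callbacks, and `REP_LOADED` after a successful `load()`. A standalone model of the transitions (the enum names come from the diff; the helpers are illustrative):

	enum rep_state { REP_UNREGISTERED, REP_REGISTERED, REP_LOADED };

	static int rep_load(enum rep_state *st, int (*load)(void))
	{
		if (*st != REP_REGISTERED)	/* not registered, or already loaded */
			return 0;
		if (load())
			return -1;
		*st = REP_LOADED;
		return 0;
	}

	static void rep_unload(enum rep_state *st, void (*unload)(void))
	{
		if (*st != REP_LOADED)
			return;
		unload();
		*st = REP_REGISTERED;		/* registration survives an unload */
	}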
-static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports,
- u8 rep_type)
+static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep, u8 rep_type)
+{
+ if (rep->rep_if[rep_type].state != REP_LOADED)
+ return;
+
+ rep->rep_if[rep_type].unload(rep);
+ rep->rep_if[rep_type].state = REP_REGISTERED;
+}
+
+static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
{
struct mlx5_eswitch_rep *rep;
- int vport;
- for (vport = nvports - 1; vport >= 0; vport--) {
- rep = &esw->offloads.vport_reps[vport];
- if (!rep->rep_if[rep_type].valid)
- continue;
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+ }
- rep->rep_if[rep_type].unload(rep);
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+ __esw_offloads_unload_rep(esw, rep, rep_type);
}
+
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+}
+
+static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
+ u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+ int i;
+
+ mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports)
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+}
+
+static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports)
+{
+ u8 rep_type = NUM_REP_TYPES;
+
+ while (rep_type-- > 0)
+ __unload_reps_vf_vport(esw, nvports, rep_type);
+}
+
+static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
+ u8 rep_type)
+{
+ __unload_reps_vf_vport(esw, nvports, rep_type);
+
+ /* Special vports must be the last to unload. */
+ __unload_reps_special_vport(esw, rep_type);
}
-static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports)
+static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports)
{
u8 rep_type = NUM_REP_TYPES;
while (rep_type-- > 0)
- esw_offloads_unload_reps_type(esw, nvports, rep_type);
+ __unload_reps_all_vport(esw, nvports, rep_type);
}
-static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports,
- u8 rep_type)
+static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep, u8 rep_type)
+{
+ int err = 0;
+
+ if (rep->rep_if[rep_type].state != REP_REGISTERED)
+ return 0;
+
+ err = rep->rep_if[rep_type].load(esw->dev, rep);
+ if (err)
+ return err;
+
+ rep->rep_if[rep_type].state = REP_LOADED;
+
+ return 0;
+}
+
+static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
{
struct mlx5_eswitch_rep *rep;
- int vport;
int err;
- for (vport = 0; vport < nvports; vport++) {
- rep = &esw->offloads.vport_reps[vport];
- if (!rep->rep_if[rep_type].valid)
- continue;
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+ err = __esw_offloads_load_rep(esw, rep, rep_type);
+ if (err)
+ return err;
- err = rep->rep_if[rep_type].load(esw->dev, rep);
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+ err = __esw_offloads_load_rep(esw, rep, rep_type);
if (err)
- goto err_reps;
+ goto err_pf;
+ }
+
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
+ err = __esw_offloads_load_rep(esw, rep, rep_type);
+ if (err)
+ goto err_ecpf;
+ }
+
+ return 0;
+
+err_ecpf:
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+ }
+
+err_pf:
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+ __esw_offloads_unload_rep(esw, rep, rep_type);
+ return err;
+}
+
+static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports,
+ u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+ int err, i;
+
+ mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) {
+ err = __esw_offloads_load_rep(esw, rep, rep_type);
+ if (err)
+ goto err_vf;
}
return 0;
+err_vf:
+ __unload_reps_vf_vport(esw, --i, rep_type);
+ return err;
+}
+
+static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports)
+{
+ u8 rep_type = 0;
+ int err;
+
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+ err = __load_reps_vf_vport(esw, nvports, rep_type);
+ if (err)
+ goto err_reps;
+ }
+
+ return err;
+
err_reps:
- esw_offloads_unload_reps_type(esw, vport, rep_type);
+ while (rep_type-- > 0)
+ __unload_reps_vf_vport(esw, nvports, rep_type);
+ return err;
+}
+
+static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
+ u8 rep_type)
+{
+ int err;
+
+ /* Special vports must be loaded first. */
+ err = __load_reps_special_vport(esw, rep_type);
+ if (err)
+ return err;
+
+ err = __load_reps_vf_vport(esw, nvports, rep_type);
+ if (err)
+ goto err_vfs;
+
+ return 0;
+
+err_vfs:
+ __unload_reps_special_vport(esw, rep_type);
return err;
}
-static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
+static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
{
u8 rep_type = 0;
int err;
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
- err = esw_offloads_load_reps_type(esw, nvports, rep_type);
+ err = __load_reps_all_vport(esw, nvports, rep_type);
if (err)
goto err_reps;
}
@@ -1159,37 +1504,130 @@ static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
err_reps:
while (rep_type-- > 0)
- esw_offloads_unload_reps_type(esw, nvports, rep_type);
+ __unload_reps_all_vport(esw, nvports, rep_type);
return err;
}
-int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
+#define ESW_OFFLOADS_DEVCOM_PAIR (0)
+#define ESW_OFFLOADS_DEVCOM_UNPAIR (1)
+
+static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw)
{
int err;
+ err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
+
+static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
+{
+ mlx5e_tc_clean_fdb_peer_flows(esw);
+ esw_del_fdb_peer_miss_rules(esw);
+}
+
+static int mlx5_esw_offloads_devcom_event(int event,
+ void *my_data,
+ void *event_data)
+{
+ struct mlx5_eswitch *esw = my_data;
+ struct mlx5_eswitch *peer_esw = event_data;
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+ int err;
+
+ switch (event) {
+ case ESW_OFFLOADS_DEVCOM_PAIR:
+ err = mlx5_esw_offloads_pair(esw, peer_esw);
+ if (err)
+ goto err_out;
+
+ err = mlx5_esw_offloads_pair(peer_esw, esw);
+ if (err)
+ goto err_pair;
+
+ mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
+ break;
+
+ case ESW_OFFLOADS_DEVCOM_UNPAIR:
+ if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+ break;
+
+ mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
+ mlx5_esw_offloads_unpair(peer_esw);
+ mlx5_esw_offloads_unpair(esw);
+ break;
+ }
+
+ return 0;
+
+err_pair:
+ mlx5_esw_offloads_unpair(esw);
+
+err_out:
+ mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d",
+ event, err);
+ return err;
+}
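+
Note the commit ordering in the PAIR case: the miss rules are installed in both directions, and `mlx5_devcom_set_paired()` is called only after both succeed, so a half-paired state is never published. A standalone model of that two-sided commit, with illustrative names:

	/* Illustrative model of the symmetric pair/rollback above. */
	static int pair_both(void *a, void *b)
	{
		int err;

		err = pair_one(a, b);
		if (err)
			return err;
		err = pair_one(b, a);
		if (err) {
			unpair_one(a);		/* roll back the first direction */
			return err;
		}
		mark_paired(true);		/* commit only after both succeed */
		return 0;
	}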
+
+static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+
+ INIT_LIST_HEAD(&esw->offloads.peer_flows);
+ mutex_init(&esw->offloads.peer_mutex);
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return;
+
+ mlx5_devcom_register_component(devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS,
+ mlx5_esw_offloads_devcom_event,
+ esw);
+
+ mlx5_devcom_send_event(devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS,
+ ESW_OFFLOADS_DEVCOM_PAIR, esw);
+}
+
+static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+
+ if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ return;
+
+ mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+ ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
+
+ mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
+
+static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
+{
+ int err;
+
+ memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
err = esw_create_offloads_fdb_tables(esw, nvports);
if (err)
return err;
- err = esw_create_offloads_table(esw);
+ err = esw_create_offloads_table(esw, nvports);
if (err)
goto create_ft_err;
- err = esw_create_vport_rx_group(esw);
+ err = esw_create_vport_rx_group(esw, nvports);
if (err)
goto create_fg_err;
- err = esw_offloads_load_reps(esw, nvports);
- if (err)
- goto err_reps;
-
return 0;
-err_reps:
- esw_destroy_vport_rx_group(esw);
-
create_fg_err:
esw_destroy_offloads_table(esw);
@@ -1199,6 +1637,95 @@ create_ft_err:
return err;
}
+static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
+{
+ esw_destroy_vport_rx_group(esw);
+ esw_destroy_offloads_table(esw);
+ esw_destroy_offloads_fdb_tables(esw);
+}
+
+static void esw_host_params_event_handler(struct work_struct *work)
+{
+ struct mlx5_host_work *host_work;
+ struct mlx5_eswitch *esw;
+ int err, num_vf = 0;
+
+ host_work = container_of(work, struct mlx5_host_work, work);
+ esw = host_work->esw;
+
+ err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf);
+ if (err || num_vf == esw->host_info.num_vfs)
+ goto out;
+
+ /* Number of VFs can only change from "0 to x" or "x to 0". */
+ if (esw->host_info.num_vfs > 0) {
+ esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs);
+ } else {
+ err = esw_offloads_load_vf_reps(esw, num_vf);
+
+ if (err)
+ goto out;
+ }
+
+ esw->host_info.num_vfs = num_vf;
+
+out:
+ kfree(host_work);
+}
+
+static int esw_host_params_event(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_host_work *host_work;
+ struct mlx5_host_info *host_info;
+ struct mlx5_eswitch *esw;
+
+ host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC);
+ if (!host_work)
+ return NOTIFY_DONE;
+
+ host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb);
+ esw = container_of(host_info, struct mlx5_eswitch, host_info);
+
+ host_work->esw = esw;
+
+ INIT_WORK(&host_work->work, esw_host_params_event_handler);
+ queue_work(esw->work_queue, &host_work->work);
+
+ return NOTIFY_OK;
+}
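+
The handler pair demonstrates the standard notifier-to-workqueue handoff: the notifier may run in atomic context, so it only allocates (with `GFP_ATOMIC`) and queues work, while the sleeping part (`mlx5_query_host_params_num_vfs()` and the rep load/unload) runs in the work handler. A generic, self-contained sketch of the pattern; all names here are illustrative:

	#include <linux/notifier.h>
	#include <linux/slab.h>
	#include <linux/workqueue.h>

	struct my_work {
		struct work_struct work;
		/* event context copied here */
	};

	static void my_work_handler(struct work_struct *work)
	{
		struct my_work *mw = container_of(work, struct my_work, work);

		/* sleeping work happens here */
		kfree(mw);
	}

	static int my_event(struct notifier_block *nb, unsigned long type, void *data)
	{
		struct my_work *mw = kzalloc(sizeof(*mw), GFP_ATOMIC);

		if (!mw)
			return NOTIFY_DONE;
		INIT_WORK(&mw->work, my_work_handler);
		schedule_work(&mw->work);
		return NOTIFY_OK;
	}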
+
+int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
+ int total_nvports)
+{
+ int err;
+
+ mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
+
+ err = esw_offloads_steering_init(esw, total_nvports);
+ if (err)
+ return err;
+
+ err = esw_offloads_load_all_reps(esw, vf_nvports);
+ if (err)
+ goto err_reps;
+
+ esw_offloads_devcom_init(esw);
+
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event,
+ HOST_PARAMS_CHANGE);
+ mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb);
+ esw->host_info.num_vfs = vf_nvports;
+ }
+
+ return 0;
+
+err_reps:
+ esw_offloads_steering_cleanup(esw);
+ return err;
+}
+
static int esw_offloads_stop(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
@@ -1215,18 +1742,24 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
}
}
- /* enable back PF RoCE */
- mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-
return err;
}
-void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
+void esw_offloads_cleanup(struct mlx5_eswitch *esw)
{
- esw_offloads_unload_reps(esw, nvports);
- esw_destroy_vport_rx_group(esw);
- esw_destroy_offloads_table(esw);
- esw_destroy_offloads_fdb_tables(esw);
+ u16 num_vfs;
+
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb);
+ flush_workqueue(esw->work_queue);
+ num_vfs = esw->host_info.num_vfs;
+ } else {
+ num_vfs = esw->dev->priv.sriov.num_vfs;
+ }
+
+ esw_offloads_devcom_cleanup(esw);
+ esw_offloads_unload_all_reps(esw, num_vfs);
+ esw_offloads_steering_cleanup(esw);
}
static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
@@ -1315,7 +1848,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink)
if (!MLX5_ESWITCH_MANAGER(dev))
return -EPERM;
- if (dev->priv.eswitch->mode == SRIOV_NONE)
+ if (dev->priv.eswitch->mode == SRIOV_NONE &&
+ !mlx5_core_is_ecpf_esw_manager(dev))
return -EOPNOTSUPP;
return 0;
@@ -1527,47 +2061,45 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
return 0;
}
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
- int vport_index,
- struct mlx5_eswitch_rep_if *__rep_if,
- u8 rep_type)
+void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep_if *__rep_if,
+ u8 rep_type)
{
- struct mlx5_esw_offload *offloads = &esw->offloads;
struct mlx5_eswitch_rep_if *rep_if;
+ struct mlx5_eswitch_rep *rep;
+ int i;
- rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type];
-
- rep_if->load = __rep_if->load;
- rep_if->unload = __rep_if->unload;
- rep_if->get_proto_dev = __rep_if->get_proto_dev;
- rep_if->priv = __rep_if->priv;
+ mlx5_esw_for_all_reps(esw, i, rep) {
+ rep_if = &rep->rep_if[rep_type];
+ rep_if->load = __rep_if->load;
+ rep_if->unload = __rep_if->unload;
+ rep_if->get_proto_dev = __rep_if->get_proto_dev;
+ rep_if->priv = __rep_if->priv;
- rep_if->valid = true;
+ rep_if->state = REP_REGISTERED;
+ }
}
-EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
- int vport_index, u8 rep_type)
+void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
{
- struct mlx5_esw_offload *offloads = &esw->offloads;
+ u16 max_vf = mlx5_core_max_vfs(esw->dev);
struct mlx5_eswitch_rep *rep;
+ int i;
- rep = &offloads->vport_reps[vport_index];
-
- if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled)
- rep->rep_if[rep_type].unload(rep);
+ if (esw->mode == SRIOV_OFFLOADS)
+ __unload_reps_all_vport(esw, max_vf, rep_type);
- rep->rep_if[rep_type].valid = false;
+ mlx5_esw_for_all_reps(esw, i, rep)
+ rep->rep_if[rep_type].state = REP_UNREGISTERED;
}
-EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
{
-#define UPLINK_REP_INDEX 0
- struct mlx5_esw_offload *offloads = &esw->offloads;
struct mlx5_eswitch_rep *rep;
- rep = &offloads->vport_reps[UPLINK_REP_INDEX];
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
return rep->rep_if[rep_type].priv;
}
@@ -1575,15 +2107,11 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
int vport,
u8 rep_type)
{
- struct mlx5_esw_offload *offloads = &esw->offloads;
struct mlx5_eswitch_rep *rep;
- if (vport == FDB_UPLINK_VPORT)
- vport = UPLINK_REP_INDEX;
-
- rep = &offloads->vport_reps[vport];
+ rep = mlx5_eswitch_get_rep(esw, vport);
- if (rep->rep_if[rep_type].valid &&
+ if (rep->rep_if[rep_type].state == REP_LOADED &&
rep->rep_if[rep_type].get_proto_dev)
return rep->rep_if[rep_type].get_proto_dev(rep);
return NULL;
@@ -1592,13 +2120,13 @@ EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
{
- return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+ return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type);
}
EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
int vport)
{
- return &esw->offloads.vport_reps[vport];
+ return mlx5_eswitch_get_rep(esw, vport);
}
EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
new file mode 100644
index 000000000000..5d5864e8df3c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+
+struct mlx5_event_nb {
+ struct mlx5_nb nb;
+ void *ctx;
+};
+
+/* General event handlers for the low-level mlx5_core driver
+ *
+ * Other major feature-specific events, such as clock/eswitch/fpga/FW
+ * tracer events, are handled elsewhere by the respective mlx5
+ * components through their own notifier callbacks.
+ */
+static int any_notifier(struct notifier_block *, unsigned long, void *);
+static int temp_warn(struct notifier_block *, unsigned long, void *);
+static int port_module(struct notifier_block *, unsigned long, void *);
+
+/* handler which forwards the event to events->nh, driver notifiers */
+static int forward_event(struct notifier_block *, unsigned long, void *);
+
+static struct mlx5_nb events_nbs_ref[] = {
+ /* Events to be processed by mlx5_core */
+ {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
+ {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
+ {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
+
+ /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+ /* QP/WQ resource events to forward */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
+ /* SRQ events */
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
+ {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
+};
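+
This table is the single routing point for async EQEs in the new file: the first three entries keep their events inside mlx5_core, while every `forward_event` entry re-publishes the raw EQE on the driver notifier chain for mlx5e/mlx5_ib. A hedged sketch of what subscribing mlx5_core itself to one more event type would look like (the handler name is illustrative):

	static int my_local_handler(struct notifier_block *nb,
				    unsigned long type, void *data)
	{
		/* data is the raw struct mlx5_eqe */
		return NOTIFY_OK;
	}

	/* plus one more table element:
	 * {.nb.notifier_call = my_local_handler, .event_type = MLX5_EVENT_TYPE_CMD },
	 */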
+
+struct mlx5_events {
+ struct mlx5_core_dev *dev;
+ struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
+ /* driver notifier chain */
+ struct atomic_notifier_head nh;
+ /* port module events stats */
+ struct mlx5_pme_stats pme_stats;
+};
+
+static const char *eqe_type_str(u8 type)
+{
+ switch (type) {
+ case MLX5_EVENT_TYPE_COMP:
+ return "MLX5_EVENT_TYPE_COMP";
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ return "MLX5_EVENT_TYPE_PATH_MIG";
+ case MLX5_EVENT_TYPE_COMM_EST:
+ return "MLX5_EVENT_TYPE_COMM_EST";
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ return "MLX5_EVENT_TYPE_SQ_DRAINED";
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ return "MLX5_EVENT_TYPE_CQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_INTERNAL_ERROR:
+ return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return "MLX5_EVENT_TYPE_PORT_CHANGE";
+ case MLX5_EVENT_TYPE_GPIO_EVENT:
+ return "MLX5_EVENT_TYPE_GPIO_EVENT";
+ case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+ return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+ case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+ return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
+ case MLX5_EVENT_TYPE_REMOTE_CONFIG:
+ return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
+ case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
+ return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
+ case MLX5_EVENT_TYPE_STALL_EVENT:
+ return "MLX5_EVENT_TYPE_STALL_EVENT";
+ case MLX5_EVENT_TYPE_CMD:
+ return "MLX5_EVENT_TYPE_CMD";
+ case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE:
+ return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE";
+ case MLX5_EVENT_TYPE_PAGE_REQUEST:
+ return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+ case MLX5_EVENT_TYPE_PAGE_FAULT:
+ return "MLX5_EVENT_TYPE_PAGE_FAULT";
+ case MLX5_EVENT_TYPE_PPS_EVENT:
+ return "MLX5_EVENT_TYPE_PPS_EVENT";
+ case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+ return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_ERROR";
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+ case MLX5_EVENT_TYPE_MONITOR_COUNTER:
+ return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
+ case MLX5_EVENT_TYPE_DEVICE_TRACER:
+ return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+ default:
+ return "Unrecognized event";
+ }
+}
+
+/* handles all FW events, type == eqe->type */
+static int any_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
+ eqe_type_str(eqe->type), eqe->sub_type);
+ return NOTIFY_OK;
+}
+
+/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
+static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+ u64 value_lsb;
+ u64 value_msb;
+
+ value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+ value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+ mlx5_core_warn(events->dev,
+ "High temperature on sensors with bit set %llx %llx",
+ value_msb, value_lsb);
+
+ return NOTIFY_OK;
+}
+
+/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
+{
+ switch (status) {
+ case MLX5_MODULE_STATUS_PLUGGED:
+ return "Cable plugged";
+ case MLX5_MODULE_STATUS_UNPLUGGED:
+ return "Cable unplugged";
+ case MLX5_MODULE_STATUS_ERROR:
+ return "Cable error";
+ case MLX5_MODULE_STATUS_DISABLED:
+ return "Cable disabled";
+ default:
+ return "Unknown status";
+ }
+}
+
+static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
+{
+ switch (error) {
+ case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
+ return "Power budget exceeded";
+ case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
+ return "Long Range for non MLNX cable";
+ case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
+ return "Bus stuck (I2C or data shorted)";
+ case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
+ return "No EEPROM/retry timeout";
+ case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
+ return "Enforce part number list";
+ case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
+ return "Unknown identifier";
+ case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
+ return "High Temperature";
+ case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
+ return "Bad or shorted cable/module";
+ case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
+ return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
+ default:
+ return "Unknown error";
+ }
+}
+
+/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static int port_module(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ enum port_module_event_status_type module_status;
+ enum port_module_event_error_type error_type;
+ struct mlx5_eqe_port_module *module_event_eqe;
+ const char *status_str;
+ u8 module_num;
+
+ module_event_eqe = &eqe->data.port_module;
+ module_status = module_event_eqe->module_status &
+ PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+ error_type = module_event_eqe->error_type &
+ PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+
+ if (module_status < MLX5_MODULE_STATUS_NUM)
+ events->pme_stats.status_counters[module_status]++;
+
+ if (module_status == MLX5_MODULE_STATUS_ERROR)
+ if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
+ events->pme_stats.error_counters[error_type]++;
+
+ if (!printk_ratelimit())
+ return NOTIFY_OK;
+
+ module_num = module_event_eqe->module;
+ status_str = mlx5_pme_status_to_string(module_status);
+ if (module_status == MLX5_MODULE_STATUS_ERROR) {
+ const char *error_str = mlx5_pme_error_to_string(error_type);
+
+ mlx5_core_err(events->dev,
+ "Port module event[error]: module %u, %s, %s\n",
+ module_num, status_str, error_str);
+ } else {
+ mlx5_core_info(events->dev,
+ "Port module event: module %u, %s\n",
+ module_num, status_str);
+ }
+
+ return NOTIFY_OK;
+}
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
+{
+ *stats = dev->priv.events->pme_stats;
+}
+
+/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
+static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
+ eqe_type_str(eqe->type), eqe->sub_type);
+ atomic_notifier_call_chain(&events->nh, event, data);
+ return NOTIFY_OK;
+}
+
+int mlx5_events_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
+
+ if (!events)
+ return -ENOMEM;
+
+ ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
+ events->dev = dev;
+ dev->priv.events = events;
+ return 0;
+}
+
+void mlx5_events_cleanup(struct mlx5_core_dev *dev)
+{
+ kvfree(dev->priv.events);
+}
+
+void mlx5_events_start(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = dev->priv.events;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
+ events->notifiers[i].nb = events_nbs_ref[i];
+ events->notifiers[i].ctx = events;
+ mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
+ }
+}
+
+void mlx5_events_stop(struct mlx5_core_dev *dev)
+{
+ struct mlx5_events *events = dev->priv.events;
+ int i;
+
+ for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0; i--)
+ mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
+}
+
+int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return atomic_notifier_chain_register(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_register);
+
+int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+ struct mlx5_events *events = dev->priv.events;
+
+ return atomic_notifier_chain_unregister(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_unregister);
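+
These exported helpers form the consumer-facing API of the new file: an mlx5 interface driver registers a `notifier_block` and then receives every EQE that `forward_event()` republishes. A hedged sketch of such a consumer; the handler and the registration sites are illustrative:

	#include <linux/notifier.h>
	#include <linux/mlx5/driver.h>

	static int my_port_event(struct notifier_block *nb,
				 unsigned long event, void *data)
	{
		struct mlx5_eqe *eqe = data;

		if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
			return NOTIFY_DONE;
		/* react to eqe->sub_type here */
		return NOTIFY_OK;
	}

	static struct notifier_block my_nb = { .notifier_call = my_port_event };

	/* probe:  mlx5_notifier_register(mdev, &my_nb);   */
	/* remove: mlx5_notifier_unregister(mdev, &my_nb); */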
+
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
+{
+ return atomic_notifier_call_chain(&events->nh, event, data);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 8ca1d1949d93..873541ef4c1b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -334,7 +334,7 @@ static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
{
u8 opcode, status = 0;
- opcode = cqe->op_own >> 4;
+ opcode = get_cqe_opcode(cqe);
switch (opcode) {
case MLX5_CQE_REQ_ERR:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 436a8136f26f..d046d1ec2a86 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -36,6 +36,7 @@
#include "mlx5_core.h"
#include "lib/mlx5.h"
+#include "lib/eq.h"
#include "fpga/core.h"
#include "fpga/conn.h"
@@ -145,6 +146,22 @@ static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
return 0;
}
+static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
+
+static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+ struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
+
+ return mlx5_fpga_event(fdev, event, eqe);
+}
+
+static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+ struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
+
+ return mlx5_fpga_event(fdev, event, eqe);
+}
+
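Both wrappers rely on the embedded-notifier pattern: `struct mlx5_fpga_device` embeds one `mlx5_nb` per event type, and each callback recovers the device with `mlx5_nb_cof()` (a `container_of()` over the embedded node) before dispatching to the shared handler. A minimal model of the pattern, with illustrative names:

	struct my_dev {
		struct mlx5_nb err_nb;
		struct mlx5_nb qp_err_nb;
	};

	static int my_err_event(struct notifier_block *nb, unsigned long ev, void *eqe)
	{
		struct my_dev *dev = mlx5_nb_cof(nb, struct my_dev, err_nb);

		return common_handler(dev, ev, eqe);	/* illustrative */
	}
+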
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -185,6 +202,11 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
if (err)
goto out;
+ MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
+ MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
+ mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
+
err = mlx5_fpga_conn_device_init(fdev);
if (err)
goto err_rsvd_gid;
@@ -201,6 +223,8 @@ err_conn_init:
mlx5_fpga_conn_device_cleanup(fdev);
err_rsvd_gid:
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
mlx5_core_unreserve_gids(mdev, max_num_qps);
out:
spin_lock_irqsave(&fdev->state_lock, flags);
@@ -256,6 +280,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
}
mlx5_fpga_conn_device_cleanup(fdev);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+ mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
+
max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
mlx5_core_unreserve_gids(mdev, max_num_qps);
}
@@ -283,13 +310,13 @@ static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
return "Unknown";
}
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
+static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
+ unsigned long event, void *eqe)
{
- struct mlx5_fpga_device *fdev = mdev->fpga;
+ void *data = ((struct mlx5_eqe *)eqe)->data.raw;
const char *event_name;
bool teardown = false;
unsigned long flags;
- u32 fpga_qpn;
u8 syndrome;
switch (event) {
@@ -300,12 +327,9 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
- fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
break;
default:
- mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
- event);
- return;
+ return NOTIFY_DONE;
}
spin_lock_irqsave(&fdev->state_lock, flags);
@@ -326,4 +350,6 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
*/
if (teardown)
mlx5_trigger_health_work(fdev->mdev);
+
+ return NOTIFY_OK;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 3e2355c8df3f..7e2e871dbf83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -35,11 +35,16 @@
#ifdef CONFIG_MLX5_FPGA
+#include <linux/mlx5/eq.h>
+
+#include "lib/eq.h"
#include "fpga/cmd.h"
/* Represents an Innova device */
struct mlx5_fpga_device {
struct mlx5_core_dev *mdev;
+ struct mlx5_nb fpga_err_nb;
+ struct mlx5_nb fpga_qp_err_nb;
spinlock_t state_lock; /* Protects state transitions */
enum mlx5_fpga_status state;
enum mlx5_fpga_image last_admin_image;
@@ -82,7 +87,6 @@ int mlx5_fpga_init(struct mlx5_core_dev *mdev);
void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
#else
@@ -104,11 +108,6 @@ static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
{
}
-static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
- void *data)
-{
-}
-
#endif
#endif /* __MLX5_FPGA_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
index 5cf5f2a9d51f..22a2ef111514 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
@@ -148,14 +148,16 @@ static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
return ret;
}
-static void mlx5_fpga_tls_release_swid(struct idr *idr,
- spinlock_t *idr_spinlock, u32 swid)
+static void *mlx5_fpga_tls_release_swid(struct idr *idr,
+ spinlock_t *idr_spinlock, u32 swid)
{
unsigned long flags;
+ void *ptr;
spin_lock_irqsave(idr_spinlock, flags);
- idr_remove(idr, swid);
+ ptr = idr_remove(idr, swid);
spin_unlock_irqrestore(idr_spinlock, flags);
+ return ptr;
}
static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
@@ -165,20 +167,12 @@ static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
kfree(buf);
}
-struct mlx5_teardown_stream_context {
- struct mlx5_fpga_tls_command_context cmd;
- u32 swid;
-};
-
static void
mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
struct mlx5_fpga_device *fdev,
struct mlx5_fpga_tls_command_context *cmd,
struct mlx5_fpga_dma_buf *resp)
{
- struct mlx5_teardown_stream_context *ctx =
- container_of(cmd, struct mlx5_teardown_stream_context, cmd);
-
if (resp) {
u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
@@ -186,14 +180,6 @@ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
mlx5_fpga_err(fdev,
"Teardown stream failed with syndrome = %d",
syndrome);
- else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx))
- mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr,
- &fdev->tls->tx_idr_spinlock,
- ctx->swid);
- else
- mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr,
- &fdev->tls->rx_idr_spinlock,
- ctx->swid);
}
mlx5_fpga_tls_put_command_ctx(cmd);
}
@@ -225,8 +211,14 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
rcu_read_lock();
flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
- rcu_read_unlock();
+ if (unlikely(!flow)) {
+ rcu_read_unlock();
+ WARN_ONCE(1, "Received NULL pointer for handle\n");
+ kfree(buf);
+ return -EINVAL;
+ }
mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+ rcu_read_unlock();
MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
@@ -238,6 +230,8 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
buf->complete = mlx_tls_kfree_complete;
ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf);
+ if (ret < 0)
+ kfree(buf);
return ret;
}
@@ -245,7 +239,7 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
void *flow, u32 swid, gfp_t flags)
{
- struct mlx5_teardown_stream_context *ctx;
+ struct mlx5_fpga_tls_command_context *ctx;
struct mlx5_fpga_dma_buf *buf;
void *cmd;
@@ -253,7 +247,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
if (!ctx)
return;
- buf = &ctx->cmd.buf;
+ buf = &ctx->buf;
cmd = (ctx + 1);
MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
MLX5_SET(tls_cmd, cmd, swid, swid);
@@ -264,8 +258,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
buf->sg[0].data = cmd;
buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
- ctx->swid = swid;
- mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
+ mlx5_fpga_tls_cmd_send(mdev->fpga, ctx,
mlx5_fpga_tls_teardown_completion);
}
@@ -275,13 +268,14 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
struct mlx5_fpga_tls *tls = mdev->fpga->tls;
void *flow;
- rcu_read_lock();
if (direction_sx)
- flow = idr_find(&tls->tx_idr, swid);
+ flow = mlx5_fpga_tls_release_swid(&tls->tx_idr,
+ &tls->tx_idr_spinlock,
+ swid);
else
- flow = idr_find(&tls->rx_idr, swid);
-
- rcu_read_unlock();
+ flow = mlx5_fpga_tls_release_swid(&tls->rx_idr,
+ &tls->rx_idr_spinlock,
+ swid);
if (!flow) {
mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
@@ -289,6 +283,7 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
return;
}
+ synchronize_rcu(); /* before kfree(flow) */
mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
}
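
The reworked delete path follows the canonical RCU removal shape: unpublish the object under the spinlock (so lookups such as `mlx5_fpga_tls_resync_rx()` can no longer find it), wait for pre-existing RCU readers with `synchronize_rcu()`, and only then let the object be freed. A generic sketch of that shape, with illustrative names:

	spin_lock_irqsave(&lock, flags);
	obj = idr_remove(&idr, id);		/* unpublish */
	spin_unlock_irqrestore(&lock, flags);

	if (obj) {
		synchronize_rcu();	/* wait out rcu_read_lock() readers */
		kfree(obj);		/* the patch defers the free to the teardown path */
	}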
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 08a891f9aade..c44ccb67c4a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -308,22 +308,68 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
+static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
+ struct fs_fte *fte, bool *extended_dest)
+{
+ int fw_log_max_fdb_encap_uplink =
+ MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink);
+ int num_fwd_destinations = 0;
+ struct mlx5_flow_rule *dst;
+ int num_encap = 0;
+
+ *extended_dest = false;
+ if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return 0;
+
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+ if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
+ num_encap++;
+ num_fwd_destinations++;
+ }
+ if (num_fwd_destinations > 1 && num_encap > 0)
+ *extended_dest = true;
+
+ if (*extended_dest && !fw_log_max_fdb_encap_uplink) {
+ mlx5_core_warn(dev, "FW does not support extended destination");
+ return -EOPNOTSUPP;
+ }
+ if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) {
+ mlx5_core_warn(dev, "FW does not support more than %d encaps",
+ 1 << fw_log_max_fdb_encap_uplink);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
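+
In short, the extended destination format is needed only when an FTE forwards to more than one destination and at least one of them carries its own packet reformat. A hedged sketch of a destination that triggers it, using the flags and fields this patch introduces (variable names are illustrative):

	struct mlx5_flow_destination dest = {
		.type = MLX5_FLOW_DESTINATION_TYPE_VPORT,
	};

	dest.vport.num = vport_num;
	dest.vport.reformat_id = encap_id;
	dest.vport.flags = MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
	/* with a second forward destination on the same FTE, set_fte will
	 * now emit extended_dest_format entries instead of dest_format_struct */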
static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
int opmod, int modify_mask,
struct mlx5_flow_table *ft,
unsigned group_id,
struct fs_fte *fte)
{
- unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
- fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
+ bool extended_dest = false;
struct mlx5_flow_rule *dst;
void *in_flow_context, *vlan;
void *in_match_value;
+ unsigned int inlen;
+ int dst_cnt_size;
void *in_dests;
u32 *in;
int err;
+ if (mlx5_set_extended_dest(dev, fte, &extended_dest))
+ return -EOPNOTSUPP;
+
+ if (!extended_dest)
+ dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct);
+ else
+ dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format);
+
+ inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -343,9 +389,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
- MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
- MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
- fte->action.reformat_id);
+ MLX5_SET(flow_context, in_flow_context, extended_destination,
+ extended_dest);
+ if (extended_dest) {
+ u32 action;
+
+ action = fte->action.action &
+ ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ MLX5_SET(flow_context, in_flow_context, action, action);
+ } else {
+ MLX5_SET(flow_context, in_flow_context, action,
+ fte->action.action);
+ MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+ fte->action.reformat_id);
+ }
MLX5_SET(flow_context, in_flow_context, modify_header_id,
fte->action.modify_id);
@@ -387,10 +444,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
id = dst->dest_attr.vport.num;
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id_valid,
- dst->dest_attr.vport.vhca_id_valid);
+ !!(dst->dest_attr.vport.flags &
+ MLX5_FLOW_DEST_VPORT_VHCA_ID));
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id,
dst->dest_attr.vport.vhca_id);
+ if (extended_dest) {
+ MLX5_SET(dest_format_struct, in_dests,
+ packet_reformat,
+ !!(dst->dest_attr.vport.flags &
+ MLX5_FLOW_DEST_VPORT_REFORMAT_ID));
+ MLX5_SET(extended_dest_format, in_dests,
+ packet_reformat_id,
+ dst->dest_attr.vport.reformat_id);
+ }
break;
default:
id = dst->dest_attr.tir_num;
@@ -399,7 +466,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(dest_format_struct, in_dests, destination_type,
type);
MLX5_SET(dest_format_struct, in_dests, destination_id, id);
- in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ in_dests += dst_cnt_size;
list_size++;
}
@@ -420,7 +487,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
dst->dest_attr.counter_id);
- in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ in_dests += dst_cnt_size;
list_size++;
}
if (list_size > max_list_size) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 08233cf44871..0be3eb86dd84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -32,6 +32,7 @@
#include <linux/mutex.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
#include <linux/mlx5/eswitch.h>
#include "mlx5_core.h"
@@ -262,10 +263,11 @@ static void nested_down_write_ref_node(struct fs_node *node,
}
}
-static void down_write_ref_node(struct fs_node *node)
+static void down_write_ref_node(struct fs_node *node, bool locked)
{
if (node) {
- down_write(&node->lock);
+ if (!locked)
+ down_write(&node->lock);
refcount_inc(&node->refcount);
}
}
@@ -276,13 +278,14 @@ static void up_read_ref_node(struct fs_node *node)
up_read(&node->lock);
}
-static void up_write_ref_node(struct fs_node *node)
+static void up_write_ref_node(struct fs_node *node, bool locked)
{
refcount_dec(&node->refcount);
- up_write(&node->lock);
+ if (!locked)
+ up_write(&node->lock);
}
-static void tree_put_node(struct fs_node *node)
+static void tree_put_node(struct fs_node *node, bool locked)
{
struct fs_node *parent_node = node->parent;
@@ -293,27 +296,27 @@ static void tree_put_node(struct fs_node *node)
/* Only root namespace doesn't have parent and we just
* need to free its node.
*/
- down_write_ref_node(parent_node);
+ down_write_ref_node(parent_node, locked);
list_del_init(&node->list);
if (node->del_sw_func)
node->del_sw_func(node);
- up_write_ref_node(parent_node);
+ up_write_ref_node(parent_node, locked);
} else {
kfree(node);
}
node = NULL;
}
if (!node && parent_node)
- tree_put_node(parent_node);
+ tree_put_node(parent_node, locked);
}
-static int tree_remove_node(struct fs_node *node)
+static int tree_remove_node(struct fs_node *node, bool locked)
{
if (refcount_read(&node->refcount) > 1) {
refcount_dec(&node->refcount);
return -EEXIST;
}
- tree_put_node(node);
+ tree_put_node(node, locked);
return 0;
}
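
Threading a `locked` flag through `tree_put_node()`/`tree_remove_node()` lets the same helpers run either standalone (taking the parent lock themselves) or nested under a caller that already holds it, which the reworked `mlx5_del_flow_rules()` below relies on. The general shape, with illustrative names:

	static void put_node(struct node *n, bool locked)
	{
		if (!locked)
			lock(n->parent);
		/* ... detach and maybe free n ... */
		if (!locked)
			unlock(n->parent);
	}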
@@ -397,6 +400,7 @@ static void del_hw_flow_table(struct fs_node *node)
fs_get_obj(ft, node);
dev = get_dev(&ft->node);
root = find_root(&ft->node);
+ trace_mlx5_fs_del_ft(ft);
if (node->active) {
err = root->cmds->destroy_flow_table(dev, ft);
@@ -418,22 +422,34 @@ static void del_sw_flow_table(struct fs_node *node)
kfree(ft);
}
-static void del_sw_hw_rule(struct fs_node *node)
+static void modify_fte(struct fs_fte *fte)
{
struct mlx5_flow_root_namespace *root;
- struct mlx5_flow_rule *rule;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
- struct fs_fte *fte;
- int modify_mask;
- struct mlx5_core_dev *dev = get_dev(node);
+ struct mlx5_core_dev *dev;
int err;
- bool update_fte = false;
- fs_get_obj(rule, node);
- fs_get_obj(fte, rule->node.parent);
fs_get_obj(fg, fte->node.parent);
fs_get_obj(ft, fg->node.parent);
+ dev = get_dev(&fte->node);
+
+ root = find_root(&ft->node);
+ err = root->cmds->update_fte(dev, ft, fg->id, fte->modify_mask, fte);
+ if (err)
+ mlx5_core_warn(dev,
+ "%s can't del rule fg id=%d fte_index=%d\n",
+ __func__, fg->id, fte->index);
+ fte->modify_mask = 0;
+}
+
+static void del_sw_hw_rule(struct fs_node *node)
+{
+ struct mlx5_flow_rule *rule;
+ struct fs_fte *fte;
+
+ fs_get_obj(rule, node);
+ fs_get_obj(fte, rule->node.parent);
trace_mlx5_fs_del_rule(rule);
if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
mutex_lock(&rule->dest_attr.ft->lock);
@@ -443,27 +459,19 @@ static void del_sw_hw_rule(struct fs_node *node)
if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
--fte->dests_size) {
- modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
- BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+ fte->modify_mask |=
+ BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
+ BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
- update_fte = true;
goto out;
}
if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
--fte->dests_size) {
- modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
- update_fte = true;
+ fte->modify_mask |=
+ BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
}
out:
- root = find_root(&ft->node);
- if (update_fte && fte->dests_size) {
- err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte);
- if (err)
- mlx5_core_warn(dev,
- "%s can't del rule fg id=%d fte_index=%d\n",
- __func__, fg->id, fte->index);
- }
kfree(rule);
}
@@ -489,6 +497,7 @@ static void del_hw_fte(struct fs_node *node)
mlx5_core_warn(dev,
"flow steering can't delete fte in index %d of flow group id %d\n",
fte->index, fg->id);
+ node->active = 0;
}
}
@@ -589,7 +598,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
fte->node.type = FS_TYPE_FLOW_ENTRY;
fte->action = *flow_act;
- tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
+ tree_init_node(&fte->node, NULL, del_sw_fte);
return fte;
}
@@ -618,7 +627,8 @@ static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steer
if (ret) {
kmem_cache_free(steering->fgs_cache, fg);
return ERR_PTR(ret);
-}
+ }
+
ida_init(&fg->fte_allocator);
fg->mask.match_criteria_enable = match_criteria_enable;
memcpy(&fg->mask.match_criteria, match_criteria,
@@ -855,7 +865,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
fs_get_obj(fte, rule->node.parent);
if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
return -EINVAL;
- down_write_ref_node(&fte->node);
+ down_write_ref_node(&fte->node, false);
fs_get_obj(fg, fte->node.parent);
fs_get_obj(ft, fg->node.parent);
@@ -863,7 +873,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
root = find_root(&ft->node);
err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
modify_mask, fte);
- up_write_ref_node(&fte->node);
+ up_write_ref_node(&fte->node, false);
return err;
}
@@ -1013,12 +1023,13 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
if (err)
goto destroy_ft;
ft->node.active = true;
- down_write_ref_node(&fs_prio->node);
+ down_write_ref_node(&fs_prio->node, false);
tree_add_node(&ft->node, &fs_prio->node);
list_add_flow_table(ft, fs_prio);
fs_prio->num_ft++;
- up_write_ref_node(&fs_prio->node);
+ up_write_ref_node(&fs_prio->node, false);
mutex_unlock(&root->chain_lock);
+ trace_mlx5_fs_add_ft(ft);
return ft;
destroy_ft:
root->cmds->destroy_flow_table(root->dev, ft);
@@ -1110,17 +1121,17 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
if (ft->autogroup.active)
return ERR_PTR(-EPERM);
- down_write_ref_node(&ft->node);
+ down_write_ref_node(&ft->node, false);
fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria,
start_index, end_index,
ft->node.children.prev);
- up_write_ref_node(&ft->node);
+ up_write_ref_node(&ft->node, false);
if (IS_ERR(fg))
return fg;
err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id);
if (err) {
- tree_put_node(&fg->node);
+ tree_put_node(&fg->node, false);
return ERR_PTR(err);
}
trace_mlx5_fs_add_fg(fg);
@@ -1373,7 +1384,10 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
{
if (d1->type == d2->type) {
if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
- d1->vport.num == d2->vport.num) ||
+ d1->vport.num == d2->vport.num &&
+ d1->vport.flags == d2->vport.flags &&
+ ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
+ (d1->vport.reformat_id == d2->vport.reformat_id) : true)) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
d1->ft == d2->ft) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
@@ -1514,10 +1528,10 @@ static void free_match_list(struct match_list_head *head)
struct match_list *iter, *match_tmp;
list_del(&head->first.list);
- tree_put_node(&head->first.g->node);
+ tree_put_node(&head->first.g->node, false);
list_for_each_entry_safe(iter, match_tmp, &head->list,
list) {
- tree_put_node(&iter->g->node);
+ tree_put_node(&iter->g->node, false);
list_del(&iter->list);
kfree(iter);
}
@@ -1594,11 +1608,16 @@ lookup_fte_locked(struct mlx5_flow_group *g,
fte_tmp = NULL;
goto out;
}
+ if (!fte_tmp->node.active) {
+ tree_put_node(&fte_tmp->node, false);
+ fte_tmp = NULL;
+ goto out;
+ }
nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
out:
if (take_write)
- up_write_ref_node(&g->node);
+ up_write_ref_node(&g->node, false);
else
up_read_ref_node(&g->node);
return fte_tmp;
@@ -1640,8 +1659,8 @@ search_again_locked:
continue;
rule = add_rule_fg(g, spec->match_value,
flow_act, dest, dest_num, fte_tmp);
- up_write_ref_node(&fte_tmp->node);
- tree_put_node(&fte_tmp->node);
+ up_write_ref_node(&fte_tmp->node, false);
+ tree_put_node(&fte_tmp->node, false);
kmem_cache_free(steering->ftes_cache, fte);
return rule;
}
@@ -1677,7 +1696,7 @@ skip_search:
err = insert_fte(g, fte);
if (err) {
- up_write_ref_node(&g->node);
+ up_write_ref_node(&g->node, false);
if (err == -ENOSPC)
continue;
kmem_cache_free(steering->ftes_cache, fte);
@@ -1685,11 +1704,11 @@ skip_search:
}
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
- up_write_ref_node(&g->node);
+ up_write_ref_node(&g->node, false);
rule = add_rule_fg(g, spec->match_value,
flow_act, dest, dest_num, fte);
- up_write_ref_node(&fte->node);
- tree_put_node(&fte->node);
+ up_write_ref_node(&fte->node, false);
+ tree_put_node(&fte->node, false);
return rule;
}
rule = ERR_PTR(-ENOENT);
@@ -1731,7 +1750,7 @@ search_again_locked:
err = build_match_list(&match_head, ft, spec);
if (err) {
if (take_write)
- up_write_ref_node(&ft->node);
+ up_write_ref_node(&ft->node, false);
else
up_read_ref_node(&ft->node);
return ERR_PTR(err);
@@ -1746,7 +1765,7 @@ search_again_locked:
if (!IS_ERR(rule) ||
(PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
if (take_write)
- up_write_ref_node(&ft->node);
+ up_write_ref_node(&ft->node, false);
return rule;
}
@@ -1762,12 +1781,12 @@ search_again_locked:
g = alloc_auto_flow_group(ft, spec);
if (IS_ERR(g)) {
rule = ERR_CAST(g);
- up_write_ref_node(&ft->node);
+ up_write_ref_node(&ft->node, false);
return rule;
}
nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
- up_write_ref_node(&ft->node);
+ up_write_ref_node(&ft->node, false);
err = create_auto_flow_group(ft, g);
if (err)
@@ -1786,17 +1805,17 @@ search_again_locked:
}
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
- up_write_ref_node(&g->node);
+ up_write_ref_node(&g->node, false);
rule = add_rule_fg(g, spec->match_value, flow_act, dest,
dest_num, fte);
- up_write_ref_node(&fte->node);
- tree_put_node(&fte->node);
- tree_put_node(&g->node);
+ up_write_ref_node(&fte->node, false);
+ tree_put_node(&fte->node, false);
+ tree_put_node(&g->node, false);
return rule;
err_release_fg:
- up_write_ref_node(&g->node);
- tree_put_node(&g->node);
+ up_write_ref_node(&g->node, false);
+ tree_put_node(&g->node, false);
return ERR_PTR(err);
}
@@ -1859,10 +1878,33 @@ EXPORT_SYMBOL(mlx5_add_flow_rules);
void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
{
+ struct fs_fte *fte;
int i;
+ /* In order to consolidate the HW changes we lock the FTE for other
+ * changes, and increase its refcount, so that its "del" functions are
+ * not invoked; they are handled here instead.
+ * The removal of the rules is done under the locked FTE.
+ * After removing all of the handle's rules: if rules remain, we only
+ * need to modify the FTE in FW, then unlock it and drop the refcount
+ * we took above.
+ * Otherwise the FTE should be deleted. First delete the FTE in FW,
+ * then unlock the FTE and proceed with tree_put_node() on it, which
+ * performs the final refcount decrease as well as the required
+ * handling of its parent.
+ */
+ fs_get_obj(fte, handle->rule[0]->node.parent);
+ down_write_ref_node(&fte->node, false);
for (i = handle->num_rules - 1; i >= 0; i--)
- tree_remove_node(&handle->rule[i]->node);
+ tree_remove_node(&handle->rule[i]->node, true);
+ if (fte->modify_mask && fte->dests_size) {
+ modify_fte(fte);
+ up_write_ref_node(&fte->node, false);
+ } else {
+ del_hw_fte(&fte->node);
+ up_write(&fte->node.lock);
+ tree_put_node(&fte->node, false);
+ }
kfree(handle);
}
EXPORT_SYMBOL(mlx5_del_flow_rules);
@@ -1965,7 +2007,7 @@ int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
mutex_unlock(&root->chain_lock);
return err;
}
- if (tree_remove_node(&ft->node))
+ if (tree_remove_node(&ft->node, false))
mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
ft->id);
mutex_unlock(&root->chain_lock);
@@ -1976,7 +2018,7 @@ EXPORT_SYMBOL(mlx5_destroy_flow_table);
void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
{
- if (tree_remove_node(&fg->node))
+ if (tree_remove_node(&fg->node, false))
mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
fg->id);
}
@@ -2360,8 +2402,8 @@ static void clean_tree(struct fs_node *node)
tree_get_node(node);
list_for_each_entry_safe(iter, temp, &node->children, list)
clean_tree(iter);
- tree_put_node(node);
- tree_remove_node(node);
+ tree_put_node(node, false);
+ tree_remove_node(node, false);
}
}
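For reference, a hedged caller-side sketch of the steering API whose delete path is consolidated above; the table pointer and vport number are hypothetical, and only the exported mlx5_add_flow_rules()/mlx5_del_flow_rules() helpers are assumed:

static int add_and_del_rule_sketch(struct mlx5_flow_table *ft)
{
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_handle *handle;
	struct mlx5_flow_spec *spec;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
	dest.vport.num = 1;	/* hypothetical vport */

	handle = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
	kvfree(spec);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* all of the handle's rules are torn down with one FW modify/delete */
	mlx5_del_flow_rules(handle);
	return 0;
}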
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index b51ad217da32..87de0e4d9124 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -145,29 +145,6 @@ struct mlx5_flow_table {
struct rhltable fgs_hash;
};
-struct mlx5_fc_cache {
- u64 packets;
- u64 bytes;
- u64 lastuse;
-};
-
-struct mlx5_fc {
- struct list_head list;
- struct llist_node addlist;
- struct llist_node dellist;
-
- /* last{packets,bytes} members are used when calculating the delta since
- * last reading
- */
- u64 lastpackets;
- u64 lastbytes;
-
- u32 id;
- bool aging;
-
- struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
-};
-
struct mlx5_ft_underlay_qp {
struct list_head list;
u32 qpn;
@@ -195,6 +172,7 @@ struct fs_fte {
enum fs_fte_status status;
struct mlx5_fc *counter;
struct rhash_head hash;
+ int modify_mask;
};
/* Type of children is mlx5_flow_table/namespace */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 32accd6b041b..c6c28f56aa29 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -41,6 +41,29 @@
/* Max number of counters to query in bulk read is 32K */
#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
+struct mlx5_fc_cache {
+ u64 packets;
+ u64 bytes;
+ u64 lastuse;
+};
+
+struct mlx5_fc {
+ struct list_head list;
+ struct llist_node addlist;
+ struct llist_node dellist;
+
+ /* last{packets,bytes} members are used when calculating the delta since
+ * last reading
+ */
+ u64 lastpackets;
+ u64 lastbytes;
+
+ u32 id;
+ bool aging;
+
+ struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
/* locking scheme:
*
* It is the responsibility of the user to prevent concurrent calls or bad
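The moved struct documents that last{packets,bytes} hold the values from the previous read. A hedged sketch of the delta computation this enables (it mirrors the cached-query logic; not part of this hunk):

static void fc_read_delta_sketch(struct mlx5_fc *counter,
				 u64 *bytes, u64 *packets)
{
	struct mlx5_fc_cache c = counter->cache;

	*bytes = c.bytes - counter->lastbytes;
	*packets = c.packets - counter->lastpackets;
	counter->lastbytes = c.bytes;
	counter->lastpackets = c.packets;
}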
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 43118de8ee99..cb9fa3430c53 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -38,6 +38,8 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
enum {
MLX5_HEALTH_POLL_INTERVAL = 2 * HZ,
@@ -78,29 +80,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
&dev->iseg->cmdq_addr_l_sz);
}
-static void trigger_cmd_completions(struct mlx5_core_dev *dev)
-{
- unsigned long flags;
- u64 vector;
-
- /* wait for pending handlers to complete */
- synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD));
- spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
- vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
- if (!vector)
- goto no_trig;
-
- vector |= MLX5_TRIGGERED_CMD_COMP;
- spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-
- mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
- mlx5_cmd_comp_handler(dev, vector, true);
- return;
-
-no_trig:
- spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-}
-
static int in_fatal(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
@@ -124,10 +103,10 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
mlx5_core_err(dev, "start\n");
if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
- trigger_cmd_completions(dev);
+ mlx5_cmd_flush(dev);
}
- mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
+ mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
mlx5_core_err(dev, "end\n");
unlock:
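With mlx5_core_event() gone, consumers observe the error state through the events notifier chain instead. A hedged sketch of a receiver, assuming the mlx5_notifier_register() API introduced by this series; the handler name is hypothetical:

static int sys_error_nb_cb(struct notifier_block *nb,
			   unsigned long event, void *data)
{
	if (event == MLX5_DEV_EVENT_SYS_ERROR)
		pr_warn("mlx5 device entered an error state\n");
	return NOTIFY_OK;
}

static struct notifier_block sys_error_nb = {
	.notifier_call = sys_error_nb_cb,
};

/* at probe time: mlx5_notifier_register(dev, &sys_error_nb); */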
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 11dabd62e2c7..4eac42555c7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev,
mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
netdev->mtu = max_mtu;
- mlx5e_build_nic_params(mdev, &priv->channels.params,
+ mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params,
mlx5e_get_netdev_max_channels(netdev),
netdev->mtu);
mlx5i_build_nic_params(mdev, &priv->channels.params);
@@ -446,11 +446,11 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu)
new_channels.params = *params;
new_channels.params.sw_mtu = new_mtu;
- err = mlx5e_open_channels(priv, &new_channels);
+
+ err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
if (err)
goto out;
- mlx5e_switch_priv_channels(priv, &new_channels, NULL);
netdev->mtu = new_channels.params.sw_mtu;
out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 582b2f18010a..959605559858 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -34,39 +34,9 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
-
-enum {
- MLX5_LAG_FLAG_BONDED = 1 << 0,
-};
-
-struct lag_func {
- struct mlx5_core_dev *dev;
- struct net_device *netdev;
-};
-
-/* Used for collection of netdev event info. */
-struct lag_tracker {
- enum netdev_lag_tx_type tx_type;
- struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
- bool is_bonded;
-};
-
-/* LAG data of a ConnectX card.
- * It serves both its phys functions.
- */
-struct mlx5_lag {
- u8 flags;
- u8 v2p_map[MLX5_MAX_PORTS];
- struct lag_func pf[MLX5_MAX_PORTS];
- struct lag_tracker tracker;
- struct delayed_work bond_work;
- struct notifier_block nb;
-
- /* Admin state. Allow lag only if allowed is true
- * even if network conditions for lag were met
- */
- bool allowed;
-};
+#include "eswitch.h"
+#include "lag.h"
+#include "lag_mp.h"
/* General purpose, use for short periods of time.
* Beware of lock dependencies (preferably, no locks should be acquired
@@ -148,13 +118,8 @@ static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}
-static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
-{
- return dev->priv.lag;
-}
-
-static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
- struct net_device *ndev)
+int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+ struct net_device *ndev)
{
int i;
@@ -165,9 +130,14 @@ static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
return -1;
}
-static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
+static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
+{
+ return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
+}
+
+static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
- return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+ return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
@@ -186,36 +156,131 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
*port2 = 1;
}
-static void mlx5_activate_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker)
+void mlx5_modify_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
{
struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ u8 v2p_port1, v2p_port2;
int err;
- ldev->flags |= MLX5_LAG_FLAG_BONDED;
+ mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
+ &v2p_port2);
+
+ if (v2p_port1 != ldev->v2p_map[0] ||
+ v2p_port2 != ldev->v2p_map[1]) {
+ ldev->v2p_map[0] = v2p_port1;
+ ldev->v2p_map[1] = v2p_port2;
+
+ mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
+ ldev->v2p_map[0], ldev->v2p_map[1]);
+
+ err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+ if (err)
+ mlx5_core_err(dev0,
+ "Failed to modify LAG (%d)\n",
+ err);
+ }
+}
+
+static int mlx5_create_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ int err;
mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
&ldev->v2p_map[1]);
+ mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
+ ldev->v2p_map[0], ldev->v2p_map[1]);
+
err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
if (err)
mlx5_core_err(dev0,
"Failed to create LAG (%d)\n",
err);
+ return err;
+}
+
+int mlx5_activate_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ u8 flags)
+{
+ bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ int err;
+
+ err = mlx5_create_lag(ldev, tracker);
+ if (err) {
+ if (roce_lag) {
+ mlx5_core_err(dev0,
+ "Failed to activate RoCE LAG\n");
+ } else {
+ mlx5_core_err(dev0,
+ "Failed to activate VF LAG\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ }
+ return err;
+ }
+
+ ldev->flags |= flags;
+ return 0;
}
-static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
+static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ bool roce_lag = __mlx5_lag_is_roce(ldev);
int err;
- ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+ ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
err = mlx5_cmd_destroy_lag(dev0);
- if (err)
- mlx5_core_err(dev0,
- "Failed to destroy LAG (%d)\n",
- err);
+ if (err) {
+ if (roce_lag) {
+ mlx5_core_err(dev0,
+ "Failed to deactivate RoCE LAG; driver restart required\n");
+ } else {
+ mlx5_core_err(dev0,
+ "Failed to deactivate VF LAG; driver restart required\n"
+ "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
+ }
+ }
+
+ return err;
+}
+
+static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+{
+ if (!ldev->pf[0].dev || !ldev->pf[1].dev)
+ return false;
+
+#ifdef CONFIG_MLX5_ESWITCH
+ return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
+#else
+ return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) &&
+ !mlx5_sriov_is_enabled(ldev->pf[1].dev));
+#endif
+}
+
+static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ mlx5_add_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
+}
+
+static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
}
static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -223,9 +288,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
struct lag_tracker tracker;
- u8 v2p_port1, v2p_port2;
- int i, err;
- bool do_bond;
+ bool do_bond, roce_lag;
+ int err;
if (!dev0 || !dev1)
return;
@@ -234,48 +298,56 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
tracker = ldev->tracker;
mutex_unlock(&lag_mutex);
- do_bond = tracker.is_bonded && ldev->allowed;
+ do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
- if (do_bond && !mlx5_lag_is_bonded(ldev)) {
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
- MLX5_INTERFACE_PROTOCOL_IB);
+ if (do_bond && !__mlx5_lag_is_active(ldev)) {
+ roce_lag = !mlx5_sriov_is_enabled(dev0) &&
+ !mlx5_sriov_is_enabled(dev1);
- mlx5_activate_lag(ldev, &tracker);
+#ifdef CONFIG_MLX5_ESWITCH
+ roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE &&
+ dev1->priv.eswitch->mode == SRIOV_NONE;
+#endif
- mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_nic_vport_enable_roce(dev1);
- } else if (do_bond && mlx5_lag_is_bonded(ldev)) {
- mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1,
- &v2p_port2);
+ if (roce_lag)
+ mlx5_lag_remove_ib_devices(ldev);
- if ((v2p_port1 != ldev->v2p_map[0]) ||
- (v2p_port2 != ldev->v2p_map[1])) {
- ldev->v2p_map[0] = v2p_port1;
- ldev->v2p_map[1] = v2p_port2;
+ err = mlx5_activate_lag(ldev, &tracker,
+ roce_lag ? MLX5_LAG_FLAG_ROCE :
+ MLX5_LAG_FLAG_SRIOV);
+ if (err) {
+ if (roce_lag)
+ mlx5_lag_add_ib_devices(ldev);
- err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
- if (err)
- mlx5_core_err(dev0,
- "Failed to modify LAG (%d)\n",
- err);
+ return;
}
- } else if (!do_bond && mlx5_lag_is_bonded(ldev)) {
- mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_nic_vport_disable_roce(dev1);
- mlx5_deactivate_lag(ldev);
+ if (roce_lag) {
+ mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_enable_roce(dev1);
+ }
+ } else if (do_bond && __mlx5_lag_is_active(ldev)) {
+ mlx5_modify_lag(ldev, &tracker);
+ } else if (!do_bond && __mlx5_lag_is_active(ldev)) {
+ roce_lag = __mlx5_lag_is_roce(ldev);
+
+ if (roce_lag) {
+ mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_disable_roce(dev1);
+ }
- for (i = 0; i < MLX5_MAX_PORTS; i++)
- if (ldev->pf[i].dev)
- mlx5_add_dev_by_protocol(ldev->pf[i].dev,
- MLX5_INTERFACE_PROTOCOL_IB);
+ err = mlx5_deactivate_lag(ldev);
+ if (err)
+ return;
+
+ if (roce_lag)
+ mlx5_lag_add_ib_devices(ldev);
}
}
static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
- schedule_delayed_work(&ldev->bond_work, delay);
+ queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
static void mlx5_do_bond_work(struct work_struct *work)
@@ -419,15 +491,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
-{
- if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) ||
- (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev)))
- return false;
- else
- return true;
-}
-
static struct mlx5_lag *mlx5_lag_dev_alloc(void)
{
struct mlx5_lag *ldev;
@@ -436,14 +499,20 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void)
if (!ldev)
return NULL;
+ ldev->wq = create_singlethread_workqueue("mlx5_lag");
+ if (!ldev->wq) {
+ kfree(ldev);
+ return NULL;
+ }
+
INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
- ldev->allowed = mlx5_lag_check_prereq(ldev);
return ldev;
}
static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
{
+ destroy_workqueue(ldev->wq);
kfree(ldev);
}
@@ -462,7 +531,6 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
ldev->tracker.netdev_state[fn].link_up = 0;
ldev->tracker.netdev_state[fn].tx_enabled = 0;
- ldev->allowed = mlx5_lag_check_prereq(ldev);
dev->priv.lag = ldev;
mutex_unlock(&lag_mutex);
@@ -484,7 +552,6 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
dev->priv.lag = NULL;
- ldev->allowed = mlx5_lag_check_prereq(ldev);
mutex_unlock(&lag_mutex);
}
@@ -493,6 +560,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
{
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
+ int err;
if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
!MLX5_CAP_GEN(dev, lag_master) ||
@@ -520,6 +588,11 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
}
}
+
+ err = mlx5_lag_mp_init(ldev);
+ if (err)
+ mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
+ err);
}
/* Must be called with intf_mutex held */
@@ -532,7 +605,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
if (!ldev)
return;
- if (mlx5_lag_is_bonded(ldev))
+ if (__mlx5_lag_is_active(ldev))
mlx5_deactivate_lag(ldev);
mlx5_lag_dev_remove_pf(ldev, dev);
@@ -544,61 +617,67 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
if (i == MLX5_MAX_PORTS) {
if (ldev->nb.notifier_call)
unregister_netdevice_notifier(&ldev->nb);
+ mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work);
mlx5_lag_dev_free(ldev);
}
}
-bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
+bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
bool res;
mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- res = ldev && mlx5_lag_is_bonded(ldev);
+ res = ldev && __mlx5_lag_is_roce(ldev);
mutex_unlock(&lag_mutex);
return res;
}
-EXPORT_SYMBOL(mlx5_lag_is_active);
+EXPORT_SYMBOL(mlx5_lag_is_roce);
-static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow)
+bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
- int ret = 0;
- bool lag_active;
-
- mlx5_dev_list_lock();
+ bool res;
+ mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- if (!ldev) {
- ret = -ENODEV;
- goto unlock;
- }
- lag_active = mlx5_lag_is_bonded(ldev);
- if (!mlx5_lag_check_prereq(ldev) && allow) {
- ret = -EINVAL;
- goto unlock;
- }
- if (ldev->allowed == allow)
- goto unlock;
- ldev->allowed = allow;
- if ((lag_active && !allow) || allow)
- mlx5_do_bond(ldev);
-unlock:
- mlx5_dev_list_unlock();
- return ret;
+ res = ldev && __mlx5_lag_is_active(ldev);
+ mutex_unlock(&lag_mutex);
+
+ return res;
}
+EXPORT_SYMBOL(mlx5_lag_is_active);
-int mlx5_lag_forbid(struct mlx5_core_dev *dev)
+bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
- return mlx5_lag_set_state(dev, false);
+ struct mlx5_lag *ldev;
+ bool res;
+
+ mutex_lock(&lag_mutex);
+ ldev = mlx5_lag_dev_get(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev);
+ mutex_unlock(&lag_mutex);
+
+ return res;
}
+EXPORT_SYMBOL(mlx5_lag_is_sriov);
-int mlx5_lag_allow(struct mlx5_core_dev *dev)
+void mlx5_lag_update(struct mlx5_core_dev *dev)
{
- return mlx5_lag_set_state(dev, true);
+ struct mlx5_lag *ldev;
+
+ mlx5_dev_list_lock();
+ ldev = mlx5_lag_dev_get(dev);
+ if (!ldev)
+ goto unlock;
+
+ mlx5_do_bond(ldev);
+
+unlock:
+ mlx5_dev_list_unlock();
}
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -609,7 +688,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- if (!(ldev && mlx5_lag_is_bonded(ldev)))
+ if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
@@ -638,7 +717,7 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
return true;
ldev = mlx5_lag_dev_get(dev);
- if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev)
+ if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev)
return true;
/* If bonded, we do not add an IB device for PF1. */
@@ -665,7 +744,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
mutex_lock(&lag_mutex);
ldev = mlx5_lag_dev_get(dev);
- if (ldev && mlx5_lag_is_bonded(ldev)) {
+ if (ldev && __mlx5_lag_is_roce(ldev)) {
num_ports = MLX5_MAX_PORTS;
mdev[0] = ldev->pf[0].dev;
mdev[1] = ldev->pf[1].dev;
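A short consumer sketch of the reworked predicates: RoCE users now test the LAG mode, not just activity, before taking the bond upper device (mdev is assumed to be a bound mlx5 device):

if (mlx5_lag_is_active(mdev) && mlx5_lag_is_roce(mdev)) {
	struct net_device *upper = mlx5_lag_get_roce_netdev(mdev);

	if (upper) {
		/* ... use the bond upper device ... */
		dev_put(upper);	/* the helper returns a held reference */
	}
}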
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
new file mode 100644
index 000000000000..1dea0b1c9826
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_LAG_H__
+#define __MLX5_LAG_H__
+
+#include "mlx5_core.h"
+#include "lag_mp.h"
+
+enum {
+ MLX5_LAG_FLAG_ROCE = 1 << 0,
+ MLX5_LAG_FLAG_SRIOV = 1 << 1,
+ MLX5_LAG_FLAG_MULTIPATH = 1 << 2,
+};
+
+#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\
+ MLX5_LAG_FLAG_MULTIPATH)
+
+struct lag_func {
+ struct mlx5_core_dev *dev;
+ struct net_device *netdev;
+};
+
+/* Used for collection of netdev event info. */
+struct lag_tracker {
+ enum netdev_lag_tx_type tx_type;
+ struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
+ unsigned int is_bonded:1;
+};
+
+/* LAG data of a ConnectX card.
+ * It serves both its phys functions.
+ */
+struct mlx5_lag {
+ u8 flags;
+ u8 v2p_map[MLX5_MAX_PORTS];
+ struct lag_func pf[MLX5_MAX_PORTS];
+ struct lag_tracker tracker;
+ struct workqueue_struct *wq;
+ struct delayed_work bond_work;
+ struct notifier_block nb;
+ struct lag_mp lag_mp;
+};
+
+static inline struct mlx5_lag *
+mlx5_lag_dev_get(struct mlx5_core_dev *dev)
+{
+ return dev->priv.lag;
+}
+
+static inline bool
+__mlx5_lag_is_active(struct mlx5_lag *ldev)
+{
+ return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
+}
+
+void mlx5_modify_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker);
+int mlx5_activate_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ u8 flags);
+int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+ struct net_device *ndev);
+
+#endif /* __MLX5_LAG_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
new file mode 100644
index 000000000000..5633f8572800
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/netdevice.h>
+#include "lag.h"
+#include "lag_mp.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "lib/mlx5.h"
+
+static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
+{
+ if (!ldev->pf[0].dev || !ldev->pf[1].dev)
+ return false;
+
+ return mlx5_esw_multipath_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
+}
+
+static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
+{
+ return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
+}
+
+bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ bool res;
+
+ ldev = mlx5_lag_dev_get(dev);
+ res = ldev && __mlx5_lag_is_multipath(ldev);
+
+ return res;
+}
+
+/**
+ * Set lag port affinity
+ *
+ * @ldev: lag device
+ * @port:
+ * 0 - set normal affinity.
+ * 1 - set affinity to port 1.
+ * 2 - set affinity to port 2.
+ *
+ */
+static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
+{
+ struct lag_tracker tracker;
+
+ if (!__mlx5_lag_is_multipath(ldev))
+ return;
+
+ switch (port) {
+ case 0:
+ tracker.netdev_state[0].tx_enabled = true;
+ tracker.netdev_state[1].tx_enabled = true;
+ tracker.netdev_state[0].link_up = true;
+ tracker.netdev_state[1].link_up = true;
+ break;
+ case 1:
+ tracker.netdev_state[0].tx_enabled = true;
+ tracker.netdev_state[0].link_up = true;
+ tracker.netdev_state[1].tx_enabled = false;
+ tracker.netdev_state[1].link_up = false;
+ break;
+ case 2:
+ tracker.netdev_state[0].tx_enabled = false;
+ tracker.netdev_state[0].link_up = false;
+ tracker.netdev_state[1].tx_enabled = true;
+ tracker.netdev_state[1].link_up = true;
+ break;
+ default:
+ mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d",
+ port);
+ return;
+ }
+
+ if (tracker.netdev_state[0].tx_enabled)
+ mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events,
+ MLX5_DEV_EVENT_PORT_AFFINITY,
+ (void *)0);
+
+ if (tracker.netdev_state[1].tx_enabled)
+ mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events,
+ MLX5_DEV_EVENT_PORT_AFFINITY,
+ (void *)0);
+
+ mlx5_modify_lag(ldev, &tracker);
+}
+
+static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
+{
+ struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
+ struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
+
+ flush_workqueue(ldev->wq);
+}
+
+struct mlx5_fib_event_work {
+ struct work_struct work;
+ struct mlx5_lag *ldev;
+ unsigned long event;
+ union {
+ struct fib_entry_notifier_info fen_info;
+ struct fib_nh_notifier_info fnh_info;
+ };
+};
+
+static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ unsigned long event,
+ struct fib_info *fi)
+{
+ struct lag_mp *mp = &ldev->lag_mp;
+
+ /* Handle delete event */
+ if (event == FIB_EVENT_ENTRY_DEL) {
+ /* stop tracking */
+ if (mp->mfi == fi)
+ mp->mfi = NULL;
+ return;
+ }
+
+ /* Handle add/replace event */
+ if (fi->fib_nhs == 1) {
+ if (__mlx5_lag_is_active(ldev)) {
+ struct net_device *nh_dev = fi->fib_nh[0].nh_dev;
+ int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
+
+ mlx5_lag_set_port_affinity(ldev, ++i);
+ }
+ return;
+ }
+
+ if (fi->fib_nhs != 2)
+ return;
+
+ /* Verify the next hops are ports of the same HCA */
+ if (!(fi->fib_nh[0].nh_dev == ldev->pf[0].netdev &&
+ fi->fib_nh[1].nh_dev == ldev->pf[1].netdev) &&
+ !(fi->fib_nh[0].nh_dev == ldev->pf[1].netdev &&
+ fi->fib_nh[1].nh_dev == ldev->pf[0].netdev)) {
+ mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n");
+ return;
+ }
+
+ /* First time we see a multipath route */
+ if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
+ struct lag_tracker tracker;
+
+ tracker = ldev->tracker;
+ mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+ }
+
+ mlx5_lag_set_port_affinity(ldev, 0);
+ mp->mfi = fi;
+}
+
+static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
+ unsigned long event,
+ struct fib_nh *fib_nh,
+ struct fib_info *fi)
+{
+ struct lag_mp *mp = &ldev->lag_mp;
+
+ /* Check that the nh event is related to the tracked route */
+ if (!mp->mfi || mp->mfi != fi)
+ return;
+
+ /* nh added/removed */
+ if (event == FIB_EVENT_NH_DEL) {
+ int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->nh_dev);
+
+ if (i >= 0) {
+ i = (i + 1) % 2 + 1; /* peer port */
+ mlx5_lag_set_port_affinity(ldev, i);
+ }
+ } else if (event == FIB_EVENT_NH_ADD &&
+ fi->fib_nhs == 2) {
+ mlx5_lag_set_port_affinity(ldev, 0);
+ }
+}
+
+static void mlx5_lag_fib_update(struct work_struct *work)
+{
+ struct mlx5_fib_event_work *fib_work =
+ container_of(work, struct mlx5_fib_event_work, work);
+ struct mlx5_lag *ldev = fib_work->ldev;
+ struct fib_nh *fib_nh;
+
+ /* Protect internal structures from changes */
+ rtnl_lock();
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ mlx5_lag_fib_route_event(ldev, fib_work->event,
+ fib_work->fen_info.fi);
+ fib_info_put(fib_work->fen_info.fi);
+ break;
+ case FIB_EVENT_NH_ADD: /* fall through */
+ case FIB_EVENT_NH_DEL:
+ fib_nh = fib_work->fnh_info.fib_nh;
+ mlx5_lag_fib_nexthop_event(ldev,
+ fib_work->event,
+ fib_work->fnh_info.fib_nh,
+ fib_nh->nh_parent);
+ fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
+ break;
+ }
+
+ rtnl_unlock();
+ kfree(fib_work);
+}
+
+static struct mlx5_fib_event_work *
+mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
+{
+ struct mlx5_fib_event_work *fib_work;
+
+ fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
+ if (WARN_ON(!fib_work))
+ return NULL;
+
+ INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
+ fib_work->ldev = ldev;
+ fib_work->event = event;
+
+ return fib_work;
+}
+
+static int mlx5_lag_fib_event(struct notifier_block *nb,
+ unsigned long event,
+ void *ptr)
+{
+ struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
+ struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
+ struct fib_notifier_info *info = ptr;
+ struct mlx5_fib_event_work *fib_work;
+ struct fib_entry_notifier_info *fen_info;
+ struct fib_nh_notifier_info *fnh_info;
+ struct fib_info *fi;
+
+ if (info->family != AF_INET)
+ return NOTIFY_DONE;
+
+ if (!mlx5_lag_multipath_check_prereq(ldev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ fen_info = container_of(info, struct fib_entry_notifier_info,
+ info);
+ fi = fen_info->fi;
+ if (fi->fib_dev != ldev->pf[0].netdev &&
+ fi->fib_dev != ldev->pf[1].netdev) {
+ return NOTIFY_DONE;
+ }
+ fib_work = mlx5_lag_init_fib_work(ldev, event);
+ if (!fib_work)
+ return NOTIFY_DONE;
+ fib_work->fen_info = *fen_info;
+ /* Take reference on fib_info to prevent it from being
+ * freed while work is queued. Release it afterwards.
+ */
+ fib_info_hold(fib_work->fen_info.fi);
+ break;
+ case FIB_EVENT_NH_ADD: /* fall through */
+ case FIB_EVENT_NH_DEL:
+ fnh_info = container_of(info, struct fib_nh_notifier_info,
+ info);
+ fib_work = mlx5_lag_init_fib_work(ldev, event);
+ if (!fib_work)
+ return NOTIFY_DONE;
+ fib_work->fnh_info = *fnh_info;
+ fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ queue_work(ldev->wq, &fib_work->work);
+
+ return NOTIFY_DONE;
+}
+
+int mlx5_lag_mp_init(struct mlx5_lag *ldev)
+{
+ struct lag_mp *mp = &ldev->lag_mp;
+ int err;
+
+ if (mp->fib_nb.notifier_call)
+ return 0;
+
+ mp->fib_nb.notifier_call = mlx5_lag_fib_event;
+ err = register_fib_notifier(&mp->fib_nb,
+ mlx5_lag_fib_event_flush);
+ if (err)
+ mp->fib_nb.notifier_call = NULL;
+
+ return err;
+}
+
+void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
+{
+ struct lag_mp *mp = &ldev->lag_mp;
+
+ if (!mp->fib_nb.notifier_call)
+ return;
+
+ unregister_fib_notifier(&mp->fib_nb);
+ mp->fib_nb.notifier_call = NULL;
+}
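The nexthop-removal path above maps the removed port's index to its surviving peer. A hedged helper spelling out that arithmetic (illustrative only, not part of the patch):

static int peer_port_affinity(int nh_idx)
{
	/* index 0 -> affinity to port 2, index 1 -> affinity to port 1 */
	return (nh_idx + 1) % 2 + 1;
}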
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
new file mode 100644
index 000000000000..6d14b1100be9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_LAG_MP_H__
+#define __MLX5_LAG_MP_H__
+
+#include "lag.h"
+#include "mlx5_core.h"
+
+struct lag_mp {
+ struct notifier_block fib_nb;
+ struct fib_info *mfi; /* used in tracking fib events */
+};
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+int mlx5_lag_mp_init(struct mlx5_lag *ldev);
+void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
+static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+#endif /* __MLX5_LAG_MP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 0d90b1b4a3d3..ca0ee9916e9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -33,6 +33,7 @@
#include <linux/clocksource.h>
#include <linux/highmem.h>
#include <rdma/mlx5-abi.h>
+#include "lib/eq.h"
#include "en.h"
#include "clock.h"
@@ -71,7 +72,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
clock);
- return mlx5_read_internal_timer(mdev) & cc->mask;
+ return mlx5_read_internal_timer(mdev, NULL) & cc->mask;
}
static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
@@ -155,15 +156,19 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
return 0;
}
-static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
{
struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
ptp_info);
- u64 ns;
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
unsigned long flags;
+ u64 cycles, ns;
write_seqlock_irqsave(&clock->lock, flags);
- ns = timecounter_read(&clock->tc);
+ cycles = mlx5_read_internal_timer(mdev, sts);
+ ns = timecounter_cyc2time(&clock->tc, cycles);
write_sequnlock_irqrestore(&clock->lock, flags);
*ts = ns_to_timespec64(ns);
@@ -306,7 +311,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
ts.tv_sec = rq->perout.start.sec;
ts.tv_nsec = rq->perout.start.nsec;
ns = timespec64_to_ns(&ts);
- cycles_now = mlx5_read_internal_timer(mdev);
+ cycles_now = mlx5_read_internal_timer(mdev, NULL);
write_seqlock_irqsave(&clock->lock, flags);
nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
nsec_delta = ns - nsec_now;
@@ -383,7 +388,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = {
.pps = 0,
.adjfreq = mlx5_ptp_adjfreq,
.adjtime = mlx5_ptp_adjtime,
- .gettime64 = mlx5_ptp_gettime,
+ .gettimex64 = mlx5_ptp_gettimex,
.settime64 = mlx5_ptp_settime,
.enable = NULL,
.verify = NULL,
@@ -439,16 +444,17 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
}
-void mlx5_pps_event(struct mlx5_core_dev *mdev,
- struct mlx5_eqe *eqe)
+static int mlx5_pps_event(struct notifier_block *nb,
+ unsigned long type, void *data)
{
- struct mlx5_clock *clock = &mdev->clock;
+ struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
+ struct mlx5_core_dev *mdev = clock->mdev;
struct ptp_clock_event ptp_event;
- struct timespec64 ts;
- u64 nsec_now, nsec_delta;
u64 cycles_now, cycles_delta;
+ u64 nsec_now, nsec_delta, ns;
+ struct mlx5_eqe *eqe = data;
int pin = eqe->data.pps.pin;
- s64 ns;
+ struct timespec64 ts;
unsigned long flags;
switch (clock->ptp_info.pin_config[pin].func) {
@@ -463,11 +469,12 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
} else {
ptp_event.type = PTP_CLOCK_EXTTS;
}
+ /* TODO: clock->ptp can be NULL if ptp_clock_register() fails */
ptp_clock_event(clock->ptp, &ptp_event);
break;
case PTP_PF_PEROUT:
- mlx5_ptp_gettime(&clock->ptp_info, &ts);
- cycles_now = mlx5_read_internal_timer(mdev);
+ mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL);
+ cycles_now = mlx5_read_internal_timer(mdev, NULL);
ts.tv_sec += 1;
ts.tv_nsec = 0;
ns = timespec64_to_ns(&ts);
@@ -481,8 +488,11 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
write_sequnlock_irqrestore(&clock->lock, flags);
break;
default:
- mlx5_core_err(mdev, " Unhandled event\n");
+ mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
+ clock->ptp_info.pin_config[pin].func);
}
+
+ return NOTIFY_OK;
}
void mlx5_init_clock(struct mlx5_core_dev *mdev)
@@ -511,14 +521,14 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
ktime_to_ns(ktime_get_real()));
/* Calculate period in seconds to call the overflow watchdog - to make
- * sure counter is checked at least once every wrap around.
+ * sure counter is checked at least twice every wrap around.
* The period is calculated as the minimum between max HW cycles count
* (The clock source mask) and max amount of cycles that can be
* multiplied by clock multiplier where the result doesn't exceed
* 64bits.
*/
overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult);
- overflow_cycles = min(overflow_cycles, clock->cycles.mask >> 1);
+ overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3));
ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles,
frac, &frac);
@@ -567,6 +577,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
PTR_ERR(clock->ptp));
clock->ptp = NULL;
}
+
+ MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
+ mlx5_eq_notifier_register(mdev, &clock->pps_nb);
}
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
@@ -576,6 +589,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
return;
+ mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
if (clock->ptp) {
ptp_clock_unregister(clock->ptp);
clock->ptp = NULL;
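Why the watchdog bound changed from mask >> 1 to mask / 3: a check delayed by almost a full period at exactly half the wrap could leave only one read per wrap. A hedged numeric sketch, assuming a 48-bit counter at 1 GHz (1 cycle per ns):

/*
 * wrap period  = 2^48 ns          ~ 78.2 hours
 * check period = (2^48 / 3) ns    ~ 26.1 hours
 *
 * Even if one scheduled check slips by nearly a full period, at least
 * two checks still land inside every wrap.
 */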
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index 263cb6e2aeee..31600924bdc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -36,7 +36,6 @@
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
void mlx5_init_clock(struct mlx5_core_dev *mdev);
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
-void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
@@ -60,8 +59,6 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
#else
static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
-static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {}
-
static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
{
return -1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
new file mode 100644
index 000000000000..bced2efe9bef
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
+
+static LIST_HEAD(devcom_list);
+
+#define devcom_for_each_component(priv, comp, iter) \
+ for (iter = 0; \
+ comp = &(priv)->components[iter], iter < MLX5_DEVCOM_NUM_COMPONENTS; \
+ iter++)
+
+struct mlx5_devcom_component {
+ struct {
+ void *data;
+ } device[MLX5_MAX_PORTS];
+
+ mlx5_devcom_event_handler_t handler;
+ struct rw_semaphore sem;
+ bool paired;
+};
+
+struct mlx5_devcom_list {
+ struct list_head list;
+
+ struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS];
+ struct mlx5_core_dev *devs[MLX5_MAX_PORTS];
+};
+
+struct mlx5_devcom {
+ struct mlx5_devcom_list *priv;
+ int idx;
+};
+
+static struct mlx5_devcom_list *mlx5_devcom_list_alloc(void)
+{
+ struct mlx5_devcom_component *comp;
+ struct mlx5_devcom_list *priv;
+ int i;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ devcom_for_each_component(priv, comp, i)
+ init_rwsem(&comp->sem);
+
+ return priv;
+}
+
+static struct mlx5_devcom *mlx5_devcom_alloc(struct mlx5_devcom_list *priv,
+ u8 idx)
+{
+ struct mlx5_devcom *devcom;
+
+ devcom = kzalloc(sizeof(*devcom), GFP_KERNEL);
+ if (!devcom)
+ return NULL;
+
+ devcom->priv = priv;
+ devcom->idx = idx;
+ return devcom;
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_devcom_list *priv = NULL, *iter;
+ struct mlx5_devcom *devcom = NULL;
+ bool new_priv = false;
+ u64 sguid0, sguid1;
+ int idx, i;
+
+ if (!mlx5_core_is_pf(dev))
+ return NULL;
+
+ sguid0 = mlx5_query_nic_system_image_guid(dev);
+ list_for_each_entry(iter, &devcom_list, list) {
+ struct mlx5_core_dev *tmp_dev = NULL;
+
+ idx = -1;
+ for (i = 0; i < MLX5_MAX_PORTS; i++) {
+ if (iter->devs[i])
+ tmp_dev = iter->devs[i];
+ else
+ idx = i;
+ }
+
+ if (idx == -1)
+ continue;
+
+ sguid1 = mlx5_query_nic_system_image_guid(tmp_dev);
+ if (sguid0 != sguid1)
+ continue;
+
+ priv = iter;
+ break;
+ }
+
+ if (!priv) {
+ priv = mlx5_devcom_list_alloc();
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+
+ idx = 0;
+ new_priv = true;
+ }
+
+ priv->devs[idx] = dev;
+ devcom = mlx5_devcom_alloc(priv, idx);
+ if (!devcom) {
+ kfree(priv);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ if (new_priv)
+ list_add(&priv->list, &devcom_list);
+
+ return devcom;
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
+{
+ struct mlx5_devcom_list *priv;
+ int i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ priv = devcom->priv;
+ priv->devs[devcom->idx] = NULL;
+
+ kfree(devcom);
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (priv->devs[i])
+ break;
+
+ if (i != MLX5_MAX_PORTS)
+ return;
+
+ list_del(&priv->list);
+ kfree(priv);
+}
+
+void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ mlx5_devcom_event_handler_t handler,
+ void *data)
+{
+ struct mlx5_devcom_component *comp;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ WARN_ON(!data);
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ comp->handler = handler;
+ comp->device[devcom->idx].data = data;
+ up_write(&comp->sem);
+}
+
+void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return;
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ comp->device[devcom->idx].data = NULL;
+ up_write(&comp->sem);
+}
+
+int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ int event,
+ void *event_data)
+{
+ struct mlx5_devcom_component *comp;
+ int err = -ENODEV, i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return err;
+
+ comp = &devcom->priv->components[id];
+ down_write(&comp->sem);
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (i != devcom->idx && comp->device[i].data) {
+ err = comp->handler(event, comp->device[i].data,
+ event_data);
+ break;
+ }
+
+ up_write(&comp->sem);
+ return err;
+}
+
+void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ bool paired)
+{
+ struct mlx5_devcom_component *comp;
+
+ comp = &devcom->priv->components[id];
+ WARN_ON(!rwsem_is_locked(&comp->sem));
+
+ comp->paired = paired;
+}
+
+bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ if (IS_ERR_OR_NULL(devcom))
+ return false;
+
+ return devcom->priv->components[id].paired;
+}
+
+void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp;
+ int i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return NULL;
+
+ comp = &devcom->priv->components[id];
+ down_read(&comp->sem);
+ if (!comp->paired) {
+ up_read(&comp->sem);
+ return NULL;
+ }
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (i != devcom->idx)
+ break;
+
+ return comp->device[i].data;
+}
+
+void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp = &devcom->priv->components[id];
+
+ up_read(&comp->sem);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
new file mode 100644
index 000000000000..939d5bf1581b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_DEVCOM_H__
+#define __LIB_MLX5_DEVCOM_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_devcom_components {
+ MLX5_DEVCOM_ESW_OFFLOADS,
+
+ MLX5_DEVCOM_NUM_COMPONENTS,
+};
+
+typedef int (*mlx5_devcom_event_handler_t)(int event,
+ void *my_data,
+ void *event_data);
+
+struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev);
+void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom);
+
+void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ mlx5_devcom_event_handler_t handler,
+ void *data);
+void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ int event,
+ void *event_data);
+
+void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id,
+ bool paired);
+bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+ enum mlx5_devcom_components id);
+
+#endif
+
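A minimal usage sketch for the devcom API above; MY_PAIR_EVENT, my_priv and the handler are hypothetical, and note the handler runs with the peer's registered data:

static int my_pair_handler(int event, void *my_data, void *event_data)
{
	/* my_data is the *peer's* registered component data */
	return 0;
}

devcom = mlx5_devcom_register_device(dev);
mlx5_devcom_register_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
			       my_pair_handler, my_priv);
err = mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
			     MY_PAIR_EVENT, my_priv);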
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
new file mode 100644
index 000000000000..c0fb6d72b695
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_EQ_H__
+#define __LIB_MLX5_EQ_H__
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
+#include <linux/mlx5/cq.h>
+
+#define MLX5_MAX_IRQ_NAME (32)
+#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe))
+
+struct mlx5_eq_tasklet {
+ struct list_head list;
+ struct list_head process_list;
+ struct tasklet_struct task;
+ spinlock_t lock; /* lock completion tasklet list */
+};
+
+struct mlx5_cq_table {
+ spinlock_t lock; /* protect radix tree */
+ struct radix_tree_root tree;
+};
+
+struct mlx5_eq {
+ struct mlx5_core_dev *dev;
+ struct mlx5_cq_table cq_table;
+ __be32 __iomem *doorbell;
+ u32 cons_index;
+ struct mlx5_frag_buf buf;
+ int size;
+ unsigned int vecidx;
+ unsigned int irqn;
+ u8 eqn;
+ int nent;
+ struct mlx5_rsc_debug *dbg;
+};
+
+struct mlx5_eq_comp {
+ struct mlx5_eq core; /* Must be first */
+ struct mlx5_eq_tasklet tasklet_ctx;
+ struct list_head list;
+};
+
+static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+ return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+}
+
+static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+ struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+
+ return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+}
+
+static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+
+int mlx5_eq_table_init(struct mlx5_core_dev *dev);
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_eq_table_create(struct mlx5_core_dev *dev);
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
+void mlx5_cq_tasklet_cb(unsigned long data);
+struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
+
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
+#endif
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+
+#endif
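An illustrative polling loop built from the helpers above, mirroring how the EQ interrupt handlers walk entries (a sketch, not part of this header):

static void poll_eq_sketch(struct mlx5_eq *eq)
{
	struct mlx5_eqe *eqe;

	while ((eqe = next_eqe_sw(eq))) {
		/* read EQE contents only after the ownership check */
		dma_rmb();
		/* dispatch on eqe->type here */
		++eq->cons_index;
	}

	eq_update_ci(eq, 1);	/* publish CI and re-arm the EQ */
}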
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 7550b1cc8c6a..397a2847867a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -33,6 +33,8 @@
#ifndef __LIB_MLX5_H__
#define __LIB_MLX5_H__
+#include "mlx5_core.h"
+
void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
@@ -40,4 +42,38 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+/* TODO move to lib/events.h */
+
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF
+
+enum port_module_event_status_type {
+ MLX5_MODULE_STATUS_PLUGGED = 0x1,
+ MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
+ MLX5_MODULE_STATUS_ERROR = 0x3,
+ MLX5_MODULE_STATUS_DISABLED = 0x4,
+ MLX5_MODULE_STATUS_NUM,
+};
+
+enum port_module_event_error_type {
+ MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0,
+ MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1,
+ MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2,
+ MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3,
+ MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4,
+ MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5,
+ MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6,
+ MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7,
+ MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc,
+ MLX5_MODULE_EVENT_ERROR_NUM,
+};
+
+struct mlx5_pme_stats {
+ u64 status_counters[MLX5_MODULE_STATUS_NUM];
+ u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats);
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
+
#endif
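A hedged sketch of consuming the new PME statistics helper (dev is assumed to be an mlx5 core device):

struct mlx5_pme_stats stats;

mlx5_get_pme_stats(dev, &stats);
pr_info("modules plugged: %llu, bad-cable errors: %llu\n",
	stats.status_counters[MLX5_MODULE_STATUS_PLUGGED],
	stats.error_counters[MLX5_MODULE_EVENT_ERROR_BAD_CABLE]);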
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
index 98359559c77e..a71d5b9c7ab2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -108,8 +108,7 @@ int mlx5_mpfs_init(struct mlx5_core_dev *dev)
mutex_init(&mpfs->lock);
mpfs->size = l2table_size;
- mpfs->bitmap = kcalloc(BITS_TO_LONGS(l2table_size),
- sizeof(uintptr_t), GFP_KERNEL);
+ mpfs->bitmap = bitmap_zalloc(l2table_size, GFP_KERNEL);
if (!mpfs->bitmap) {
kfree(mpfs);
return -ENOMEM;
@@ -127,7 +126,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev)
return;
WARN_ON(!hlist_empty(mpfs->hash));
- kfree(mpfs->bitmap);
+ bitmap_free(mpfs->bitmap);
kfree(mpfs);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
new file mode 100644
index 000000000000..40f4a19b1ce1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/port.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+#include "lib/port_tun.h"
+
+struct mlx5_port_tun_entropy_flags {
+ bool force_supported, force_enabled;
+ bool calc_supported, calc_enabled;
+ bool gre_calc_supported, gre_calc_enabled;
+};
+
+static void mlx5_query_port_tun_entropy(struct mlx5_core_dev *mdev,
+ struct mlx5_port_tun_entropy_flags *entropy_flags)
+{
+ u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
+ /* Default values for FW which do not support MLX5_REG_PCMR */
+ entropy_flags->force_supported = false;
+ entropy_flags->calc_supported = false;
+ entropy_flags->gre_calc_supported = false;
+ entropy_flags->force_enabled = false;
+ entropy_flags->calc_enabled = true;
+ entropy_flags->gre_calc_enabled = true;
+
+ if (!MLX5_CAP_GEN(mdev, ports_check))
+ return;
+
+ if (mlx5_query_ports_check(mdev, out, sizeof(out)))
+ return;
+
+ entropy_flags->force_supported = !!(MLX5_GET(pcmr_reg, out, entropy_force_cap));
+ entropy_flags->calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_calc_cap));
+ entropy_flags->gre_calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc_cap));
+ entropy_flags->force_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_force));
+ entropy_flags->calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_calc));
+ entropy_flags->gre_calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc));
+}
+
+static int mlx5_set_port_tun_entropy_calc(struct mlx5_core_dev *mdev, u8 enable,
+ u8 force)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+ int err;
+
+ err = mlx5_query_ports_check(mdev, in, sizeof(in));
+ if (err)
+ return err;
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ MLX5_SET(pcmr_reg, in, entropy_force, force);
+ MLX5_SET(pcmr_reg, in, entropy_calc, enable);
+ return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+static int mlx5_set_port_gre_tun_entropy_calc(struct mlx5_core_dev *mdev,
+ u8 enable, u8 force)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+ int err;
+
+ err = mlx5_query_ports_check(mdev, in, sizeof(in));
+ if (err)
+ return err;
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ MLX5_SET(pcmr_reg, in, entropy_force, force);
+ MLX5_SET(pcmr_reg, in, entropy_gre_calc, enable);
+ return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+ struct mlx5_core_dev *mdev)
+{
+ struct mlx5_port_tun_entropy_flags entropy_flags;
+
+ tun_entropy->mdev = mdev;
+ mutex_init(&tun_entropy->lock);
+ mlx5_query_port_tun_entropy(mdev, &entropy_flags);
+ tun_entropy->num_enabling_entries = 0;
+ tun_entropy->num_disabling_entries = 0;
+ /* default to enabled when the FW cannot report the calc capability */
+ tun_entropy->enabled =
+ (entropy_flags.calc_supported) ?
+ entropy_flags.calc_enabled : true;
+}
+
+static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy,
+ int reformat_type, bool enable)
+{
+ struct mlx5_port_tun_entropy_flags entropy_flags;
+ int err;
+
+ mlx5_query_port_tun_entropy(tun_entropy->mdev, &entropy_flags);
+ /* Tunnel entropy calculation may be controlled either on a per-port
+ * basis for all tunneling protocols or specifically for the GRE
+ * protocol. Prioritize GRE protocol control (if capable) over the
+ * global port configuration.
+ */
+ if (entropy_flags.gre_calc_supported &&
+ reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
+ /* Other applications may change the global FW entropy
+ * calculation settings. Check that the current entropy value
+ * is the opposite of the value being set.
+ */
+ if (entropy_flags.force_enabled &&
+ enable == entropy_flags.gre_calc_enabled) {
+ mlx5_core_warn(tun_entropy->mdev,
+ "Unexpected GRE entropy calc setting - expected %d",
+ !entropy_flags.gre_calc_enabled);
+ return -EOPNOTSUPP;
+ }
+ err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, enable,
+ entropy_flags.force_supported);
+ if (err)
+ return err;
+ /* if we turn on the entropy we don't need to force it anymore */
+ if (entropy_flags.force_supported && enable) {
+ err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, 1, 0);
+ if (err)
+ return err;
+ }
+ } else if (entropy_flags.calc_supported) {
+ /* Other applications may change the global FW entropy
+ * calculation settings. When force is enabled, the current
+ * entropy value must be the opposite of the requested one;
+ * otherwise the request cannot be honored.
+ */
+ if (entropy_flags.force_enabled &&
+ enable == entropy_flags.calc_enabled) {
+ mlx5_core_warn(tun_entropy->mdev,
+ "Unexpected entropy calc setting - expected %d\n",
+ !entropy_flags.calc_enabled);
+ return -EOPNOTSUPP;
+ }
+ /* GRE requires entropy calculation to be disabled. If there
+ * are enabling entries (e.g. VXLAN) we cannot turn it off for
+ * them, so fail.
+ */
+ if (tun_entropy->num_enabling_entries)
+ return -EOPNOTSUPP;
+ err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, enable,
+ entropy_flags.force_supported);
+ if (err)
+ return err;
+ tun_entropy->enabled = enable;
+ /* Once entropy calculation is enabled, the force bit can be cleared */
+ if (entropy_flags.force_supported && enable) {
+ err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, 1, 0);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+/* This function manages the refcount for enabling/disabling tunnel types.
+ * The return value indicates whether the increment succeeded, which depends
+ * on the entropy capabilities and the current configuration.
+ */
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+ int reformat_type)
+{
+ /* Default to error for unknown (non-VXLAN/GRE) tunnel types */
+ int err = -EOPNOTSUPP;
+
+ mutex_lock(&tun_entropy->lock);
+ if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN &&
+ tun_entropy->enabled) {
+ /* If entropy calculation is enabled for all tunneling
+ * types, it is fine for VXLAN, so approve; otherwise
+ * keep the default error.
+ */
+ tun_entropy->num_enabling_entries++;
+ err = 0;
+ } else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
+ /* Turn off entropy only for the first GRE rule; for
+ * subsequent rules it has already been disabled.
+ */
+ if (tun_entropy->num_disabling_entries == 0)
+ err = mlx5_set_entropy(tun_entropy, reformat_type, 0);
+ else
+ err = 0;
+ if (!err)
+ tun_entropy->num_disabling_entries++;
+ }
+ mutex_unlock(&tun_entropy->lock);
+
+ return err;
+}
+
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+ int reformat_type)
+{
+ mutex_lock(&tun_entropy->lock);
+ if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN)
+ tun_entropy->num_enabling_entries--;
+ else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE &&
+ --tun_entropy->num_disabling_entries == 0)
+ mlx5_set_entropy(tun_entropy, reformat_type, 1);
+ mutex_unlock(&tun_entropy->lock);
+}
+
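A minimal usage sketch for the new API (the caller and rule-installation step are hypothetical; only the mlx5_tun_entropy_* calls come from this patch):

	/* Hypothetical caller: take the entropy refcount before offloading
	 * an encap rule and release it when the rule is removed.
	 */
	static int offload_encap_rule(struct mlx5_tun_entropy *tun_entropy,
				      int reformat_type)
	{
		int err;

		err = mlx5_tun_entropy_refcount_inc(tun_entropy, reformat_type);
		if (err)
			return err; /* -EOPNOTSUPP for unsupported combinations */

		/* ... install the flow rule; on failure call
		 * mlx5_tun_entropy_refcount_dec(tun_entropy, reformat_type) ...
		 */
		return 0;
	}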
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h
new file mode 100644
index 000000000000..54c42a88705e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_PORT_TUN_H__
+#define __MLX5_PORT_TUN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_tun_entropy {
+ struct mlx5_core_dev *mdev;
+ u32 num_enabling_entries;
+ u32 num_disabling_entries;
+ u8 enabled;
+ struct mutex lock; /* lock the entropy fields */
+};
+
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+ struct mlx5_core_dev *mdev);
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+ int reformat_type);
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+ int reformat_type);
+
+#endif /* __MLX5_PORT_TUN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
deleted file mode 100644
index 3a3b0005fd2b..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
-#include "mlx5_core.h"
-
-int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
- u16 opmod, u8 port)
-{
- int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
- int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
- int err = -ENOMEM;
- void *data;
- void *resp;
- u32 *out;
- u32 *in;
-
- in = kzalloc(inlen, GFP_KERNEL);
- out = kzalloc(outlen, GFP_KERNEL);
- if (!in || !out)
- goto out;
-
- MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
- MLX5_SET(mad_ifc_in, in, op_mod, opmod);
- MLX5_SET(mad_ifc_in, in, port, port);
-
- data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
- memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
-
- err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
- if (err)
- goto out;
-
- resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
- memcpy(outb, resp,
- MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
-
-out:
- kfree(out);
- kfree(in);
- return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 28132c7dc05f..76716419370d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -43,7 +43,6 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
#include <linux/debugfs.h>
#include <linux/kmod.h>
#include <linux/mlx5/mlx5_ifc.h>
@@ -53,6 +52,7 @@
#endif
#include <net/devlink.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
#include "fs_core.h"
#include "lib/mpfs.h"
#include "eswitch.h"
@@ -63,7 +63,9 @@
#include "accel/tls.h"
#include "lib/clock.h"
#include "lib/vxlan.h"
+#include "lib/devcom.h"
#include "diag/fw_tracer.h"
+#include "ecpf.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -162,26 +164,6 @@ static struct mlx5_profile profile[] = {
.size = 8,
.limit = 4
},
- .mr_cache[16] = {
- .size = 8,
- .limit = 4
- },
- .mr_cache[17] = {
- .size = 8,
- .limit = 4
- },
- .mr_cache[18] = {
- .size = 8,
- .limit = 4
- },
- .mr_cache[19] = {
- .size = 4,
- .limit = 2
- },
- .mr_cache[20] = {
- .size = 4,
- .limit = 2
- },
},
};
@@ -319,51 +301,6 @@ static void release_bar(struct pci_dev *pdev)
pci_release_regions(pdev);
}
-static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_eq_table *table = &priv->eq_table;
- int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
- MLX5_CAP_GEN(dev, max_num_eqs) :
- 1 << MLX5_CAP_GEN(dev, log_max_eq);
- int nvec;
- int err;
-
- nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
- MLX5_EQ_VEC_COMP_BASE;
- nvec = min_t(int, nvec, num_eqs);
- if (nvec <= MLX5_EQ_VEC_COMP_BASE)
- return -ENOMEM;
-
- priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
- if (!priv->irq_info)
- return -ENOMEM;
-
- nvec = pci_alloc_irq_vectors(dev->pdev,
- MLX5_EQ_VEC_COMP_BASE + 1, nvec,
- PCI_IRQ_MSIX);
- if (nvec < 0) {
- err = nvec;
- goto err_free_irq_info;
- }
-
- table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
- return 0;
-
-err_free_irq_info:
- kfree(priv->irq_info);
- return err;
-}
-
-static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
-
- pci_free_irq_vectors(dev->pdev);
- kfree(priv->irq_info);
-}
-
struct mlx5_reg_host_endianness {
u8 he;
u8 rsvd[15];
@@ -503,6 +440,58 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
return err;
}
+static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
+{
+ void *set_hca_cap;
+ void *set_ctx;
+ int set_sz;
+ bool do_set = false;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) ||
+ !MLX5_CAP_GEN(dev, pg))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
+ if (err)
+ return err;
+
+ set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ return -ENOMEM;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+ memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
+ MLX5_ST_SZ_BYTES(odp_cap));
+
+#define ODP_CAP_SET_MAX(dev, field) \
+ do { \
+ u32 _res = MLX5_CAP_ODP_MAX(dev, field); \
+ if (_res) { \
+ do_set = true; \
+ MLX5_SET(odp_cap, set_hca_cap, field, _res); \
+ } \
+ } while (0)
+
+ ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.send);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.write);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.read);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic);
+
+ if (do_set)
+ err = set_caps(dev, set_ctx, set_sz,
+ MLX5_SET_HCA_CAP_OP_MOD_ODP);
+
+ kfree(set_ctx);
+
+ return err;
+}
+
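For reference, one invocation of the macro above expands roughly as follows within handle_hca_cap_odp() (a sketch; the field name is from the patch, offsets come from mlx5_ifc.h):

	/* ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive) behaves like: */
	{
		u32 _res = MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive);

		if (_res) {		/* the max capability is advertised */
			do_set = true;	/* remember that something changed */
			MLX5_SET(odp_cap, set_hca_cap,
				 ud_odp_caps.srq_receive, _res);
		}
	}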
static int handle_hca_cap(struct mlx5_core_dev *dev)
{
void *set_ctx = NULL;
@@ -576,6 +565,33 @@ query_ex:
return err;
}
+static int set_hca_cap(struct mlx5_core_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int err;
+
+ err = handle_hca_cap(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap failed\n");
+ goto out;
+ }
+
+ err = handle_hca_cap_atomic(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+ goto out;
+ }
+
+ err = handle_hca_cap_odp(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap_odp failed\n");
+ goto out;
+ }
+
+out:
+ return err;
+}
+
static int set_hca_ctrl(struct mlx5_core_dev *dev)
{
struct mlx5_reg_host_endianness he_in;
@@ -611,6 +627,8 @@ int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
MLX5_SET(enable_hca_in, in, function_id, func_id);
+ MLX5_SET(enable_hca_in, in, embedded_cpu_function,
+ dev->caps.embedded_cpu);
return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
}
@@ -621,191 +639,29 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
MLX5_SET(disable_hca_in, in, function_id, func_id);
+ MLX5_SET(disable_hca_in, in, embedded_cpu_function,
+ dev->caps.embedded_cpu);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
+ struct ptp_system_timestamp *sts)
{
u32 timer_h, timer_h1, timer_l;
timer_h = ioread32be(&dev->iseg->internal_timer_h);
+ ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
+ ptp_read_system_postts(sts);
timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
- if (timer_h != timer_h1) /* wrap around */
+ if (timer_h != timer_h1) {
+ /* wrap around */
+ ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
-
- return (u64)timer_l | (u64)timer_h1 << 32;
-}
-
-static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- struct mlx5_priv *priv = &mdev->priv;
- int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
- if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
- mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
- return -ENOMEM;
- }
-
- cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
- priv->irq_info[i].mask);
-
- if (IS_ENABLED(CONFIG_SMP) &&
- irq_set_affinity_hint(irq, priv->irq_info[i].mask))
- mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
- return 0;
-}
-
-static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- struct mlx5_priv *priv = &mdev->priv;
- int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
- irq_set_affinity_hint(irq, NULL);
- free_cpumask_var(priv->irq_info[i].mask);
-}
-
-static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int err;
- int i;
-
- for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
- err = mlx5_irq_set_affinity_hint(mdev, i);
- if (err)
- goto err_out;
+ ptp_read_system_postts(sts);
}
- return 0;
-
-err_out:
- for (i--; i >= 0; i--)
- mlx5_irq_clear_affinity_hint(mdev, i);
-
- return err;
-}
-
-static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int i;
-
- for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
- mlx5_irq_clear_affinity_hint(mdev, i);
-}
-
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
- unsigned int *irqn)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq, *n;
- int err = -ENOENT;
-
- spin_lock(&table->lock);
- list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
- if (eq->index == vector) {
- *eqn = eq->eqn;
- *irqn = eq->irqn;
- err = 0;
- break;
- }
- }
- spin_unlock(&table->lock);
-
- return err;
-}
-EXPORT_SYMBOL(mlx5_vector2eqn);
-
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq;
-
- spin_lock(&table->lock);
- list_for_each_entry(eq, &table->comp_eqs_list, list)
- if (eq->eqn == eqn) {
- spin_unlock(&table->lock);
- return eq;
- }
-
- spin_unlock(&table->lock);
-
- return ERR_PTR(-ENOENT);
-}
-
-static void free_comp_eqs(struct mlx5_core_dev *dev)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- struct mlx5_eq *eq, *n;
-
-#ifdef CONFIG_RFS_ACCEL
- if (dev->rmap) {
- free_irq_cpu_rmap(dev->rmap);
- dev->rmap = NULL;
- }
-#endif
- spin_lock(&table->lock);
- list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
- list_del(&eq->list);
- spin_unlock(&table->lock);
- if (mlx5_destroy_unmap_eq(dev, eq))
- mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
- eq->eqn);
- kfree(eq);
- spin_lock(&table->lock);
- }
- spin_unlock(&table->lock);
-}
-
-static int alloc_comp_eqs(struct mlx5_core_dev *dev)
-{
- struct mlx5_eq_table *table = &dev->priv.eq_table;
- char name[MLX5_MAX_IRQ_NAME];
- struct mlx5_eq *eq;
- int ncomp_vec;
- int nent;
- int err;
- int i;
-
- INIT_LIST_HEAD(&table->comp_eqs_list);
- ncomp_vec = table->num_comp_vectors;
- nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
- dev->rmap = alloc_irq_cpu_rmap(ncomp_vec);
- if (!dev->rmap)
- return -ENOMEM;
-#endif
- for (i = 0; i < ncomp_vec; i++) {
- eq = kzalloc(sizeof(*eq), GFP_KERNEL);
- if (!eq) {
- err = -ENOMEM;
- goto clean;
- }
-
-#ifdef CONFIG_RFS_ACCEL
- irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev,
- MLX5_EQ_VEC_COMP_BASE + i));
-#endif
- snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
- err = mlx5_create_map_eq(dev, eq,
- i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
- name, MLX5_EQ_TYPE_COMP);
- if (err) {
- kfree(eq);
- goto clean;
- }
- mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
- eq->index = i;
- spin_lock(&table->lock);
- list_add_tail(&eq->list, &table->comp_eqs_list);
- spin_unlock(&table->lock);
- }
-
- return 0;
-
-clean:
- free_comp_eqs(dev);
- return err;
+ return (u64)timer_l | (u64)timer_h1 << 32;
}
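The ptp_read_system_prets()/postts() calls bracket only the low-word reads, since that is the instant the 64-bit value is sampled. The surrounding high/low/high sequence is the usual tear-free read of a split 64-bit counter; a distilled sketch of the pattern (names are illustrative):

	/* Read a 64-bit counter exposed as two 32-bit MMIO words. If the
	 * high word changed while the low word was read, the counter
	 * wrapped, so re-read the low word against the newer high word.
	 */
	static u64 read_split_counter(void __iomem *hi, void __iomem *lo)
	{
		u32 h1 = ioread32be(hi);
		u32 l = ioread32be(lo);
		u32 h2 = ioread32be(hi);

		if (h1 != h2)
			l = ioread32be(lo);
		return (u64)l | ((u64)h2 << 32);
	}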
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
@@ -877,11 +733,9 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
priv->numa_node = dev_to_node(&dev->pdev->dev);
- priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root);
- if (!priv->dbg_root) {
- dev_err(&pdev->dev, "Cannot create debugfs dir, aborting\n");
- return -ENOMEM;
- }
+ if (mlx5_debugfs_root)
+ priv->dbg_root =
+ debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root);
err = mlx5_pci_enable_device(dev);
if (err) {
@@ -903,6 +757,11 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto err_clr_master;
}
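+ /* Best effort: request PCIe completer atomics at any supported
+ * width; report failure only if the 32-, 64- and 128-bit
+ * requests all fail.
+ */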
+ if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
+ pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
+ pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
+ mlx5_core_dbg(dev, "Enabling pci atomics failed\n");
+
dev->iseg_base = pci_resource_start(dev->pdev, 0);
dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
if (!dev->iseg) {
@@ -938,28 +797,37 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
struct pci_dev *pdev = dev->pdev;
int err;
+ priv->devcom = mlx5_devcom_register_device(dev);
+ if (IS_ERR(priv->devcom))
+ dev_err(&pdev->dev, "failed to register with devcom (0x%p)\n",
+ priv->devcom);
+
err = mlx5_query_board_id(dev);
if (err) {
dev_err(&pdev->dev, "query board id failed\n");
- goto out;
+ goto err_devcom;
}
- err = mlx5_eq_init(dev);
+ err = mlx5_eq_table_init(dev);
if (err) {
dev_err(&pdev->dev, "failed to initialize eq\n");
- goto out;
+ goto err_devcom;
+ }
+
+ err = mlx5_events_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to initialize events\n");
+ goto err_eq_cleanup;
}
err = mlx5_cq_debugfs_init(dev);
if (err) {
dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
- goto err_eq_cleanup;
+ goto err_events_cleanup;
}
mlx5_init_qp_table(dev);
- mlx5_init_srq_table(dev);
-
mlx5_init_mkey_table(dev);
mlx5_init_reserved_gids(dev);
@@ -1013,14 +881,15 @@ err_rl_cleanup:
err_tables_cleanup:
mlx5_vxlan_destroy(dev->vxlan);
mlx5_cleanup_mkey_table(dev);
- mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cq_debugfs_cleanup(dev);
-
+err_events_cleanup:
+ mlx5_events_cleanup(dev);
err_eq_cleanup:
- mlx5_eq_cleanup(dev);
+ mlx5_eq_table_cleanup(dev);
+err_devcom:
+ mlx5_devcom_unregister_device(dev->priv.devcom);
-out:
return err;
}
@@ -1036,10 +905,11 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_cleanup_clock(dev);
mlx5_cleanup_reserved_gids(dev);
mlx5_cleanup_mkey_table(dev);
- mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
mlx5_cq_debugfs_cleanup(dev);
- mlx5_eq_cleanup(dev);
+ mlx5_events_cleanup(dev);
+ mlx5_eq_table_cleanup(dev);
+ mlx5_devcom_unregister_device(dev->priv.devcom);
}
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
@@ -1048,6 +918,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
struct pci_dev *pdev = dev->pdev;
int err;
+ dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
mutex_lock(&dev->intf_state_mutex);
if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
@@ -1113,15 +984,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto reclaim_boot_pages;
}
- err = handle_hca_cap(dev);
+ err = set_hca_cap(dev);
if (err) {
- dev_err(&pdev->dev, "handle_hca_cap failed\n");
- goto reclaim_boot_pages;
- }
-
- err = handle_hca_cap_atomic(dev);
- if (err) {
- dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+ dev_err(&pdev->dev, "set_hca_cap failed\n");
goto reclaim_boot_pages;
}
@@ -1131,16 +996,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto reclaim_boot_pages;
}
- err = mlx5_pagealloc_start(dev);
- if (err) {
- dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
- goto reclaim_boot_pages;
- }
-
err = mlx5_cmd_init_hca(dev, sw_owner_id);
if (err) {
dev_err(&pdev->dev, "init hca failed\n");
- goto err_pagealloc_stop;
+ goto reclaim_boot_pages;
}
mlx5_set_driver_version(dev);
@@ -1161,23 +1020,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
}
}
- err = mlx5_alloc_irq_vectors(dev);
- if (err) {
- dev_err(&pdev->dev, "alloc irq vectors failed\n");
- goto err_cleanup_once;
- }
-
dev->priv.uar = mlx5_get_uars_page(dev);
if (IS_ERR(dev->priv.uar)) {
dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
err = PTR_ERR(dev->priv.uar);
- goto err_disable_msix;
+ goto err_get_uars;
}
- err = mlx5_start_eqs(dev);
+ mlx5_events_start(dev);
+ mlx5_pagealloc_start(dev);
+
+ err = mlx5_eq_table_create(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
- goto err_put_uars;
+ dev_err(&pdev->dev, "Failed to create EQs\n");
+ goto err_eq_table;
}
err = mlx5_fw_tracer_init(dev->tracer);
@@ -1186,18 +1042,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_fw_tracer;
}
- err = alloc_comp_eqs(dev);
- if (err) {
- dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
- goto err_comp_eqs;
- }
-
- err = mlx5_irq_set_affinity_hints(dev);
- if (err) {
- dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
- goto err_affinity_hints;
- }
-
err = mlx5_fpga_device_start(dev);
if (err) {
dev_err(&pdev->dev, "fpga device start failed %d\n", err);
@@ -1234,6 +1078,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_sriov;
}
+ err = mlx5_ec_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init embedded CPU\n");
+ goto err_ec;
+ }
+
if (mlx5_device_registered(dev)) {
mlx5_attach_device(dev);
} else {
@@ -1251,6 +1101,9 @@ out:
return 0;
err_reg_dev:
+ mlx5_ec_cleanup(dev);
+
+err_ec:
mlx5_sriov_detach(dev);
err_sriov:
@@ -1266,24 +1119,17 @@ err_ipsec_start:
mlx5_fpga_device_stop(dev);
err_fpga_start:
- mlx5_irq_clear_affinity_hints(dev);
-
-err_affinity_hints:
- free_comp_eqs(dev);
-
-err_comp_eqs:
mlx5_fw_tracer_cleanup(dev->tracer);
err_fw_tracer:
- mlx5_stop_eqs(dev);
+ mlx5_eq_table_destroy(dev);
-err_put_uars:
+err_eq_table:
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
mlx5_put_uars_page(dev, priv->uar);
-err_disable_msix:
- mlx5_free_irq_vectors(dev);
-
-err_cleanup_once:
+err_get_uars:
if (boot)
mlx5_cleanup_once(dev);
@@ -1294,9 +1140,6 @@ err_stop_poll:
goto out_err;
}
-err_pagealloc_stop:
- mlx5_pagealloc_stop(dev);
-
reclaim_boot_pages:
mlx5_reclaim_startup_pages(dev);
@@ -1335,26 +1178,26 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
if (mlx5_device_registered(dev))
mlx5_detach_device(dev);
+ mlx5_ec_cleanup(dev);
mlx5_sriov_detach(dev);
mlx5_cleanup_fs(dev);
mlx5_accel_ipsec_cleanup(dev);
mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
- mlx5_irq_clear_affinity_hints(dev);
- free_comp_eqs(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
- mlx5_stop_eqs(dev);
+ mlx5_eq_table_destroy(dev);
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
mlx5_put_uars_page(dev, priv->uar);
- mlx5_free_irq_vectors(dev);
if (cleanup)
mlx5_cleanup_once(dev);
mlx5_stop_health_poll(dev, cleanup);
+
err = mlx5_cmd_teardown_hca(dev);
if (err) {
dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
goto out;
}
- mlx5_pagealloc_stop(dev);
mlx5_reclaim_startup_pages(dev);
mlx5_core_disable_hca(dev, 0);
mlx5_cmd_cleanup(dev);
@@ -1364,12 +1207,6 @@ out:
return err;
}
-struct mlx5_core_event_handler {
- void (*event)(struct mlx5_core_dev *dev,
- enum mlx5_dev_event event,
- void *data);
-};
-
static const struct devlink_ops mlx5_devlink_ops = {
#ifdef CONFIG_MLX5_ESWITCH
.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
@@ -1403,7 +1240,6 @@ static int init_one(struct pci_dev *pdev,
pci_set_drvdata(pdev, dev);
dev->pdev = pdev;
- dev->event = mlx5_core_event;
dev->profile = &profile[prof_sel];
INIT_LIST_HEAD(&priv->ctx_list);
@@ -1411,17 +1247,6 @@ static int init_one(struct pci_dev *pdev,
mutex_init(&dev->pci_status_mutex);
mutex_init(&dev->intf_state_mutex);
- INIT_LIST_HEAD(&priv->waiting_events_list);
- priv->is_accum_events = false;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- err = init_srcu_struct(&priv->pfault_srcu);
- if (err) {
- dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
- err);
- goto clean_dev;
- }
-#endif
mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
@@ -1430,7 +1255,7 @@ static int init_one(struct pci_dev *pdev,
err = mlx5_pci_init(dev, priv);
if (err) {
dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
- goto clean_srcu;
+ goto clean_dev;
}
err = mlx5_health_init(dev);
@@ -1439,12 +1264,14 @@ static int init_one(struct pci_dev *pdev,
goto close_pci;
}
- mlx5_pagealloc_init(dev);
+ err = mlx5_pagealloc_init(dev);
+ if (err)
+ goto err_pagealloc_init;
err = mlx5_load_one(dev, priv, true);
if (err) {
dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
- goto clean_health;
+ goto err_load_one;
}
request_module_nowait(MLX5_IB_MOD);
@@ -1458,16 +1285,13 @@ static int init_one(struct pci_dev *pdev,
clean_load:
mlx5_unload_one(dev, priv, true);
-clean_health:
+err_load_one:
mlx5_pagealloc_cleanup(dev);
+err_pagealloc_init:
mlx5_health_cleanup(dev);
close_pci:
mlx5_pci_close(dev, priv);
-clean_srcu:
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- cleanup_srcu_struct(&priv->pfault_srcu);
clean_dev:
-#endif
devlink_free(devlink);
return err;
@@ -1491,9 +1315,6 @@ static void remove_one(struct pci_dev *pdev)
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
mlx5_pci_close(dev, priv);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- cleanup_srcu_struct(&priv->pfault_srcu);
-#endif
devlink_free(devlink);
}
@@ -1637,7 +1458,6 @@ succeed:
* kexec. There is no need to cleanup the mlx5_core software
* contexts.
*/
- mlx5_irq_clear_affinity_hints(dev);
mlx5_core_eq_free_irqs(dev);
return 0;
@@ -1669,6 +1489,8 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */
{ PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */
{ PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */
+ { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */
+ { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */
{ PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
{ 0, }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 0594d0961cb3..7b331674622c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -38,6 +38,7 @@
#include <linux/sched.h>
#include <linux/if_link.h>
#include <linux/firmware.h>
+#include <linux/ptp_clock_kernel.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/fs.h>
@@ -78,6 +79,11 @@ do { \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
+#define mlx5_core_warn_once(__dev, format, ...) \
+ dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
#define mlx5_core_info(__dev, format, ...) \
dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
@@ -97,12 +103,6 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
-
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
- unsigned long param);
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
- struct mlx5_pagefault *pfault);
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
void mlx5_disable_device(struct mlx5_core_dev *dev);
void mlx5_recover_device(struct mlx5_core_dev *dev);
@@ -121,31 +121,12 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 modify_bitmask);
int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 element_id);
-int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
-
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
- int nent, u64 mask, const char *name,
- enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
- u32 *out, int outlen);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
-/* This function should only be called after mlx5_cmd_force_teardown_hca */
-void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
-void mlx5_cq_tasklet_cb(unsigned long data);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages);
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
+ struct ptp_system_timestamp *sts);
+
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
+void mlx5_cmd_flush(struct mlx5_core_dev *dev);
int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
@@ -159,6 +140,11 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove(struct mlx5_core_dev *dev);
+int mlx5_events_init(struct mlx5_core_dev *dev);
+void mlx5_events_cleanup(struct mlx5_core_dev *dev);
+void mlx5_events_start(struct mlx5_core_dev *dev);
+void mlx5_events_stop(struct mlx5_core_dev *dev);
+
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_attach_device(struct mlx5_core_dev *dev);
@@ -202,10 +188,8 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
MLX5_CAP_GEN(dev, lag_master);
}
-int mlx5_lag_allow(struct mlx5_core_dev *dev);
-int mlx5_lag_forbid(struct mlx5_core_dev *dev);
-
void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
+void mlx5_lag_update(struct mlx5_core_dev *dev);
enum {
MLX5_NIC_IFC_FULL = 0,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 0670165afd5f..ea744d8466ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -51,9 +51,10 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
struct mlx5_core_mkey *mkey,
- u32 *in, int inlen,
- u32 *out, int outlen,
- mlx5_cmd_cbk_t callback, void *context)
+ struct mlx5_async_ctx *async_ctx, u32 *in,
+ int inlen, u32 *out, int outlen,
+ mlx5_async_cbk_t callback,
+ struct mlx5_async_work *context)
{
struct mlx5_mkey_table *table = &dev->priv.mkey_table;
u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
@@ -71,7 +72,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
MLX5_SET(mkc, mkc, mkey_7_0, key);
if (callback)
- return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen,
+ return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
callback, context);
err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
@@ -105,7 +106,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
struct mlx5_core_mkey *mkey,
u32 *in, int inlen)
{
- return mlx5_core_create_mkey_cb(dev, mkey, in, inlen,
+ return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen,
NULL, 0, NULL, NULL);
}
EXPORT_SYMBOL(mlx5_core_create_mkey);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index e36d3e3675f9..41025387ff2c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -37,6 +37,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
enum {
MLX5_PAGES_CANT_GIVE = 0,
@@ -47,6 +48,7 @@ enum {
struct mlx5_pages_req {
struct mlx5_core_dev *dev;
u16 func_id;
+ u8 ec_function;
s32 npages;
struct work_struct work;
};
@@ -142,6 +144,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
MLX5_SET(query_pages_in, in, op_mod, boot ?
MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
+ MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev));
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
@@ -252,7 +255,8 @@ err_mapping:
return err;
}
-static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
+static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id,
+ bool ec_function)
{
u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
@@ -261,6 +265,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
MLX5_SET(manage_pages_in, in, function_id, func_id);
+ MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
@@ -269,7 +274,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
}
static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
- int notify_fail)
+ int notify_fail, bool ec_function)
{
u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
@@ -304,6 +309,7 @@ retry:
MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
MLX5_SET(manage_pages_in, in, function_id, func_id);
MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+ MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
if (err) {
@@ -315,8 +321,11 @@ retry:
dev->priv.fw_pages += npages;
if (func_id)
dev->priv.vfs_pages += npages;
+ else if (mlx5_core_is_ecpf(dev) && !ec_function)
+ dev->priv.peer_pf_pages += npages;
- mlx5_core_dbg(dev, "err %d\n", err);
+ mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n",
+ npages, ec_function, func_id, err);
kvfree(in);
return 0;
@@ -327,7 +336,7 @@ out_4k:
out_free:
kvfree(in);
if (notify_fail)
- page_notify_fail(dev, func_id);
+ page_notify_fail(dev, func_id, ec_function);
return err;
}
@@ -363,7 +372,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
}
static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
- int *nclaimed)
+ int *nclaimed, bool ec_function)
{
int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
@@ -384,6 +393,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
MLX5_SET(manage_pages_in, in, function_id, func_id);
MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+ MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
@@ -409,6 +419,8 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
dev->priv.fw_pages -= num_claimed;
if (func_id)
dev->priv.vfs_pages -= num_claimed;
+ else if (mlx5_core_is_ecpf(dev) && !ec_function)
+ dev->priv.peer_pf_pages -= num_claimed;
out_free:
kvfree(out);
@@ -422,9 +434,10 @@ static void pages_work_handler(struct work_struct *work)
int err = 0;
if (req->npages < 0)
- err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
+ err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL,
+ req->ec_function);
else if (req->npages > 0)
- err = give_pages(dev, req->func_id, req->npages, 1);
+ err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function);
if (err)
mlx5_core_warn(dev, "%s fail %d\n",
@@ -433,22 +446,43 @@ static void pages_work_handler(struct work_struct *work)
kfree(req);
}
-void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
- s32 npages)
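+/* Bit 15 of the EQE's req_pages.ec_function field marks an EC function */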
+enum {
+ EC_FUNCTION_MASK = 0x8000,
+};
+
+static int req_pages_handler(struct notifier_block *nb,
+ unsigned long type, void *data)
{
struct mlx5_pages_req *req;
-
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+ struct mlx5_eqe *eqe;
+ bool ec_function;
+ u16 func_id;
+ s32 npages;
+
+ priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+ eqe = data;
+
+ func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+ npages = be32_to_cpu(eqe->data.req_pages.num_pages);
+ ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK;
+ mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
+ func_id, npages);
req = kzalloc(sizeof(*req), GFP_ATOMIC);
if (!req) {
mlx5_core_warn(dev, "failed to allocate pages request\n");
- return;
+ return NOTIFY_DONE;
}
req->dev = dev;
req->func_id = func_id;
req->npages = npages;
+ req->ec_function = ec_function;
INIT_WORK(&req->work, pages_work_handler);
queue_work(dev->priv.pg_wq, &req->work);
+ return NOTIFY_OK;
}
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
@@ -464,7 +498,7 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
npages, boot ? "boot" : "init", func_id);
- return give_pages(dev, func_id, npages, 0);
+ return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev));
}
enum {
@@ -498,7 +532,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
fwp = rb_entry(p, struct fw_page, rb_node);
err = reclaim_pages(dev, fwp->func_id,
optimal_reclaimed_pages(),
- &nclaimed);
+ &nclaimed, mlx5_core_is_ecpf(dev));
if (err) {
mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
@@ -520,39 +554,45 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
WARN(dev->priv.vfs_pages,
"VFs FW pages counter is %d after reclaiming all pages\n",
dev->priv.vfs_pages);
+ WARN(dev->priv.peer_pf_pages,
+ "Peer PF FW pages counter is %d after reclaiming all pages\n",
+ dev->priv.peer_pf_pages);
return 0;
}
-void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
{
dev->priv.page_root = RB_ROOT;
INIT_LIST_HEAD(&dev->priv.free_list);
+ dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
+ if (!dev->priv.pg_wq)
+ return -ENOMEM;
+
+ return 0;
}
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
{
- /* nothing */
+ destroy_workqueue(dev->priv.pg_wq);
}
-int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+void mlx5_pagealloc_start(struct mlx5_core_dev *dev)
{
- dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
- if (!dev->priv.pg_wq)
- return -ENOMEM;
-
- return 0;
+ MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST);
+ mlx5_eq_notifier_register(dev, &dev->priv.pg_nb);
}
void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
{
- destroy_workqueue(dev->priv.pg_wq);
+ mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb);
+ flush_workqueue(dev->priv.pg_wq);
}
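A sketch of the EQ-notifier pattern the page allocator is converted to above (the struct and handler names here are illustrative; MLX5_NB_INIT, mlx5_nb_cof and the register/unregister calls are the real API):

	struct my_ctx {
		struct mlx5_nb my_nb;
	};

	static int my_handler(struct notifier_block *nb, unsigned long type,
			      void *data)
	{
		struct my_ctx *ctx = mlx5_nb_cof(nb, struct my_ctx, my_nb);
		struct mlx5_eqe *eqe = data;

		/* decode eqe and defer real work to a workqueue, as
		 * req_pages_handler() does with ctx's state
		 */
		return NOTIFY_OK;
	}

	/* setup/teardown, mirroring mlx5_pagealloc_start()/stop():
	 *	MLX5_NB_INIT(&ctx->my_nb, my_handler, PAGE_REQUEST);
	 *	mlx5_eq_notifier_register(dev, &ctx->my_nb);
	 *	...
	 *	mlx5_eq_notifier_unregister(dev, &ctx->my_nb);
	 */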
-int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
+int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
{
unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
- int prev_vfs_pages = dev->priv.vfs_pages;
+ int prev_pages = *pages;
/* In case of internal error we will free the pages manually later */
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
@@ -560,16 +600,16 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
return 0;
}
- mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
+ mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_pages,
dev->priv.name);
- while (dev->priv.vfs_pages) {
+ while (*pages) {
if (time_after(jiffies, end)) {
- mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages);
+ mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages);
return -ETIMEDOUT;
}
- if (dev->priv.vfs_pages < prev_vfs_pages) {
+ if (*pages < prev_pages) {
end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
- prev_vfs_pages = dev->priv.vfs_pages;
+ prev_pages = *pages;
}
msleep(50);
}
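With the counter passed by pointer, the same wait loop now serves both VF pages and, per the ECPF parts of this series, peer PF pages:

	/* VF teardown, as updated in sriov.c below: */
	if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
		mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");

	/* Peer PF teardown on the ECPF (assumed; mirrors ecpf.c in this
	 * series):
	 */
	if (mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages))
		mlx5_core_warn(dev, "timeout reclaiming peer PF pages\n");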
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 31a9cbd85689..361468e0435d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -30,10 +30,7 @@
* SOFTWARE.
*/
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
#include <linux/mlx5/port.h>
-#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
@@ -157,44 +154,6 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
sizeof(out), MLX5_REG_MLCR, 0, 1);
}
-int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
- u32 *proto_cap, int proto_mask)
-{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
- int err;
-
- err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
- if (err)
- return err;
-
- if (proto_mask == MLX5_PTYS_EN)
- *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability);
- else
- *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap);
-
-int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
- u32 *proto_admin, int proto_mask)
-{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
- int err;
-
- err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
- if (err)
- return err;
-
- if (proto_mask == MLX5_PTYS_EN)
- *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin);
- else
- *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin);
-
int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
u8 *link_width_oper, u8 local_port)
{
@@ -211,23 +170,6 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
-int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
- u32 *proto_oper, u8 local_port)
-{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
- int err;
-
- err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN,
- local_port);
- if (err)
- return err;
-
- *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
-
- return 0;
-}
-EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper);
-
int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
u8 *proto_oper, u8 local_port)
{
@@ -245,35 +187,6 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
-int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
- u32 proto_admin, int proto_mask)
-{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
- u32 in[MLX5_ST_SZ_DW(ptys_reg)];
- u8 an_disable_admin;
- u8 an_disable_cap;
- u8 an_status;
-
- mlx5_query_port_autoneg(dev, proto_mask, &an_status,
- &an_disable_cap, &an_disable_admin);
- if (!an_disable_cap && an_disable)
- return -EPERM;
-
- memset(in, 0, sizeof(in));
-
- MLX5_SET(ptys_reg, in, local_port, 1);
- MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
- MLX5_SET(ptys_reg, in, proto_mask, proto_mask);
- if (proto_mask == MLX5_PTYS_EN)
- MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
- else
- MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin);
-
- return mlx5_core_access_reg(dev, in, sizeof(in), out,
- sizeof(out), MLX5_REG_PTYS, 0, 1);
-}
-EXPORT_SYMBOL_GPL(mlx5_set_port_ptys);
-
/* This function should be used after setting a port register only */
void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
{
@@ -404,10 +317,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
i2c_addr = MLX5_I2C_ADDR_LOW;
- if (offset >= MLX5_EEPROM_PAGE_LENGTH) {
- i2c_addr = MLX5_I2C_ADDR_HIGH;
- offset -= MLX5_EEPROM_PAGE_LENGTH;
- }
MLX5_SET(mcia_reg, in, l, 0);
MLX5_SET(mcia_reg, in, module, module_num);
@@ -606,25 +515,6 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
}
EXPORT_SYMBOL_GPL(mlx5_query_port_pfc);
-void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask,
- u8 *an_status,
- u8 *an_disable_cap, u8 *an_disable_admin)
-{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-
- *an_status = 0;
- *an_disable_cap = 0;
- *an_disable_admin = 0;
-
- if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1))
- return;
-
- *an_status = MLX5_GET(ptys_reg, out, an_status);
- *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
- *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg);
-
int mlx5_max_tc(struct mlx5_core_dev *mdev)
{
u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8;
@@ -870,8 +760,7 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode)
}
EXPORT_SYMBOL_GPL(mlx5_query_port_wol);
-static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out,
- int outlen)
+int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen)
{
u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
@@ -880,7 +769,7 @@ static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out,
outlen, MLX5_REG_PCMR, 0, 0);
}
-static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
+int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
{
u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
@@ -891,7 +780,11 @@ static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable)
{
u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+ int err;
+ err = mlx5_query_ports_check(mdev, in, sizeof(in));
+ if (err)
+ return err;
MLX5_SET(pcmr_reg, in, local_port, 1);
MLX5_SET(pcmr_reg, in, fcs_chk, enable);
return mlx5_set_ports_check(mdev, in, sizeof(in));
@@ -915,63 +808,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
*enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
}
-static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
- "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */
- "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */
- "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */
-};
-
-static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
- "Power budget exceeded",
- "Long Range for non MLNX cable",
- "Bus stuck(I2C or data shorted)",
- "No EEPROM/retry timeout",
- "Enforce part number list",
- "Unknown identifier",
- "High Temperature",
- "Bad or shorted cable/module",
- "Unknown status",
-};
-
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
-{
- enum port_module_event_status_type module_status;
- enum port_module_event_error_type error_type;
- struct mlx5_eqe_port_module *module_event_eqe;
- struct mlx5_priv *priv = &dev->priv;
- u8 module_num;
-
- module_event_eqe = &eqe->data.port_module;
- module_num = module_event_eqe->module;
- module_status = module_event_eqe->module_status &
- PORT_MODULE_EVENT_MODULE_STATUS_MASK;
- error_type = module_event_eqe->error_type &
- PORT_MODULE_EVENT_ERROR_TYPE_MASK;
-
- if (module_status < MLX5_MODULE_STATUS_ERROR) {
- priv->pme_stats.status_counters[module_status - 1]++;
- } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
- if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
- /* Unknown error type */
- error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
- priv->pme_stats.error_counters[error_type]++;
- }
-
- if (!printk_ratelimit())
- return;
-
- if (module_status < MLX5_MODULE_STATUS_ERROR)
- mlx5_core_info(dev,
- "Port module event: module %u, %s\n",
- module_num, mlx5_pme_status[module_status - 1]);
-
- else if (module_status == MLX5_MODULE_STATUS_ERROR)
- mlx5_core_info(dev,
- "Port module event[error]: module %u, %s, %s\n",
- module_num, mlx5_pme_status[module_status - 1],
- mlx5_pme_error[error_type]);
-}
-
int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
{
u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 91b8139a388d..b8ba74de9555 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -38,26 +38,25 @@
#include <linux/mlx5/transobj.h>
#include "mlx5_core.h"
+#include "lib/eq.h"
-static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
- u32 rsn)
+static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct);
+
+static struct mlx5_core_rsc_common *
+mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
{
- struct mlx5_qp_table *table = &dev->priv.qp_table;
struct mlx5_core_rsc_common *common;
+ unsigned long flags;
- spin_lock(&table->lock);
+ spin_lock_irqsave(&table->lock, flags);
common = radix_tree_lookup(&table->tree, rsn);
if (common)
atomic_inc(&common->refcount);
- spin_unlock(&table->lock);
+ spin_unlock_irqrestore(&table->lock, flags);
- if (!common) {
- mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n",
- rsn);
- return NULL;
- }
return common;
}
@@ -120,19 +119,57 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
}
}
-void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
+static int rsc_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
{
- struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+ struct mlx5_core_rsc_common *common;
+ struct mlx5_qp_table *table;
+ struct mlx5_core_dev *dev;
struct mlx5_core_dct *dct;
+ u8 event_type = (u8)type;
struct mlx5_core_qp *qp;
+ struct mlx5_priv *priv;
+ struct mlx5_eqe *eqe;
+ u32 rsn;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ eqe = data;
+ rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+ break;
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ eqe = data;
+ rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
- if (!common)
- return;
+ table = container_of(nb, struct mlx5_qp_table, nb);
+ priv = container_of(table, struct mlx5_priv, qp_table);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn);
+
+ common = mlx5_get_rsc(table, rsn);
+ if (!common) {
+ mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn);
+ return NOTIFY_OK;
+ }
if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
event_type, rsn);
- return;
+ goto out;
}
switch (common->res) {
@@ -150,8 +187,10 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
default:
mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
}
-
+out:
mlx5_core_put_rsc(common);
+
+ return NOTIFY_OK;
}
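The rsn packs a 24-bit resource number together with the resource type, so one radix tree can track QPs, SRQs and DCTs; a sketch of the encoding (MLX5_USER_INDEX_LEN is the width of the user index, 24 bits):

	/* Sketch of the rsn encoding used above and in mlx5_core_res_hold() */
	static u32 rsn_encode(u32 number, enum mlx5_res_type type)
	{
		return (number & 0xffffff) | (type << MLX5_USER_INDEX_LEN);
	}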
static int create_resource_common(struct mlx5_core_dev *dev,
@@ -191,20 +230,49 @@ static void destroy_resource_common(struct mlx5_core_dev *dev,
wait_for_completion(&qp->common.free);
}
+static int _mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct, bool need_cleanup)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+ int err;
+
+ err = mlx5_core_drain_dct(dev, dct);
+ if (err) {
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ goto destroy;
+ } else {
+ mlx5_core_warn(dev,
+ "failed to drain DCT 0x%x with error 0x%x\n",
+ qp->qpn, err);
+ return err;
+ }
+ }
+ wait_for_completion(&dct->drained);
+destroy:
+ if (need_cleanup)
+ destroy_resource_common(dev, &dct->mqp);
+ MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
+ MLX5_SET(destroy_dct_in, in, uid, qp->uid);
+ err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
+ (void *)&out, sizeof(out));
+ return err;
+}
+
int mlx5_core_create_dct(struct mlx5_core_dev *dev,
struct mlx5_core_dct *dct,
- u32 *in, int inlen)
+ u32 *in, int inlen,
+ u32 *out, int outlen)
{
- u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0};
- u32 din[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
- u32 dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
struct mlx5_core_qp *qp = &dct->mqp;
int err;
init_completion(&dct->drained);
MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
- err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
if (err) {
mlx5_core_warn(dev, "create DCT failed, ret %d\n", err);
return err;
@@ -218,11 +286,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev,
return 0;
err_cmd:
- MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
- MLX5_SET(destroy_dct_in, din, dctn, qp->qpn);
- MLX5_SET(destroy_dct_in, din, uid, qp->uid);
- mlx5_cmd_exec(dev, (void *)&in, sizeof(din),
- (void *)&out, sizeof(dout));
+ _mlx5_core_destroy_dct(dev, dct, false);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_core_create_dct);
@@ -287,29 +351,7 @@ static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
int mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
struct mlx5_core_dct *dct)
{
- u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
- struct mlx5_core_qp *qp = &dct->mqp;
- int err;
-
- err = mlx5_core_drain_dct(dev, dct);
- if (err) {
- if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
- goto destroy;
- } else {
- mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err);
- return err;
- }
- }
- wait_for_completion(&dct->drained);
-destroy:
- destroy_resource_common(dev, &dct->mqp);
- MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
- MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
- MLX5_SET(destroy_dct_in, in, uid, qp->uid);
- err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
- (void *)&out, sizeof(out));
- return err;
+ return _mlx5_core_destroy_dct(dev, dct, true);
}
EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct);
@@ -487,10 +529,16 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev)
spin_lock_init(&table->lock);
INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
mlx5_qp_debugfs_init(dev);
+
+ table->nb.notifier_call = rsc_event_notifier;
+ mlx5_notifier_register(dev, &table->nb);
}
void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+ mlx5_notifier_unregister(dev, &table->nb);
mlx5_qp_debugfs_cleanup(dev);
}
@@ -670,3 +718,20 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}
EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+ int res_num,
+ enum mlx5_res_type res_type)
+{
+ u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+ return mlx5_get_rsc(table, rsn);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_hold);
+
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
+{
+ mlx5_core_put_rsc(res);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_put);
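A hedged sketch of how a consumer might use the new hold/put helpers (the caller and its purpose are illustrative; only mlx5_core_res_hold/put are from this patch):

	/* Hypothetical consumer: pin a QP across an inspection so it cannot
	 * be freed underneath us, then drop the reference.
	 */
	static void inspect_qp(struct mlx5_core_dev *dev, int qpn)
	{
		struct mlx5_core_rsc_common *res;

		res = mlx5_core_res_hold(dev, qpn, MLX5_RES_QP);
		if (!res)
			return;	/* resource already gone */

		/* the held refcount keeps the QP alive; safe to use here */

		mlx5_core_res_put(res);
	}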
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index a0674962f02c..7b23fa8d2d60 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -147,7 +147,7 @@ out:
if (MLX5_ESWITCH_MANAGER(dev))
mlx5_eswitch_disable_sriov(dev->priv.eswitch);
- if (mlx5_wait_for_vf_pages(dev))
+ if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
}
@@ -216,20 +216,10 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (!mlx5_core_is_pf(dev))
return -EPERM;
- if (num_vfs) {
- int ret;
-
- ret = mlx5_lag_forbid(dev);
- if (ret && (ret != -ENODEV))
- return ret;
- }
-
- if (num_vfs) {
+ if (num_vfs)
err = mlx5_sriov_enable(pdev, num_vfs);
- } else {
+ else
mlx5_sriov_disable(pdev);
- mlx5_lag_allow(dev);
- }
return err ? err : num_vfs;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
deleted file mode 100644
index 6a6fc9be01e6..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ /dev/null
@@ -1,716 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
-#include <linux/mlx5/srq.h>
-#include <rdma/ib_verbs.h>
-#include "mlx5_core.h"
-#include <linux/mlx5/transobj.h>
-
-void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
- struct mlx5_core_srq *srq;
-
- spin_lock(&table->lock);
-
- srq = radix_tree_lookup(&table->tree, srqn);
- if (srq)
- atomic_inc(&srq->refcount);
-
- spin_unlock(&table->lock);
-
- if (!srq) {
- mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
- return;
- }
-
- srq->event(srq, event_type);
-
- if (atomic_dec_and_test(&srq->refcount))
- complete(&srq->free);
-}
-
-static int get_pas_size(struct mlx5_srq_attr *in)
-{
- u32 log_page_size = in->log_page_size + 12;
- u32 log_srq_size = in->log_size;
- u32 log_rq_stride = in->wqe_shift;
- u32 page_offset = in->page_offset;
- u32 po_quanta = 1 << (log_page_size - 6);
- u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride);
- u32 page_size = 1 << log_page_size;
- u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
- u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size);
-
- return rq_num_pas * sizeof(u64);
-}
-
-static void set_wq(void *wq, struct mlx5_srq_attr *in)
-{
- MLX5_SET(wq, wq, wq_signature, !!(in->flags
- & MLX5_SRQ_FLAG_WQ_SIG));
- MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size);
- MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4);
- MLX5_SET(wq, wq, log_wq_sz, in->log_size);
- MLX5_SET(wq, wq, page_offset, in->page_offset);
- MLX5_SET(wq, wq, lwm, in->lwm);
- MLX5_SET(wq, wq, pd, in->pd);
- MLX5_SET64(wq, wq, dbr_addr, in->db_record);
-}
-
-static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
-{
- MLX5_SET(srqc, srqc, wq_signature, !!(in->flags
- & MLX5_SRQ_FLAG_WQ_SIG));
- MLX5_SET(srqc, srqc, log_page_size, in->log_page_size);
- MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift);
- MLX5_SET(srqc, srqc, log_srq_size, in->log_size);
- MLX5_SET(srqc, srqc, page_offset, in->page_offset);
- MLX5_SET(srqc, srqc, lwm, in->lwm);
- MLX5_SET(srqc, srqc, pd, in->pd);
- MLX5_SET64(srqc, srqc, dbr_addr, in->db_record);
- MLX5_SET(srqc, srqc, xrcd, in->xrcd);
- MLX5_SET(srqc, srqc, cqn, in->cqn);
-}
-
-static void get_wq(void *wq, struct mlx5_srq_attr *in)
-{
- if (MLX5_GET(wq, wq, wq_signature))
- in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
- in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz);
- in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4;
- in->log_size = MLX5_GET(wq, wq, log_wq_sz);
- in->page_offset = MLX5_GET(wq, wq, page_offset);
- in->lwm = MLX5_GET(wq, wq, lwm);
- in->pd = MLX5_GET(wq, wq, pd);
- in->db_record = MLX5_GET64(wq, wq, dbr_addr);
-}
-
-static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
-{
- if (MLX5_GET(srqc, srqc, wq_signature))
- in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
- in->log_page_size = MLX5_GET(srqc, srqc, log_page_size);
- in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride);
- in->log_size = MLX5_GET(srqc, srqc, log_srq_size);
- in->page_offset = MLX5_GET(srqc, srqc, page_offset);
- in->lwm = MLX5_GET(srqc, srqc, lwm);
- in->pd = MLX5_GET(srqc, srqc, pd);
- in->db_record = MLX5_GET64(srqc, srqc, dbr_addr);
-}
-
-struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
- struct mlx5_core_srq *srq;
-
- spin_lock(&table->lock);
-
- srq = radix_tree_lookup(&table->tree, srqn);
- if (srq)
- atomic_inc(&srq->refcount);
-
- spin_unlock(&table->lock);
-
- return srq;
-}
-EXPORT_SYMBOL(mlx5_core_get_srq);
-
-static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
- void *create_in;
- void *srqc;
- void *pas;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- MLX5_SET(create_srq_in, create_in, uid, in->uid);
- srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
- pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
-
- set_srqc(srqc, in);
- memcpy(pas, in->pas, pas_size);
-
- MLX5_SET(create_srq_in, create_in, opcode,
- MLX5_CMD_OP_CREATE_SRQ);
-
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
- sizeof(create_out));
- kvfree(create_in);
- if (!err) {
- srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
- srq->uid = in->uid;
- }
-
- return err;
-}
-
-static int destroy_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
- u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
-
- MLX5_SET(destroy_srq_in, srq_in, opcode,
- MLX5_CMD_OP_DESTROY_SRQ);
- MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
- MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, sizeof(srq_out));
-}
-
-static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- u16 lwm, int is_srq)
-{
- u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
- u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
-
- MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ);
- MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
- MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
- MLX5_SET(arm_rq_in, srq_in, lwm, lwm);
- MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, sizeof(srq_out));
-}
-
-static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
- u32 *srq_out;
- void *srqc;
- int err;
-
- srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
- if (!srq_out)
- return -ENOMEM;
-
- MLX5_SET(query_srq_in, srq_in, opcode,
- MLX5_CMD_OP_QUERY_SRQ);
- MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
- err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
- srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
- if (err)
- goto out;
-
- srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
- get_srqc(srqc, out);
- if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
- out->flags |= MLX5_SRQ_FLAG_ERR;
-out:
- kvfree(srq_out);
- return err;
-}
-
-static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
- void *create_in;
- void *xrc_srqc;
- void *pas;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid);
- xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
- xrc_srq_context_entry);
- pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
-
- set_srqc(xrc_srqc, in);
- MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
- memcpy(pas, in->pas, pas_size);
- MLX5_SET(create_xrc_srq_in, create_in, opcode,
- MLX5_CMD_OP_CREATE_XRC_SRQ);
-
- memset(create_out, 0, sizeof(create_out));
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
- sizeof(create_out));
- if (err)
- goto out;
-
- srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
- srq->uid = in->uid;
-out:
- kvfree(create_in);
- return err;
-}
-
-static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
- u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
- MLX5_CMD_OP_DESTROY_XRC_SRQ);
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
- MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
- xrcsrq_out, sizeof(xrcsrq_out));
-}
-
-static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq, u16 lwm)
-{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
- u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm);
- MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
- xrcsrq_out, sizeof(xrcsrq_out));
-}
-
-static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
- u32 *xrcsrq_out;
- void *xrc_srqc;
- int err;
-
- xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
- if (!xrcsrq_out)
- return -ENOMEM;
- memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
-
- MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
- MLX5_CMD_OP_QUERY_XRC_SRQ);
- MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
-
- err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
- MLX5_ST_SZ_BYTES(query_xrc_srq_out));
- if (err)
- goto out;
-
- xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
- xrc_srq_context_entry);
- get_srqc(xrc_srqc, out);
- if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
- out->flags |= MLX5_SRQ_FLAG_ERR;
-
-out:
- kvfree(xrcsrq_out);
- return err;
-}
-
-static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- void *create_in;
- void *rmpc;
- void *wq;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
- wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
- MLX5_SET(create_rmp_in, create_in, uid, in->uid);
- set_wq(wq, in);
- memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
-
- err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
- if (!err)
- srq->uid = in->uid;
-
- kvfree(create_in);
- return err;
-}
-
-static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
- u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
-
- MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
- MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
- MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int arm_rmp_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- u16 lwm)
-{
- void *in;
- void *rmpc;
- void *wq;
- void *bitmask;
- int err;
-
- in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
- bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
- wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
- MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
- MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn);
- MLX5_SET(modify_rmp_in, in, uid, srq->uid);
- MLX5_SET(wq, wq, lwm, lwm);
- MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
-
- err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
- kvfree(in);
- return err;
-}
-
-static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 *rmp_out;
- void *rmpc;
- int err;
-
- rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL);
- if (!rmp_out)
- return -ENOMEM;
-
- err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out);
- if (err)
- goto out;
-
- rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
- get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
- if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
- out->flags |= MLX5_SRQ_FLAG_ERR;
-
-out:
- kvfree(rmp_out);
- return err;
-}
-
-static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
- void *create_in;
- void *xrqc;
- void *wq;
- int pas_size;
- int inlen;
- int err;
-
- pas_size = get_pas_size(in);
- inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
- create_in = kvzalloc(inlen, GFP_KERNEL);
- if (!create_in)
- return -ENOMEM;
-
- xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
- wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
-
- set_wq(wq, in);
- memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size);
-
- if (in->type == IB_SRQT_TM) {
- MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
- if (in->flags & MLX5_SRQ_FLAG_RNDV)
- MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV);
- MLX5_SET(xrqc, xrqc,
- tag_matching_topology_context.log_matching_list_sz,
- in->tm_log_list_size);
- }
- MLX5_SET(xrqc, xrqc, user_index, in->user_index);
- MLX5_SET(xrqc, xrqc, cqn, in->cqn);
- MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
- MLX5_SET(create_xrq_in, create_in, uid, in->uid);
- err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
- sizeof(create_out));
- kvfree(create_in);
- if (!err) {
- srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
- srq->uid = in->uid;
- }
-
- return err;
-}
-
-static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
-
- MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
- MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
- MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int arm_xrq_cmd(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- u16 lwm)
-{
- u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
-
- MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
- MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ);
- MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
- MLX5_SET(arm_rq_in, in, lwm, lwm);
- MLX5_SET(arm_rq_in, in, uid, srq->uid);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
- u32 *xrq_out;
- int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
- void *xrqc;
- int err;
-
- xrq_out = kvzalloc(outlen, GFP_KERNEL);
- if (!xrq_out)
- return -ENOMEM;
-
- MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
- MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
-
- err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen);
- if (err)
- goto out;
-
- xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context);
- get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out);
- if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD)
- out->flags |= MLX5_SRQ_FLAG_ERR;
- out->tm_next_tag =
- MLX5_GET(xrqc, xrqc,
- tag_matching_topology_context.append_next_index);
- out->tm_hw_phase_cnt =
- MLX5_GET(xrqc, xrqc,
- tag_matching_topology_context.hw_phase_cnt);
- out->tm_sw_phase_cnt =
- MLX5_GET(xrqc, xrqc,
- tag_matching_topology_context.sw_phase_cnt);
-
-out:
- kvfree(xrq_out);
- return err;
-}
-
-static int create_srq_split(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- if (!dev->issi)
- return create_srq_cmd(dev, srq, in);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return create_xrc_srq_cmd(dev, srq, in);
- case MLX5_RES_XRQ:
- return create_xrq_cmd(dev, srq, in);
- default:
- return create_rmp_cmd(dev, srq, in);
- }
-}
-
-static int destroy_srq_split(struct mlx5_core_dev *dev,
- struct mlx5_core_srq *srq)
-{
- if (!dev->issi)
- return destroy_srq_cmd(dev, srq);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return destroy_xrc_srq_cmd(dev, srq);
- case MLX5_RES_XRQ:
- return destroy_xrq_cmd(dev, srq);
- default:
- return destroy_rmp_cmd(dev, srq);
- }
-}
-
-int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *in)
-{
- int err;
- struct mlx5_srq_table *table = &dev->priv.srq_table;
-
- switch (in->type) {
- case IB_SRQT_XRC:
- srq->common.res = MLX5_RES_XSRQ;
- break;
- case IB_SRQT_TM:
- srq->common.res = MLX5_RES_XRQ;
- break;
- default:
- srq->common.res = MLX5_RES_SRQ;
- }
-
- err = create_srq_split(dev, srq, in);
- if (err)
- return err;
-
- atomic_set(&srq->refcount, 1);
- init_completion(&srq->free);
-
- spin_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, srq->srqn, srq);
- spin_unlock_irq(&table->lock);
- if (err) {
- mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
- goto err_destroy_srq_split;
- }
-
- return 0;
-
-err_destroy_srq_split:
- destroy_srq_split(dev, srq);
-
- return err;
-}
-EXPORT_SYMBOL(mlx5_core_create_srq);
-
-int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
- struct mlx5_core_srq *tmp;
- int err;
-
- spin_lock_irq(&table->lock);
- tmp = radix_tree_delete(&table->tree, srq->srqn);
- spin_unlock_irq(&table->lock);
- if (!tmp) {
- mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
- return -EINVAL;
- }
- if (tmp != srq) {
- mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
- return -EINVAL;
- }
-
- err = destroy_srq_split(dev, srq);
- if (err)
- return err;
-
- if (atomic_dec_and_test(&srq->refcount))
- complete(&srq->free);
- wait_for_completion(&srq->free);
-
- return 0;
-}
-EXPORT_SYMBOL(mlx5_core_destroy_srq);
-
-int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- struct mlx5_srq_attr *out)
-{
- if (!dev->issi)
- return query_srq_cmd(dev, srq, out);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return query_xrc_srq_cmd(dev, srq, out);
- case MLX5_RES_XRQ:
- return query_xrq_cmd(dev, srq, out);
- default:
- return query_rmp_cmd(dev, srq, out);
- }
-}
-EXPORT_SYMBOL(mlx5_core_query_srq);
-
-int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
- u16 lwm, int is_srq)
-{
- if (!dev->issi)
- return arm_srq_cmd(dev, srq, lwm, is_srq);
- switch (srq->common.res) {
- case MLX5_RES_XSRQ:
- return arm_xrc_srq_cmd(dev, srq, lwm);
- case MLX5_RES_XRQ:
- return arm_xrq_cmd(dev, srq, lwm);
- default:
- return arm_rmp_cmd(dev, srq, lwm);
- }
-}
-EXPORT_SYMBOL(mlx5_core_arm_srq);
-
-void mlx5_init_srq_table(struct mlx5_core_dev *dev)
-{
- struct mlx5_srq_table *table = &dev->priv.srq_table;
-
- memset(table, 0, sizeof(*table));
- spin_lock_init(&table->lock);
- INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
-}
-
-void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
-{
- /* nothing */
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index a1ee9a8a769e..c4d4b76096dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -258,115 +258,6 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
}
EXPORT_SYMBOL(mlx5_core_destroy_tis);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rmpn)
-{
- u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0};
- int err;
-
- MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- if (!err)
- *rmpn = MLX5_GET(create_rmp_out, out, rmpn);
-
- return err;
-}
-
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen)
-{
- u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0};
-
- MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
- return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-}
-
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0};
-
- MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
- MLX5_SET(destroy_rmp_in, in, rmpn, rmpn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out,
- sizeof(out));
-}
-
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out)
-{
- u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0};
- int outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
-
- MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP);
- MLX5_SET(query_rmp_in, in, rmpn, rmpn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
-{
- void *in;
- void *rmpc;
- void *wq;
- void *bitmask;
- int err;
-
- in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
- bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
- wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
- MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
- MLX5_SET(modify_rmp_in, in, rmpn, rmpn);
- MLX5_SET(wq, wq, lwm, lwm);
- MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
- MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
-
- err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
- kvfree(in);
-
- return err;
-}
-
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *xsrqn)
-{
- u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0};
- int err;
-
- MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- if (!err)
- *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
-
- return err;
-}
-
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
-{
- u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
- MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ);
- MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
-{
- u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
- MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
- MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn);
- MLX5_SET(arm_xrc_srq_in, in, lwm, lwm);
- MLX5_SET(arm_xrc_srq_in, in, op_mod,
- MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rqtn)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 8b97066dd1f1..94464723ff77 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -90,8 +90,8 @@ static void up_rel_func(struct kref *kref)
iounmap(up->map);
if (mlx5_cmd_free_uar(up->mdev, up->index))
mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
- kfree(up->reg_bitmap);
- kfree(up->fp_bitmap);
+ bitmap_free(up->reg_bitmap);
+ bitmap_free(up->fp_bitmap);
kfree(up);
}
@@ -110,11 +110,11 @@ static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev,
return ERR_PTR(err);
up->mdev = mdev;
- up->reg_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
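+ /* bitmap_zalloc() sizes and zeroes the allocation from the bit count directly */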
+ up->reg_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL);
if (!up->reg_bitmap)
goto error1;
- up->fp_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
+ up->fp_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL);
if (!up->fp_bitmap)
goto error1;
@@ -157,8 +157,8 @@ error2:
if (mlx5_cmd_free_uar(mdev, up->index))
mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index);
error1:
- kfree(up->fp_bitmap);
- kfree(up->reg_bitmap);
+ bitmap_free(up->fp_bitmap);
+ bitmap_free(up->reg_bitmap);
kfree(up);
return ERR_PTR(err);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index cfbea66b4879..ef95feca9961 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -64,7 +64,7 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
}
int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
- u16 vport, u8 state)
+ u16 vport, u8 other_vport, u8 state)
{
u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0};
u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0};
@@ -73,8 +73,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
MLX5_CMD_OP_MODIFY_VPORT_STATE);
MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
MLX5_SET(modify_vport_state_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(modify_vport_state_in, in, other_vport, 1);
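+ /* other_vport is now supplied by the caller instead of being inferred from vport != 0 */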
+ MLX5_SET(modify_vport_state_in, in, other_vport, other_vport);
MLX5_SET(modify_vport_state_in, in, admin_state, state);
return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
@@ -255,7 +254,7 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu)
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu);
int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
- u32 vport,
+ u16 vport,
enum mlx5_list_type list_type,
u8 addr_list[][ETH_ALEN],
int *list_size)
@@ -373,7 +372,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
- u32 vport,
+ u16 vport,
u16 vlans[],
int *size)
{
@@ -526,7 +525,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
- u32 vport, u64 node_guid)
+ u16 vport, u64 node_guid)
{
int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
void *nic_vport_context;
@@ -827,7 +826,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
- u32 vport,
+ u16 vport,
int *promisc_uc,
int *promisc_mc,
int *promisc_all)
@@ -1057,7 +1056,7 @@ free:
EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
- u64 *rx_discard_vport_down,
+ u8 other_vport, u64 *rx_discard_vport_down,
u64 *tx_discard_vport_down)
{
u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0};
@@ -1068,8 +1067,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
MLX5_CMD_OP_QUERY_VNIC_ENV);
MLX5_SET(query_vnic_env_in, in, op_mod, 0);
MLX5_SET(query_vnic_env_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+ MLX5_SET(query_vnic_env_in, in, other_vport, other_vport);
err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
if (err)
@@ -1204,9 +1202,19 @@ EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
{
- if (!mdev->sys_image_guid)
- mlx5_query_nic_vport_system_image_guid(mdev, &mdev->sys_image_guid);
+ int port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ u64 tmp = 0;
+
+ if (mdev->sys_image_guid)
+ return mdev->sys_image_guid;
+
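+ /* IB ports report the system image GUID via the HCA vport query */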
+ if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH)
+ mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+ else
+ mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
+
- return mdev->sys_image_guid;
+ mdev->sys_image_guid = tmp;
+
+ return tmp;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 2dcbf1ebfd6a..953cc8efba69 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -155,7 +155,8 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *cqc, struct mlx5_cqwq *wq,
struct mlx5_wq_ctrl *wq_ctrl)
{
- u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6;
+ /* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */
+ u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 6 : 7;
u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size);
int err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index b1293d153a58..ea934a48c90a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -177,9 +177,14 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
return mlx5_cqwq_ctr2ix(wq, wq->cc);
}
-static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
{
- return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+ struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+
+ /* For 128B CQEs the data is in the last 64B */
+ cqe += wq->fbc.log_stride == 7;
+
+ return cqe;
}
static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr)