diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core')
83 files changed, 9059 insertions, 5184 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 37a551436e4a..6debffb8336b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -4,7 +4,6 @@ config MLX5_CORE tristate "Mellanox 5th generation network adapters (ConnectX series) core driver" - depends on MAY_USE_DEVLINK depends on PCI imply PTP_1588_CLOCK imply VXLAN diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d324a3884462..1a16f6d73cbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -12,17 +12,17 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5 core basic # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ - health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ - diag/fs_tracepoint.o diag/fw_tracer.o + health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ + lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o # # Netdev basic # mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ - en_selftest.o en/port.o + en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o # # Netdev extra @@ -30,12 +30,12 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o # # Core extra # 
-mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 456f30007ad6..9008e17126db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -63,8 +63,8 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, mutex_lock(&priv->alloc_mutex); original_node = dev_to_node(&dev->pdev->dev); set_dev_node(&dev->pdev->dev, node); - cpu_handle = dma_zalloc_coherent(&dev->pdev->dev, size, - dma_handle, GFP_KERNEL); + cpu_handle = dma_alloc_coherent(&dev->pdev->dev, size, dma_handle, + GFP_KERNEL); set_dev_node(&dev->pdev->dev, original_node); mutex_unlock(&priv->alloc_mutex); return cpu_handle; @@ -186,10 +186,7 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, if (!pgdir) return NULL; - pgdir->bitmap = kcalloc(BITS_TO_LONGS(db_per_page), - sizeof(unsigned long), - GFP_KERNEL); - + pgdir->bitmap = bitmap_zalloc(db_per_page, GFP_KERNEL); if (!pgdir->bitmap) { kfree(pgdir); return NULL; @@ -200,7 +197,7 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE, &pgdir->db_dma, node); if (!pgdir->db_page) { - kfree(pgdir->bitmap); + bitmap_free(pgdir->bitmap); kfree(pgdir); return NULL; } @@ -280,7 +277,7 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); - kfree(db->u.pgdir->bitmap); + bitmap_free(db->u.pgdir->bitmap); kfree(db->u.pgdir); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a5a0823e5ada..be48c6440251 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -40,9 +40,11 @@ #include <linux/random.h> #include <linux/io-mapping.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/eq.h> #include <linux/debugfs.h> #include "mlx5_core.h" +#include "lib/eq.h" enum { CMD_IF_REV = 5, @@ -313,6 +315,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_FPGA_DESTROY_QP: case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: + case MLX5_CMD_OP_PAGE_FAULT_RESUME: + case MLX5_CMD_OP_QUERY_HOST_PARAMS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -326,7 +330,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_MKEY: case MLX5_CMD_OP_QUERY_MKEY: case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: - case MLX5_CMD_OP_PAGE_FAULT_RESUME: case MLX5_CMD_OP_CREATE_EQ: case MLX5_CMD_OP_QUERY_EQ: case MLX5_CMD_OP_GEN_EQE: @@ -371,6 +374,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_SET_MONITOR_COUNTER: + case MLX5_CMD_OP_ARM_MONITOR_COUNTER: case MLX5_CMD_OP_SET_PP_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: @@ -520,6 +525,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); + MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER); + MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER); MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); @@ -621,6 +628,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); 
MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); default: return "unknown command opcode"; } } @@ -805,6 +813,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in) return MLX5_GET(mbox_in, in->first.data, opcode); } +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); + static void cb_timeout_handler(struct work_struct *work) { struct delayed_work *dwork = container_of(work, struct delayed_work, @@ -1412,14 +1422,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) up(&cmd->sem); } +static int cmd_comp_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_core_dev *dev; + struct mlx5_cmd *cmd; + struct mlx5_eqe *eqe; + + cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb); + dev = container_of(cmd, struct mlx5_core_dev, cmd); + eqe = data; + + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); + + return NOTIFY_OK; +} void mlx5_cmd_use_events(struct mlx5_core_dev *dev) { + MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD); + mlx5_eq_notifier_register(dev, &dev->cmd.nb); mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); } void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) { mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); + mlx5_eq_notifier_unregister(dev, &dev->cmd.nb); } static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) @@ -1435,7 +1463,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1533,7 +1561,47 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) } } } -EXPORT_SYMBOL(mlx5_cmd_comp_handler); + +void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) +{ + unsigned long flags; + u64 vector; 
+ + /* wait for pending handlers to complete */ + mlx5_eq_synchronize_cmd_irq(dev); + spin_lock_irqsave(&dev->cmd.alloc_lock, flags); + vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); + if (!vector) + goto no_trig; + + vector |= MLX5_TRIGGERED_CMD_COMP; + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); + + mlx5_core_dbg(dev, "vector 0x%llx\n", vector); + mlx5_cmd_comp_handler(dev, vector, true); + return; + +no_trig: + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); +} + +void mlx5_cmd_flush(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + while (down_trylock(&cmd->sem)) + mlx5_cmd_trigger_completions(dev); + + while (down_trylock(&cmd->pages_sem)) + mlx5_cmd_trigger_completions(dev); + + /* Unlock cmdif */ + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} static int status_to_err(u8 status) { @@ -1663,12 +1731,57 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } EXPORT_SYMBOL(mlx5_cmd_exec); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context) +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx) +{ + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); + init_waitqueue_head(&ctx->wait); +} +EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +/** + * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx + * @ctx: The ctx to clean + * + * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The + * caller must ensure that mlx5_cmd_exec_cb() is not called during or after + * the call mlx5_cleanup_async_ctx(). 
+ */ +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) +{ + atomic_dec(&ctx->num_inflight); + wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +static void mlx5_cmd_exec_cb_handler(int status, void *_work) { - return cmd_exec(dev, in, in_size, out, out_size, callback, context, - false); + struct mlx5_async_work *work = _work; + struct mlx5_async_ctx *ctx = work->ctx; + + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); +} + +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work) +{ + int ret; + + work->ctx = ctx; + work->user_callback = callback; + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) + return -EIO; + ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); + + return ret; } EXPORT_SYMBOL(mlx5_cmd_exec_cb); @@ -1741,8 +1854,8 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) { struct device *ddev = &dev->pdev->dev; - cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, - &cmd->alloc_dma, GFP_KERNEL); + cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, + &cmd->alloc_dma, GFP_KERNEL); if (!cmd->cmd_alloc_buf) return -ENOMEM; @@ -1756,9 +1869,9 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf, cmd->alloc_dma); - cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, - 2 * MLX5_ADAPTER_PAGE_SIZE - 1, - &cmd->alloc_dma, GFP_KERNEL); + cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, + 2 * MLX5_ADAPTER_PAGE_SIZE - 1, + &cmd->alloc_dma, GFP_KERNEL); if (!cmd->cmd_alloc_buf) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 4b85abb5c9f7..713a17ee3751 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -38,6 +38,7 @@ #include <rdma/ib_verbs.h> #include <linux/mlx5/cq.h> #include "mlx5_core.h" +#include "lib/eq.h" #define TASKLET_MAX_TIME 2 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) @@ -92,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; int err; - eq = mlx5_eqn2eq(dev, eqn); + eq = mlx5_eqn2comp_eq(dev, eqn); if (IS_ERR(eq)) return PTR_ERR(eq); @@ -119,12 +120,12 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, INIT_LIST_HEAD(&cq->tasklet_ctx.list); /* Add to comp EQ CQ tree to recv comp events */ - err = mlx5_eq_add_cq(eq, cq); + err = mlx5_eq_add_cq(&eq->core, cq); if (err) goto err_cmd; /* Add to async EQ CQ tree to recv async events */ - err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq); + err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq); if (err) goto err_cq_add; @@ -139,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, return 0; err_cq_add: - mlx5_eq_del_cq(eq, cq); + mlx5_eq_del_cq(&eq->core, cq); err_cmd: memset(din, 0, sizeof(din)); memset(dout, 0, sizeof(dout)); @@ -157,11 +158,11 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; int err; - err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq); + err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); if (err) return err; - err = mlx5_eq_del_cq(cq->eq, cq); + err = mlx5_eq_del_cq(&cq->eq->core, cq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 
90fabd612b6c..a11e22d0b0cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -36,6 +36,7 @@ #include <linux/mlx5/cq.h> #include <linux/mlx5/driver.h> #include "mlx5_core.h" +#include "lib/eq.h" enum { QP_PID, @@ -349,6 +350,16 @@ out: return param; } +static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {}; + + MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); + MLX5_SET(query_eq_in, in, eq_number, eq->eqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq, int index) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 37ba7c78859d..ebc046fa97d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -45,75 +45,11 @@ struct mlx5_device_context { unsigned long state; }; -struct mlx5_delayed_event { - struct list_head list; - struct mlx5_core_dev *dev; - enum mlx5_dev_event event; - unsigned long param; -}; - enum { MLX5_INTERFACE_ADDED, MLX5_INTERFACE_ATTACHED, }; -static void add_delayed_event(struct mlx5_priv *priv, - struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_delayed_event *delayed_event; - - delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC); - if (!delayed_event) { - mlx5_core_err(dev, "event %d is missed\n", event); - return; - } - - mlx5_core_dbg(dev, "Accumulating event %d\n", event); - delayed_event->dev = dev; - delayed_event->event = event; - delayed_event->param = param; - list_add_tail(&delayed_event->list, &priv->waiting_events_list); -} - -static void delayed_event_release(struct mlx5_device_context *dev_ctx, - struct mlx5_priv *priv) -{ - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - struct 
mlx5_delayed_event *de; - struct mlx5_delayed_event *n; - struct list_head temp; - - INIT_LIST_HEAD(&temp); - - spin_lock_irq(&priv->ctx_lock); - - priv->is_accum_events = false; - list_splice_init(&priv->waiting_events_list, &temp); - if (!dev_ctx->context) - goto out; - list_for_each_entry_safe(de, n, &temp, list) - dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); - -out: - spin_unlock_irq(&priv->ctx_lock); - - list_for_each_entry_safe(de, n, &temp, list) { - list_del(&de->list); - kfree(de); - } -} - -/* accumulating events that can come after mlx5_ib calls to - * ib_register_device, till adding that interface to the events list. - */ -static void delayed_event_start(struct mlx5_priv *priv) -{ - spin_lock_irq(&priv->ctx_lock); - priv->is_accum_events = true; - spin_unlock_irq(&priv->ctx_lock); -} void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { @@ -129,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) dev_ctx->intf = intf; - delayed_event_start(priv); - dev_ctx->context = intf->add(dev); if (dev_ctx->context) { set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); @@ -139,22 +73,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (dev_ctx->intf->pfault) { - if (priv->pfault) { - mlx5_core_err(dev, "multiple page fault handlers not supported"); - } else { - priv->pfault_ctx = dev_ctx->context; - priv->pfault = dev_ctx->intf->pfault; - } - } -#endif spin_unlock_irq(&priv->ctx_lock); } - delayed_event_release(dev_ctx, priv); - if (!dev_ctx->context) kfree(dev_ctx); } @@ -179,15 +100,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (!dev_ctx) return; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - spin_lock_irq(&priv->ctx_lock); - if (priv->pfault == dev_ctx->intf->pfault) - priv->pfault = NULL; - 
spin_unlock_irq(&priv->ctx_lock); - - synchronize_srcu(&priv->pfault_srcu); -#endif - spin_lock_irq(&priv->ctx_lock); list_del(&dev_ctx->list); spin_unlock_irq(&priv->ctx_lock); @@ -207,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv if (!dev_ctx) return; - delayed_event_start(priv); if (intf->attach) { if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - goto out; + return; if (intf->attach(dev, dev_ctx->context)) - goto out; - + return; set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); } else { if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - goto out; + return; dev_ctx->context = intf->add(dev); if (!dev_ctx->context) - goto out; - + return; set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); } - -out: - delayed_event_release(dev_ctx, priv); } void mlx5_attach_device(struct mlx5_core_dev *dev) @@ -350,28 +256,6 @@ void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol) mutex_unlock(&mlx5_intf_mutex); } -void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) -{ - struct mlx5_priv *priv = &mdev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - void *result = NULL; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) - if ((dev_ctx->intf->protocol == protocol) && - dev_ctx->intf->get_dev) { - result = dev_ctx->intf->get_dev(dev_ctx->context); - break; - } - - spin_unlock_irqrestore(&priv->ctx_lock, flags); - - return result; -} -EXPORT_SYMBOL(mlx5_get_protocol_dev); - /* Must be called with intf_mutex held */ void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) { @@ -422,44 +306,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) return res; } -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - - 
spin_lock_irqsave(&priv->ctx_lock, flags); - - if (priv->is_accum_events) - add_delayed_event(priv, dev, event, param); - - /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is - * still in priv->ctx_list. In this case, only notify the dev_ctx if its - * ADDED or ATTACHED bit are set. - */ - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event && - (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) || - test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))) - dev_ctx->intf->event(dev, dev_ctx->context, event, param); - - spin_unlock_irqrestore(&priv->ctx_lock, flags); -} - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -void mlx5_core_page_fault(struct mlx5_core_dev *dev, - struct mlx5_pagefault *pfault) -{ - struct mlx5_priv *priv = &dev->priv; - int srcu_idx; - - srcu_idx = srcu_read_lock(&priv->pfault_srcu); - if (priv->pfault) - priv->pfault(dev, priv->pfault_ctx, pfault); - srcu_read_unlock(&priv->pfault_srcu, srcu_idx); -} -#endif void mlx5_dev_list_lock(void) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 0f11fff32a9b..8ecac81a385d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -161,10 +161,10 @@ static void print_misc_parameters_hdrs(struct trace_seq *p, PRINT_MASKED_VAL(name, p, format); \ } DECLARE_MASK_VAL(u64, gre_key) = { - .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 | - MLX5_GET(fte_match_set_misc, mask, gre_key_l), - .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 | - MLX5_GET(fte_match_set_misc, value, gre_key_l)}; + .m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo), + .v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)}; PRINT_MASKED_VAL(gre_key, p, "%llu"); 
PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u"); @@ -258,6 +258,8 @@ const char *parse_fs_dst(struct trace_seq *p, return ret; } +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_ft); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_ft); EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg); EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg); EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index d027ce00c8ce..a4cf123e3f17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -61,6 +61,41 @@ const char *parse_fs_dst(struct trace_seq *p, const struct mlx5_flow_destination *dst, u32 counter_id); +TRACE_EVENT(mlx5_fs_add_ft, + TP_PROTO(const struct mlx5_flow_table *ft), + TP_ARGS(ft), + TP_STRUCT__entry( + __field(const struct mlx5_flow_table *, ft) + __field(u32, id) + __field(u32, level) + __field(u32, type) + ), + TP_fast_assign( + __entry->ft = ft; + __entry->id = ft->id; + __entry->level = ft->level; + __entry->type = ft->type; + ), + TP_printk("ft=%p id=%u level=%u type=%u \n", + __entry->ft, __entry->id, __entry->level, __entry->type) + ); + +TRACE_EVENT(mlx5_fs_del_ft, + TP_PROTO(const struct mlx5_flow_table *ft), + TP_ARGS(ft), + TP_STRUCT__entry( + __field(const struct mlx5_flow_table *, ft) + __field(u32, id) + ), + TP_fast_assign( + __entry->ft = ft; + __entry->id = ft->id; + + ), + TP_printk("ft=%p id=%u\n", + __entry->ft, __entry->id) + ); + TRACE_EVENT(mlx5_fs_add_fg, TP_PROTO(const struct mlx5_flow_group *fg), TP_ARGS(fg), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index d4ec93bde4de..6999f4486e9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -30,6 +30,7 @@ * SOFTWARE. 
*/ #define CREATE_TRACE_POINTS +#include "lib/eq.h" #include "fw_tracer.h" #include "fw_tracer_tracepoint.h" @@ -846,9 +847,9 @@ free_tracer: return ERR_PTR(err); } -/* Create HW resources + start tracer - * must be called before Async EQ is created - */ +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data); + +/* Create HW resources + start tracer */ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) { struct mlx5_core_dev *dev; @@ -874,6 +875,9 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) goto err_dealloc_pd; } + MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER); + mlx5_eq_notifier_register(dev, &tracer->nb); + mlx5_fw_tracer_start(tracer); return 0; @@ -883,9 +887,7 @@ err_dealloc_pd: return err; } -/* Stop tracer + Cleanup HW resources - * must be called after Async EQ is destroyed - */ +/* Stop tracer + Cleanup HW resources */ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) { if (IS_ERR_OR_NULL(tracer)) @@ -893,7 +895,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? 
(%d)\n", tracer->owner); - + mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb); cancel_work_sync(&tracer->ownership_change_work); cancel_work_sync(&tracer->handle_traces_work); @@ -922,12 +924,11 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) kfree(tracer); } -void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data) { - struct mlx5_fw_tracer *tracer = dev->tracer; - - if (!tracer) - return; + struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb); + struct mlx5_core_dev *dev = tracer->dev; + struct mlx5_eqe *eqe = data; switch (eqe->sub_type) { case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE: @@ -942,6 +943,8 @@ void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n", eqe->sub_type); } + + return NOTIFY_OK; } EXPORT_TRACEPOINT_SYMBOL(mlx5_fw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index 0347f2dd5cee..a8b8747f2b61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -55,6 +55,7 @@ struct mlx5_fw_tracer { struct mlx5_core_dev *dev; + struct mlx5_nb nb; bool owner; u8 trc_ver; struct workqueue_struct *work_queue; @@ -170,6 +171,5 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev); int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); -void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c new file mode 100644 index 000000000000..4746f2d28fb6 --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "ecpf.h" + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1; +} + +static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_peer_pf_init(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_enable_hca(dev); + if (err) + mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n", + err); + + return err; +} + +static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_disable_hca(dev); + if (err) { + mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n", + err); + return; + } + + err = mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages); + if (err) + mlx5_core_warn(dev, "Timeout reclaiming peer PF pages err(%d)\n", + err); +} + +int mlx5_ec_init(struct mlx5_core_dev *dev) +{ + int err = 0; + + if (!mlx5_core_is_ecpf(dev)) + return 0; + + /* ECPF shall enable HCA for peer PF in the same way a PF + * does this for its VFs. 
+ */ + err = mlx5_peer_pf_init(dev); + if (err) + return err; + + return 0; +} + +void mlx5_ec_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return; + + mlx5_peer_pf_cleanup(dev); +} + +static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; + + MLX5_SET(query_host_params_in, in, opcode, + MLX5_CMD_OP_QUERY_HOST_PARAMS); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ + u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; + int err; + + err = mlx5_query_host_params_context(dev, out, sizeof(out)); + if (err) + return err; + + *num_vf = MLX5_GET(query_host_params_out, out, + host_params_context.host_num_of_vfs); + mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h new file mode 100644 index 000000000000..346372df218f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_ECPF_H__ +#define __MLX5_ECPF_H__ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +#ifdef CONFIG_MLX5_ESWITCH + +enum { + MLX5_ECPU_BIT_NUM = 23, +}; + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); +int mlx5_ec_init(struct mlx5_core_dev *dev); +void mlx5_ec_cleanup(struct mlx5_core_dev *dev); +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline bool +mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } +static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} +static inline int +mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ return -EOPNOTSUPP; } + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_ECPF_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 118324802926..d3eaf2ceaa39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -49,6 +49,7 @@ #include <net/switchdev.h> #include <net/xdp.h> #include <linux/net_dim.h> +#include <linux/bits.h> #include "wq.h" #include "mlx5_core.h" #include "en_stats.h" @@ -75,15 +76,14 @@ struct page_pool; #define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define MLX5E_RX_MAX_HEAD (256) + #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ #define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \ max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) -#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6) -#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8) -#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \ - (cqe_cmprs ? 
MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \ - MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)) +#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \ + MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD)) #define MLX5_MPWRQ_LOG_WQE_SZ 18 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ @@ -118,8 +118,6 @@ struct page_pool; #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 -#define MLX5E_RX_MAX_HEAD (256) - #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_DEFAULT_LRO_TIMEOUT 32 #define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 @@ -147,9 +145,6 @@ struct page_pool; MLX5_UMR_MTT_ALIGNMENT)) #define MLX5E_UMR_WQEBBS \ (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB)) -#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS - -#define MLX5E_NUM_MAIN_GROUPS 9 #define MLX5E_MSG_LEVEL NETIF_MSG_LINK @@ -178,8 +173,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return is_kdump_kernel() ? MLX5E_MIN_NUM_CHANNELS : - min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); + min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS); } /* Use this function to get max num channels after netdev was created */ @@ -214,22 +208,24 @@ struct mlx5e_umr_wqe { extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; enum mlx5e_priv_flag { - MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), - MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1), - MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2), - MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3), - MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4), + MLX5E_PFLAG_RX_CQE_BASED_MODER, + MLX5E_PFLAG_TX_CQE_BASED_MODER, + MLX5E_PFLAG_RX_CQE_COMPRESS, + MLX5E_PFLAG_RX_STRIDING_RQ, + MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, + MLX5E_PFLAG_XDP_TX_MPWQE, + MLX5E_NUM_PFLAGS, /* Keep last */ }; #define MLX5E_SET_PFLAG(params, pflag, enable) \ do { \ if (enable) \ - (params)->pflags |= (pflag); \ + (params)->pflags |= BIT(pflag); \ else \ - (params)->pflags &= ~(pflag); \ + (params)->pflags &= ~(BIT(pflag)); \ } while (0) 
-#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag))) +#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag)))) #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ @@ -247,9 +243,6 @@ struct mlx5e_params { bool lro_en; u32 lro_wqe_sz; u8 tx_min_inline_mode; - u8 rss_hfunc; - u8 toeplitz_hash_key[40]; - u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; bool vlan_strip_disable; bool scatter_fcs_en; bool rx_dim_enabled; @@ -313,16 +306,18 @@ struct mlx5e_cq { struct mlx5_core_cq mcq; struct mlx5e_channel *channel; + /* control */ + struct mlx5_core_dev *mdev; + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5e_cq_decomp { /* cqe decompression */ struct mlx5_cqe64 title; struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE]; u8 mini_arr_idx; - u16 decmprs_left; - u16 decmprs_wqe_counter; - - /* control */ - struct mlx5_core_dev *mdev; - struct mlx5_wq_ctrl wq_ctrl; + u16 left; + u16 wqe_counter; } ____cacheline_aligned_in_smp; struct mlx5e_tx_wqe_info { @@ -349,7 +344,6 @@ enum { MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, MLX5E_SQ_STATE_TLS, - MLX5E_SQ_STATE_REDIRECT, }; struct mlx5e_sq_wqe_info { @@ -393,10 +387,7 @@ struct mlx5e_txqsq { struct mlx5e_channel *channel; int txq_ix; u32 rate_limit; - struct mlx5e_txqsq_recover { - struct work_struct recover_work; - u64 last_recover; - } recover; + struct work_struct recover_work; } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { @@ -410,24 +401,51 @@ struct mlx5e_xdp_info { struct mlx5e_dma_info di; }; +struct mlx5e_xdp_info_fifo { + struct mlx5e_xdp_info *xi; + u32 *cc; + u32 *pc; + u32 mask; +}; + +struct mlx5e_xdp_wqe_info { + u8 num_wqebbs; + u8 num_ds; +}; + +struct mlx5e_xdp_mpwqe { + /* Current MPWQE session */ + struct mlx5e_tx_wqe *wqe; + u8 ds_count; + u8 max_ds_count; +}; + +struct mlx5e_xdpsq; +typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*, + struct mlx5e_xdp_info*); struct 
mlx5e_xdpsq { /* data path */ /* dirtied @completion */ + u32 xdpi_fifo_cc; u16 cc; bool redirect_flush; /* dirtied @xmit */ - u16 pc ____cacheline_aligned_in_smp; - bool doorbell; + u32 xdpi_fifo_pc ____cacheline_aligned_in_smp; + u16 pc; + struct mlx5_wqe_ctrl_seg *doorbell_cseg; + struct mlx5e_xdp_mpwqe mpwqe; struct mlx5e_cq cq; /* read only */ struct mlx5_wq_cyc wq; struct mlx5e_xdpsq_stats *stats; + mlx5e_fp_xmit_xdp_frame xmit_xdp_frame; struct { - struct mlx5e_xdp_info *xdpi; + struct mlx5e_xdp_wqe_info *wqe_info; + struct mlx5e_xdp_info_fifo xdpi_fifo; } db; void __iomem *uar_map; u32 sqn; @@ -559,6 +577,7 @@ struct mlx5e_rq { struct net_device *netdev; struct mlx5e_rq_stats *stats; struct mlx5e_cq cq; + struct mlx5e_cq_decomp cqd; struct mlx5e_page_cache page_cache; struct hwtstamp_config *tstamp; struct mlx5_clock *clock; @@ -616,6 +635,7 @@ struct mlx5e_channel { struct hwtstamp_config *tstamp; int ix; int cpu; + cpumask_var_t xps_cpumask; }; struct mlx5e_channels { @@ -633,9 +653,9 @@ struct mlx5e_channel_stats { } ____cacheline_aligned_in_smp; enum { - MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, + MLX5E_STATE_XDP_TX_ENABLED, }; struct mlx5e_rqt { @@ -654,6 +674,20 @@ enum { MLX5E_NIC_PRIO }; +struct mlx5e_rss_params { + u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; + u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; + u8 toeplitz_hash_key[40]; + u8 hfunc; +}; + +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + int rl_update; + int rl_index; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -674,6 +708,7 @@ struct mlx5e_priv { struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS]; struct mlx5e_flow_steering fs; @@ -683,6 +718,8 @@ struct mlx5e_priv { 
struct work_struct set_rx_mode_work; struct work_struct tx_timeout_work; struct work_struct update_stats_work; + struct work_struct monitor_counters_work; + struct mlx5_nb monitor_counters_nb; struct mlx5_core_dev *mdev; struct net_device *netdev; @@ -692,6 +729,8 @@ struct mlx5e_priv { struct hwtstamp_config tstamp; u16 q_counter; u16 drop_rq_q_counter; + struct notifier_block events_nb; + #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx dcbx; #endif @@ -704,6 +743,7 @@ struct mlx5e_priv { #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_tls *tls; #endif + struct devlink_health_reporter *tx_reporter; }; struct mlx5e_profile { @@ -769,6 +809,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); void mlx5e_update_stats(struct mlx5e_priv *priv); +void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); void mlx5e_init_l2_addr(struct mlx5e_priv *priv); int mlx5e_self_test_num(struct mlx5e_priv *priv); @@ -799,9 +841,11 @@ struct mlx5e_redirect_rqt_param { int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, struct mlx5e_redirect_rqt_param rrp); -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, - enum mlx5e_traffic_types tt, +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, + const struct mlx5e_tirc_config *ttconfig, void *tirc, bool inner); +void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen); +struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -814,9 +858,10 @@ void mlx5e_close_channels(struct mlx5e_channels *chs); * switching channels */ typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); -void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - 
mlx5e_fp_hw_modify hw_modify); +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv); +int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); @@ -830,6 +875,11 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p); +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); +void mlx5e_tx_disable_queue(struct netdev_queue *txq); + static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) { return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && @@ -931,14 +981,16 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); int mlx5e_create_tises(struct mlx5e_priv *priv); -void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); +void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); +void mlx5e_update_ndo_stats(struct mlx5e_priv *priv); void mlx5e_queue_update_stats(struct mlx5e_priv *priv); int mlx5e_bits_invert(unsigned long a, int size); typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv); +int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv); int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, change_hw_mtu_cb set_mtu_cb); @@ -962,12 +1014,20 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal); int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal); +int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings); +int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct 
ethtool_link_ksettings *link_ksettings); u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); +void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam); +int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam); /* mlx5e generic netdev management API */ int mlx5e_netdev_init(struct net_device *netdev, @@ -983,12 +1043,26 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -void mlx5e_build_rss_params(struct mlx5e_params *params); +void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, + u16 num_channels); u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); + +void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti); +void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti); +netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features); +#ifdef CONFIG_MLX5_ESWITCH +int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac); +int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); +int mlx5e_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); +int mlx5e_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats 
*vf_stats); +#endif #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 1431232c9a09..be5961ff24cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -73,6 +73,22 @@ enum mlx5e_traffic_types { MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; +struct mlx5e_tirc_config { + u8 l3_prot_type; + u8 l4_prot_type; + u32 rx_hash_fields; +}; + +#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP) +#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_L4_SPORT |\ + MLX5_HASH_FIELD_SEL_L4_DPORT) +#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_IPSEC_SPI) + enum mlx5e_tunnel_types { MLX5E_TT_IPV4_GRE, MLX5E_TT_IPV6_GRE, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c new file mode 100644 index 000000000000..7cd5b02e0f10 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#include "en.h" +#include "monitor_stats.h" +#include "lib/eq.h" + +/* Driver will set the following watch counters list: + * Ppcnt.802_3: + * a_in_range_length_errors Type: 0x0, Counter: 0x0, group_id = N/A + * a_out_of_range_length_field Type: 0x0, Counter: 0x1, group_id = N/A + * a_frame_too_long_errors Type: 0x0, Counter: 0x2, group_id = N/A + * a_frame_check_sequence_errors Type: 0x0, Counter: 0x3, group_id = N/A + * a_alignment_errors Type: 0x0, Counter: 0x4, group_id = N/A + * if_out_discards Type: 0x0, Counter: 0x5, group_id = N/A + * Q_Counters: + * Q[index].rx_out_of_buffer Type: 0x1, Counter: 0x4, group_id = counter_ix + */ + +#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1 +#define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1 + +int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters)) + return false; + if (MLX5_CAP_PCAM_REG(mdev, ppcnt) && + MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters) < + NUM_REQ_PPCNT_COUNTER_S1) + return false; + if (MLX5_CAP_GEN(mdev, num_q_monitor_counters) < + NUM_REQ_Q_COUNTERS_S1) + return false; + return true; +} + +void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {}; + u32 out[MLX5_ST_SZ_DW(arm_monitor_counter_out)] = {}; + + MLX5_SET(arm_monitor_counter_in, in, opcode, + MLX5_CMD_OP_ARM_MONITOR_COUNTER); + mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)); +} + +static void mlx5e_monitor_counters_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + monitor_counters_work); + + mutex_lock(&priv->state_lock); + mlx5e_update_ndo_stats(priv); + mutex_unlock(&priv->state_lock); + mlx5e_monitor_counter_arm(priv); +} + +static int mlx5e_monitor_event_handler(struct notifier_block *nb, + unsigned long event, void *eqe) +{ + struct mlx5e_priv *priv = 
mlx5_nb_cof(nb, struct mlx5e_priv, + monitor_counters_nb); + queue_work(priv->wq, &priv->monitor_counters_work); + return NOTIFY_OK; +} + +static void mlx5e_monitor_counter_start(struct mlx5e_priv *priv) +{ + MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler, + MONITOR_COUNTER); + mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb); +} + +static void mlx5e_monitor_counter_stop(struct mlx5e_priv *priv) +{ + mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb); + cancel_work_sync(&priv->monitor_counters_work); +} + +static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in) +{ + enum mlx5_monitor_counter_ppcnt ppcnt_cnt; + + for (ppcnt_cnt = 0; + ppcnt_cnt < NUM_REQ_PPCNT_COUNTER_S1; + ppcnt_cnt++, cnt++) { + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].type, + MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter, + ppcnt_cnt); + } + return ppcnt_cnt; +} + +static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in) +{ + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].type, + MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter, + MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter_group_id, + q_counter); + return 1; +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters); + int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters); + int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 
0 : + MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters); + u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {}; + int q_counter = priv->q_counter; + int cnt = 0; + + if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 && + max_num_of_counters >= (NUM_REQ_PPCNT_COUNTER_S1 + cnt)) + cnt += fill_monitor_counter_ppcnt_set1(cnt, in); + + if (num_q_counters >= NUM_REQ_Q_COUNTERS_S1 && + max_num_of_counters >= (NUM_REQ_Q_COUNTERS_S1 + cnt) && + q_counter) + cnt += fill_monitor_counter_q_counter_set1(cnt, q_counter, in); + + MLX5_SET(set_monitor_counter_in, in, num_of_counters, cnt); + MLX5_SET(set_monitor_counter_in, in, opcode, + MLX5_CMD_OP_SET_MONITOR_COUNTER); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +void mlx5e_monitor_counter_init(struct mlx5e_priv *priv) +{ + INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work); + mlx5e_monitor_counter_start(priv); + mlx5e_set_monitor_counter(priv); + mlx5e_monitor_counter_arm(priv); + queue_work(priv->wq, &priv->update_stats_work); +} + +static void mlx5e_monitor_counter_disable(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {}; + + MLX5_SET(set_monitor_counter_in, in, num_of_counters, 0); + MLX5_SET(set_monitor_counter_in, in, opcode, + MLX5_CMD_OP_SET_MONITOR_COUNTER); + + mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)); +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv) +{ + mlx5e_monitor_counter_disable(priv); + mlx5e_monitor_counter_stop(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h new file mode 100644 index 000000000000..e1ac4b3d22fb --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_MONITOR_H__ +#define __MLX5_MONITOR_H__ + +int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_init(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv); + +#endif /* __MLX5_MONITOR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 4a37713023be..d5e5afbdca6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -63,66 +63,165 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { [MLX5E_50GBASE_KR2] = 50000, }; -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) +static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = 100, + [MLX5E_1000BASE_X_SGMII] = 1000, + [MLX5E_5GBASE_R] = 5000, + [MLX5E_10GBASE_XFI_XAUI_1] = 10000, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, + [MLX5E_400GAUI_8] = 400000, +}; + +static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, + const u32 **arr, u32 *size) +{ + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : + ARRAY_SIZE(mlx5e_link_speed); + *arr = ext ? 
mlx5e_ext_link_speed : mlx5e_link_speed; +} + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5e_port_eth_proto *eproto) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + if (!eproto) + return -EINVAL; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); + if (err) + return err; + + eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); + return 0; +} + +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} + +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, bool ext) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap, + &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); + MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN); + if (ext) + MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); +} + +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper) { unsigned long temp = eth_proto_oper; + const 
u32 *table; u32 speed = 0; + u32 max_size; int i; - i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER); - if (i < MLX5E_LINK_MODES_NUMBER) - speed = mlx5e_link_speed[i]; - + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + i = find_first_bit(&temp, max_size); + if (i < max_size) + speed = table[i]; return speed; } int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { - u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; - u32 eth_proto_oper; + struct mlx5e_port_eth_proto eproto; + bool ext; int err; - err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) - return err; + goto out; - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - *speed = mlx5e_port_ptys2speed(eth_proto_oper); + *speed = mlx5e_port_ptys2speed(mdev, eproto.oper); if (!(*speed)) err = -EINVAL; +out: return err; } int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { + struct mlx5e_port_eth_proto eproto; u32 max_speed = 0; - u32 proto_cap; + const u32 *table; + u32 max_size; + bool ext; int err; int i; - err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) return err; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) - if (proto_cap & MLX5E_PROT_MASK(i)) - max_speed = max(max_speed, mlx5e_link_speed[i]); + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + for (i = 0; i < max_size; ++i) + if (eproto.cap & MLX5E_PROT_MASK(i)) + max_speed = max(max_speed, table[i]); *speed = max_speed; return 0; } -u32 mlx5e_port_speed2linkmodes(u32 speed) +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed) { u32 link_modes = 0; + const u32 *table; + u32 max_size; int i; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (mlx5e_link_speed[i] == speed) + mlx5e_port_get_speed_arr(mdev, 
&table, &max_size); + for (i = 0; i < max_size; ++i) { + if (table[i] == speed) link_modes |= MLX5E_PROT_MASK(i); } - return link_modes; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index cd2160b8c9bf..70f536ec51c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -36,10 +36,22 @@ #include <linux/mlx5/driver.h> #include "en.h" -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper); +struct mlx5e_port_eth_proto { + u32 cap; + u32 admin; + u32 oper; +}; + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5e_port_eth_proto *eproto); +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin); +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, bool ext); +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); -u32 mlx5e_port_speed2linkmodes(u32 speed); +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed); int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index eac245a93f91..4ab0d030b544 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -122,7 +122,9 @@ out: return err; } -/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */ +/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) + * minimum speed value is 40Gbps + */ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) { u32 speed; @@ -130,10 +132,9 @@ 
static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) int err; err = mlx5e_port_linkspeed(priv->mdev, &speed); - if (err) { - mlx5_core_warn(priv->mdev, "cannot get port speed\n"); - return 0; - } + if (err) + speed = SPEED_40000; + speed = max_t(u32, speed, SPEED_40000); xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; @@ -142,7 +143,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) } static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, - u32 xoff, unsigned int mtu) + u32 xoff, unsigned int max_mtu) { int i; @@ -154,11 +155,12 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, } if (port_buffer->buffer[i].size < - (xoff + mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) + (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) return -ENOMEM; port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff; - port_buffer->buffer[i].xon = port_buffer->buffer[i].xoff - mtu; + port_buffer->buffer[i].xon = + port_buffer->buffer[i].xoff - max_mtu; } return 0; @@ -166,7 +168,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, /** * update_buffer_lossy() - * mtu: device's MTU + * max_mtu: netdev's max_mtu * pfc_en: <input> current pfc configuration * buffer: <input> current prio to buffer mapping * xoff: <input> xoff value @@ -183,7 +185,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * Return 0 if no error. * Set change to true if buffer configuration is modified. 
*/ -static int update_buffer_lossy(unsigned int mtu, +static int update_buffer_lossy(unsigned int max_mtu, u8 pfc_en, u8 *buffer, u32 xoff, struct mlx5e_port_buffer *port_buffer, bool *change) @@ -220,7 +222,7 @@ static int update_buffer_lossy(unsigned int mtu, } if (changed) { - err = update_xoff_threshold(port_buffer, xoff, mtu); + err = update_xoff_threshold(port_buffer, xoff, max_mtu); if (err) return err; @@ -230,6 +232,7 @@ static int update_buffer_lossy(unsigned int mtu, return 0; } +#define MINIMUM_MAX_MTU 9216 int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 change, unsigned int mtu, struct ieee_pfc *pfc, @@ -241,12 +244,14 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, bool update_prio2buffer = false; u8 buffer[MLX5E_MAX_PRIORITY]; bool update_buffer = false; + unsigned int max_mtu; u32 total_used = 0; u8 curr_pfc_en; int err; int i; mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change); + max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU); err = mlx5e_port_query_buffer(priv, &port_buffer); if (err) @@ -254,7 +259,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } @@ -264,7 +269,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(mtu, pfc->pfc_en, buffer, xoff, + err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, &port_buffer, &update_buffer); if (err) return err; @@ -276,8 +281,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(mtu, curr_pfc_en, prio2buffer, xoff, - &port_buffer, &update_buffer); + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, + xoff, &port_buffer, &update_buffer); if (err) return err; } @@ -301,7 +306,7 @@ int 
mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, return -EINVAL; update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } @@ -309,7 +314,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, /* Need to update buffer configuration if xoff value is changed */ if (!update_buffer && xoff != priv->dcbx.xoff) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h new file mode 100644 index 000000000000..e78e92753d73 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5E_EN_REPORTER_H +#define __MLX5E_EN_REPORTER_H + +#include <linux/mlx5/driver.h> +#include "en.h" + +int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); +void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv); +void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq); +int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c new file mode 100644 index 000000000000..476dd97f7f2f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -0,0 +1,314 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include <net/devlink.h> +#include "reporter.h" +#include "lib/eq.h" + +#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256 + +struct mlx5e_tx_err_ctx { + int (*recover)(struct mlx5e_txqsq *sq); + struct mlx5e_txqsq *sq; +}; + +static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + + while (time_before(jiffies, exp_time)) { + if (sq->cc == sq->pc) + return 0; + + msleep(20); + } + + netdev_err(sq->channel->netdev, + "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", + sq->sqn, sq->cc, sq->pc); + + return -ETIMEDOUT; +} + +static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) +{ + WARN_ONCE(sq->cc != sq->pc, + "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", + sq->sqn, sq->cc, sq->pc); + sq->cc = 0; + sq->dma_fifo_cc = 0; + sq->pc = 0; +} + +static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + struct mlx5e_modify_sq_param msp = {0}; + int err; + + msp.curr_state = curr_state; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); + return err; + } + + return 0; +} + +static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + u8 state; + int err; + + if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + return 0; + + err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query SQ 0x%x state. 
err = %d\n", + sq->sqn, err); + return err; + } + + if (state != MLX5_SQC_STATE_ERR) { + netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); + return -EINVAL; + } + + mlx5e_tx_disable_queue(sq->txq); + + err = mlx5e_wait_for_sq_flush(sq); + if (err) + return err; + + /* At this point, no new packets will arrive from the stack as TXQ is + * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all + * pending WQEs. SQ can safely reset the SQ. + */ + + err = mlx5e_sq_to_ready(sq, state); + if (err) + return err; + + mlx5e_reset_txqsq_cc_pc(sq); + sq->stats->recover++; + mlx5e_activate_txqsq(sq); + + return 0; +} + +/* Report a TX error via the devlink health reporter. If no reporter is + * available (NULL or ERR_PTR), log err_str and invoke the recovery callback + * from err_ctx directly. + */ +static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter, + char *err_str, + struct mlx5e_tx_err_ctx *err_ctx) +{ + if (IS_ERR_OR_NULL(tx_reporter)) { + /* err_str is message data; never pass it as the format string */ + netdev_err(err_ctx->sq->channel->netdev, "%s", err_str); + return err_ctx->recover(err_ctx->sq); + } + + return devlink_health_report(tx_reporter, err_str, err_ctx); +} + +/* Build the "ERR CQE" message for this SQ and report it with the error-CQE + * recovery callback attached. + */ +void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; + struct mlx5e_tx_err_ctx err_ctx = {0}; + + err_ctx.sq = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + /* bounded write: err_str is a fixed-size stack buffer */ + snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); + + mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str, + &err_ctx); +} + +static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq) +{ + struct mlx5_eq_comp *eq = sq->cq.mcq.eq; + u32 eqe_count; + int ret; + + netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eq->core.eqn, eq->core.cons_index, eq->core.irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); + ret = eqe_count ? 
false : true; + if (!eqe_count) { + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + return ret; + } + + netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n", + eqe_count, eq->core.eqn); + sq->channel->stats->eq_rearm++; + return ret; +} + +int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; + struct mlx5e_tx_err_ctx err_ctx; + + err_ctx.sq = sq; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; + sprintf(err_str, + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", + sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - sq->txq->trans_start)); + + return mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str, + &err_ctx); +} + +/* state lock cannot be grabbed within this function. + * It can cause a dead lock or a read-after-free. + */ +static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) +{ + return err_ctx->recover(err_ctx->sq); +} + +static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) +{ + int err = 0; + + rtnl_lock(); + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + + err = mlx5e_safe_reopen_channels(priv); + +out: + mutex_unlock(&priv->state_lock); + rtnl_unlock(); + + return err; +} + +static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, + void *context) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_tx_err_ctx *err_ctx = context; + + return err_ctx ? 
mlx5e_tx_reporter_recover_from_ctx(err_ctx) : + mlx5e_tx_reporter_recover_all(priv); +} + +static int +mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, + u32 sqn, u8 state, bool stopped) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); + if (err) + return err; + + err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + int i, err = 0; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); + if (err) + goto unlock; + + for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; + i++) { + struct mlx5e_txqsq *sq = priv->txq2sq[i]; + u8 state; + + err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); + if (err) + break; + + err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn, + state, + netif_xmit_stopped(sq->txq)); + if (err) + break; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + goto unlock; + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { + .name = "tx", + .recover = mlx5e_tx_reporter_recover, + .diagnose = mlx5e_tx_reporter_diagnose, +}; + +#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 + +int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct devlink *devlink = priv_to_devlink(mdev); + + priv->tx_reporter = + devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, + 
MLX5_REPORTER_TX_GRACEFUL_PERIOD, + true, priv); + if (IS_ERR(priv->tx_reporter)) + netdev_warn(priv->netdev, + "Failed to create tx reporter, err = %ld\n", + PTR_ERR(priv->tx_reporter)); + return IS_ERR_OR_NULL(priv->tx_reporter); +} + +void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) +{ + if (IS_ERR_OR_NULL(priv->tx_reporter)) + return; + + devlink_health_reporter_destroy(priv->tx_reporter); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c new file mode 100644 index 000000000000..eec07b34b4ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -0,0 +1,648 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/vxlan.h> +#include <net/gre.h> +#include "lib/vxlan.h" +#include "en/tc_tun.h" + +static int get_route_and_out_devs(struct mlx5e_priv *priv, + struct net_device *dev, + struct net_device **route_dev, + struct net_device **out_dev) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_dev, *uplink_upper; + bool dst_is_lag_dev; + + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + dst_is_lag_dev = (uplink_upper && + netif_is_lag_master(uplink_upper) && + dev == uplink_upper && + mlx5_lag_is_sriov(priv->mdev)); + + /* if the egress device isn't on the same HW e-switch or + * it's a LAG device, use the uplink + */ + if (!netdev_port_same_parent_id(priv->netdev, dev) || + dst_is_lag_dev) { + *route_dev = uplink_dev; + *out_dev = *route_dev; + } else { + *route_dev = dev; + if (is_vlan_dev(*route_dev)) + *out_dev = uplink_dev; + else if (mlx5e_eswitch_rep(dev)) + *out_dev = *route_dev; + else + return -EOPNOTSUPP; + } + + if (!(mlx5e_eswitch_rep(*out_dev) && + mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) + return -EOPNOTSUPP; + + return 0; +} + +static int mlx5e_route_lookup_ipv4(struct 
mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct net_device **route_dev, + struct flowi4 *fl4, + struct neighbour **out_n, + u8 *out_ttl) +{ + struct rtable *rt; + struct neighbour *n = NULL; + +#if IS_ENABLED(CONFIG_INET) + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *uplink_dev; + int ret; + + if (mlx5_lag_is_multipath(mdev)) { + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + fl4->flowi4_oif = uplink_dev->ifindex; + } + + rt = ip_route_output_key(dev_net(mirred_dev), fl4); + ret = PTR_ERR_OR_ZERO(rt); + if (ret) + return ret; + + if (mlx5_lag_is_multipath(mdev) && !rt->rt_gateway) + return -ENETUNREACH; +#else + return -EOPNOTSUPP; +#endif + + ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev); + if (ret < 0) + return ret; + + if (!(*out_ttl)) + *out_ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &fl4->daddr); + ip_rt_put(rt); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + +static const char *mlx5e_netdev_kind(struct net_device *dev) +{ + if (dev->rtnl_link_ops) + return dev->rtnl_link_ops->kind; + else + return ""; +} + +static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct net_device **route_dev, + struct flowi6 *fl6, + struct neighbour **out_n, + u8 *out_ttl) +{ + struct neighbour *n = NULL; + struct dst_entry *dst; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + int ret; + + ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, + fl6); + if (ret < 0) + return ret; + + if (!(*out_ttl)) + *out_ttl = ip6_dst_hoplimit(dst); + + ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev); + if (ret < 0) + return ret; +#else + return -EOPNOTSUPP; +#endif + + n = dst_neigh_lookup(dst, &fl6->daddr); + dst_release(dst); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + +static 
int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) +{ + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh = (struct vxlanhdr *) + ((char *)udp + sizeof(struct udphdr)); + + udp->dest = tun_key->tp_dst; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(tun_id); + + return 0; +} + +static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key) +{ + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + int hdr_len; + struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); + + /* the HW does not calculate GRE csum or sequences */ + if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + return -EOPNOTSUPP; + + greh->protocol = htons(ETH_P_TEB); + + /* GRE key */ + hdr_len = gre_calc_hlen(tun_key->tun_flags); + greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); + if (tun_key->tun_flags & TUNNEL_KEY) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + *ptr = tun_id; + } + + return 0; +} + +static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + int err = 0; + struct ip_tunnel_key *key = &e->tun_info.key; + + if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + *ip_proto = IPPROTO_UDP; + err = mlx5e_gen_vxlan_header(buf, key); + } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + *ip_proto = IPPROTO_GRE; + err = mlx5e_gen_gre_header(buf, key); + } else { + pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" + , e->tunnel_type); + err = -EOPNOTSUPP; + } + + return err; +} + +static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, + struct mlx5e_encap_entry *e, + u16 proto) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + char *ip; + + ether_addr_copy(eth->h_dest, e->h_dest); + ether_addr_copy(eth->h_source, dev->dev_addr); + if (is_vlan_dev(dev)) { + struct vlan_hdr *vlan = (struct vlan_hdr *) + ((char *)eth + ETH_HLEN); + ip = (char *)vlan + VLAN_HLEN; 
+ eth->h_proto = vlan_dev_vlan_proto(dev); + vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev)); + vlan->h_vlan_encapsulated_proto = htons(proto); + } else { + eth->h_proto = htons(proto); + ip = (char *)eth + ETH_HLEN; + } + + return ip; +} + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev, *route_dev; + struct neighbour *n = NULL; + struct flowi4 fl4 = {}; + int ipv4_encap_size; + char *encap_header; + u8 nud_state, ttl; + struct iphdr *ip; + int err; + + /* add the IP fields */ + fl4.flowi4_tos = tun_key->tos; + fl4.daddr = tun_key->u.ipv4.dst; + fl4.saddr = tun_key->u.ipv4.src; + ttl = tun_key->ttl; + + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev, + &fl4, &n, &ttl); + if (err) + return err; + + ipv4_encap_size = + (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct iphdr) + + e->tunnel_hlen; + + /* The route lookup handed us a neighbour in n; every failure from + * here on must leave through "out" so that it is released. + */ + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto out; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto out; + } + + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + e->route_dev = route_dev; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes its validity state, we would find the relevant neigh + * in the hash. 
+ */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) + goto free_encap; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); + + /* add ethernet header */ + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, + ETH_P_IP); + + /* add ip header */ + ip->tos = tun_key->tos; + ip->version = 0x4; + ip->ihl = 0x5; + ip->ttl = ttl; + ip->daddr = fl4.daddr; + ip->saddr = fl4.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), + &ip->protocol, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. + */ + goto out; + } + + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +out: + if (n) + neigh_release(n); + return err; +} + +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev, *route_dev; + struct neighbour *n = NULL; + struct flowi6 fl6 = {}; + struct ipv6hdr *ip6h; + int ipv6_encap_size; + char *encap_header; + u8 nud_state, ttl; + int err; + + ttl = tun_key->ttl; + + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + fl6.daddr = tun_key->u.ipv6.dst; + fl6.saddr = 
tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev, + &fl6, &n, &ttl); + if (err) + return err; + + ipv6_encap_size = + (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct ipv6hdr) + + e->tunnel_hlen; + + /* The route lookup handed us a neighbour in n; every failure from + * here on must leave through "out" so that it is released. + */ + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto out; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto out; + } + + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + e->route_dev = route_dev; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes its validity state, we would find the relevant neigh + * in the hash. 
+ */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) + goto free_encap; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); + + /* add ethernet header */ + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, + ETH_P_IPV6); + + /* add ip header */ + ip6_flow_hdr(ip6h, tun_key->tos, 0); + /* the HW fills up ipv6 payload len */ + ip6h->hop_limit = ttl; + ip6h->daddr = fl6.daddr; + ip6h->saddr = fl6.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), + &ip6h->nexthdr, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. + */ + goto out; + } + + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +out: + if (n) + neigh_release(n); + return err; +} + +int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return MLX5E_TC_TUNNEL_TYPE_VXLAN; + else if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return MLX5E_TC_TUNNEL_TYPE_GRETAP; + else + return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; +} + +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev) +{ + int tunnel_type = mlx5e_tc_tun_get_type(netdev); + + if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && + MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) + return true; + else if 
(tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP && + MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) + return true; + else + return false; +} + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev); + + if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + int dst_port = be16_to_cpu(e->tun_info.key.tp_dst); + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + e->tunnel_hlen = VXLAN_HLEN; + } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; + e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags); + } else { + e->reformat_type = -1; + e->tunnel_hlen = -1; + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + void *misc_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + misc_parameters); + struct flow_match_ports enc_ports; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* Full udp dst port must be given */ + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || + memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "VXLAN decap filter must include enc_dst_port condition"); + netdev_warn(priv->netdev, + "VXLAN decap filter must include enc_dst_port condition\n"); + 
return -EOPNOTSUPP; + } + + /* udp dst port must be known as a VXLAN port */ + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + /* dst UDP port is valid here */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, + ntohs(enc_ports.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + ntohs(enc_ports.key->dst)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, + ntohs(enc_ports.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, + ntohs(enc_ports.key->src)); + + /* match on VNI */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(enc_keyid.key->keyid)); + } + return 0; +} + +/* Build the match for a GRETAP decap rule: outer ip_protocol == IPPROTO_GRE, + * gre_protocol == ETH_P_TEB and, when the rule carries an enc keyid, the GRE + * key. Returns -EOPNOTSUPP when the nvgre_encap_decap capability is not set. + */ +static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *outer_headers_c, + void *outer_headers_v) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + + if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "GRE HW offloading is not supported"); + netdev_warn(priv->netdev, "GRE HW offloading is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, 
outer_headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ip_protocol, IPPROTO_GRE); + + /* gre protocol*/ + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); + MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); + + /* gre key */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + MLX5_SET(fte_match_set_misc, misc_c, + gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, + gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); + } + + return 0; +} + +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v, u8 *match_level) +{ + int tunnel_type; + int err = 0; + + tunnel_type = mlx5e_tc_tun_get_type(filter_dev); + if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + *match_level = MLX5_MATCH_L4; + err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, + headers_c, headers_v); + } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + *match_level = MLX5_MATCH_L3; + err = mlx5e_tc_tun_parse_gretap(priv, spec, f, + headers_c, headers_v); + } else { + netdev_warn(priv->netdev, + "decapsulation offload is not supported for %s net device (%d)\n", + mlx5e_netdev_kind(filter_dev), tunnel_type); + return -EOPNOTSUPP; + } + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h new file mode 100644 index 000000000000..b63f15de899d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_TC_TUNNEL_H__ +#define __MLX5_EN_TC_TUNNEL_H__ + +#include <linux/netdevice.h> +#include <linux/mlx5/fs.h> +#include <net/pkt_cls.h> +#include <linux/netlink.h> +#include "en.h" +#include "en_rep.h" + +enum { + MLX5E_TC_TUNNEL_TYPE_UNKNOWN, + MLX5E_TC_TUNNEL_TYPE_VXLAN, + MLX5E_TC_TUNNEL_TYPE_GRETAP +}; + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); + +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); + +int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev); +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev); + +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v, u8 *match_level); + +#endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index ad6d471d00dd..cad34d6f5f45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -33,6 +33,26 @@ #include <linux/bpf_trace.h> #include "en/xdp.h" +int mlx5e_xdp_max_mtu(struct mlx5e_params *params) +{ + int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + + /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). + * The condition checked in mlx5e_rx_is_linear_skb is: + * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1) + * (Note that hw_mtu == sw_mtu + hard_mtu.) 
+ * What is returned from this function is: + * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2) + * After assigning sw_mtu := max_mtu, the left side of (1) turns to + * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE, + * because both PAGE_SIZE and S are already aligned. Any number greater + * than max_mtu would make the left side of (1) greater than PAGE_SIZE, + * so max_mtu is the maximum MTU allowed. + */ + + return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr)); +} + static inline bool mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, struct xdp_buff *xdp) @@ -47,7 +67,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, xdpi.xdpf->len, PCI_DMA_TODEVICE); xdpi.di = *di; - return mlx5e_xmit_xdp_frame(sq, &xdpi); + return sq->xmit_xdp_frame(sq, &xdpi); } /* returns true if packet was consumed by xdp */ @@ -102,7 +122,98 @@ xdp_abort: } } -bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) +static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5_wq_cyc *wq = &sq->wq; + u8 wqebbs; + u16 pi; + + mlx5e_xdpsq_fetch_wqe(sq, &session->wqe); + + prefetchw(session->wqe->data); + session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + +/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS + * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. + * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a + * full-session WQE be cache-aligned. 
+ */ +#if L1_CACHE_BYTES < 128 +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) +#else +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) +#endif + + wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi), + MLX5E_XDP_MPW_MAX_WQEBBS); + + session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs; +} + +static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl; + u16 ds_count = session->ds_count; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + + cseg->opmod_idx_opcode = + cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); + + wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS); + wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT; + + sq->pc += wi->num_wqebbs; + + sq->doorbell_cseg = cseg; + + session->wqe = NULL; /* Close session */ +} + +static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_info *xdpi) +{ + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_xdpsq_stats *stats = sq->stats; + + dma_addr_t dma_addr = xdpi->dma_addr; + struct xdp_frame *xdpf = xdpi->xdpf; + unsigned int dma_len = xdpf->len; + + if (unlikely(sq->hw_mtu < dma_len)) { + stats->err++; + return false; + } + + if (unlikely(!session->wqe)) { + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + MLX5_SEND_WQE_MAX_WQEBBS))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + stats->full++; + return false; + } + + mlx5e_xdp_mpwqe_session_start(sq); + } + + mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len); + + if (unlikely(session->ds_count == session->max_ds_count)) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + stats->xmit++; + return true; +} + +static bool mlx5e_xmit_xdp_frame(struct 
mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) { struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); @@ -126,11 +237,8 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) } if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { - if (sq->doorbell) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - sq->doorbell = false; - } + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); stats->full++; return false; } @@ -152,23 +260,20 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - /* move page to reference to sq responsibility, - * and mark so it's not put back in page-cache. - */ - sq->db.xdpi[pi] = *xdpi; sq->pc++; - sq->doorbell = true; + sq->doorbell_cseg = cseg; + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); stats->xmit++; return true; } -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) { + struct mlx5e_xdp_info_fifo *xdpi_fifo; struct mlx5e_xdpsq *sq; struct mlx5_cqe64 *cqe; - struct mlx5e_rq *rq; bool is_redirect; u16 sqcc; int i; @@ -182,8 +287,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) if (!cqe) return false; - is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); - rq = container_of(sq, struct mlx5e_rq, xdpsq); + is_redirect = !rq; + xdpi_fifo = &sq->db.xdpi_fifo; /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur @@ -199,20 +304,33 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) wqe_counter = be16_to_cpu(cqe->wqe_counter); + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) + netdev_WARN_ONCE(sq->channel->netdev, + "Bad OP in XDPSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + do { - u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); - struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci]; + struct mlx5e_xdp_wqe_info *wi; + u16 ci, j; last_wqe = (sqcc == 
wqe_counter); - sqcc++; - - if (is_redirect) { - xdp_return_frame(xdpi->xdpf); - dma_unmap_single(sq->pdev, xdpi->dma_addr, - xdpi->xdpf->len, DMA_TO_DEVICE); - } else { - /* Recycle RX page */ - mlx5e_page_release(rq, &xdpi->di, true); + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + + sqcc += wi->num_wqebbs; + + for (j = 0; j < wi->num_ds; j++) { + struct mlx5e_xdp_info xdpi = + mlx5e_xdpi_fifo_pop(xdpi_fifo); + + if (is_redirect) { + dma_unmap_single(sq->pdev, xdpi.dma_addr, + xdpi.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.xdpf); + } else { + /* Recycle RX page */ + mlx5e_page_release(rq, &xdpi.di, true); + } } } while (!last_wqe); } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); @@ -228,27 +346,32 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) { - struct mlx5e_rq *rq; - bool is_redirect; - - is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); - rq = is_redirect ? 
NULL : container_of(sq, struct mlx5e_rq, xdpsq); + struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; + bool is_redirect = !rq; while (sq->cc != sq->pc) { - u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); - struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci]; - - sq->cc++; - - if (is_redirect) { - xdp_return_frame(xdpi->xdpf); - dma_unmap_single(sq->pdev, xdpi->dma_addr, - xdpi->xdpf->len, DMA_TO_DEVICE); - } else { - /* Recycle RX page */ - mlx5e_page_release(rq, &xdpi->di, false); + struct mlx5e_xdp_wqe_info *wi; + u16 ci, i; + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); + wi = &sq->db.wqe_info[ci]; + + sq->cc += wi->num_wqebbs; + + for (i = 0; i < wi->num_ds; i++) { + struct mlx5e_xdp_info xdpi = + mlx5e_xdpi_fifo_pop(xdpi_fifo); + + if (is_redirect) { + dma_unmap_single(sq->pdev, xdpi.dma_addr, + xdpi.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.xdpf); + } else { + /* Recycle RX page */ + mlx5e_page_release(rq, &xdpi.di, false); + } } } } @@ -262,7 +385,8 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, int sq_num; int i; - if (unlikely(!test_bit(MLX5E_STATE_OPENED, &priv->state))) + /* this flag is sufficient, no need to test internal sq state */ + if (unlikely(!mlx5e_xdp_tx_is_enabled(priv))) return -ENETDOWN; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) @@ -275,9 +399,6 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, sq = &priv->channels.c[sq_num]->xdpsq; - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) - return -ENETDOWN; - for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; struct mlx5e_xdp_info xdpi; @@ -292,7 +413,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, xdpi.xdpf = xdpf; - if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) { + if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) { dma_unmap_single(sq->pdev, xdpi.dma_addr, xdpf->len, DMA_TO_DEVICE); xdp_return_frame_rx_napi(xdpf); @@ -300,8 +421,33 @@ int mlx5e_xdp_xmit(struct 
net_device *dev, int n, struct xdp_frame **frames, } } - if (flags & XDP_XMIT_FLUSH) + if (flags & XDP_XMIT_FLUSH) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); mlx5e_xmit_xdp_doorbell(sq); + } return n - drops; } + +void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) +{ + struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + + if (xdpsq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(xdpsq); + + mlx5e_xmit_xdp_doorbell(xdpsq); + + if (xdpsq->redirect_flush) { + xdp_do_flush_map(); + xdpsq->redirect_flush = false; + } +} + +void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) +{ + sq->xmit_xdp_frame = is_mpw ? + mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 6dfab045925f..553956cadc8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -34,30 +34,81 @@ #include "en.h" -#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \ - MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM))) #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) -#define MLX5E_XDP_TX_DS_COUNT \ - ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) +#define MLX5E_XDP_TX_EMPTY_DS_COUNT \ + (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) +#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) +int mlx5e_xdp_max_mtu(struct mlx5e_params *params); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, void *va, u16 *rx_headroom, u32 *len); -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); - -bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq); +void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw); +void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); int 
mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); +static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + +static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) +{ + clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); + /* let other device's napi(s) see our new state */ + synchronize_rcu(); +} + +static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { + if (sq->doorbell_cseg) { + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg); + sq->doorbell_cseg = NULL; + } +} + +static inline void +mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len) +{ + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_data_seg *dseg = + (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; +} + +static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, + struct mlx5e_tx_wqe **wqe) +{ struct mlx5_wq_cyc *wq = &sq->wq; - struct mlx5e_tx_wqe *wqe; - u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - wqe = mlx5_wq_cyc_get_wqe(wq, pi); + *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memset(*wqe, 0, sizeof(**wqe)); +} - mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl); +static inline void +mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo, + struct mlx5e_xdp_info *xi) +{ + u32 i = (*fifo->pc)++ & fifo->mask; + + fifo->xi[i] = *xi; +} + +static inline struct mlx5e_xdp_info +mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo) +{ + return fifo->xi[(*fifo->cc)++ & fifo->mask]; } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 128a82b1dbfc..53608afd39b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -254,11 +254,13 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, struct mlx5e_ipsec_metadata *mdata; struct mlx5e_ipsec_sa_entry *sa_entry; struct xfrm_state *x; + struct sec_path *sp; if (!xo) return skb; - if (unlikely(skb->sp->len != 1)) { + sp = skb_sec_path(skb); + if (unlikely(sp->len != 1)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle); goto drop; } @@ -305,10 +307,11 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, struct mlx5e_priv *priv = netdev_priv(netdev); struct xfrm_offload *xo; struct xfrm_state *xs; + struct sec_path *sp; u32 sa_handle; - skb->sp = secpath_dup(skb->sp); - if (unlikely(!skb->sp)) { + sp = secpath_set(skb); + if (unlikely(!sp)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc); return NULL; } @@ -320,8 +323,9 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, return NULL; } - skb->sp->xvec[skb->sp->len++] = xs; - skb->sp->olen++; + sp = skb_sec_path(skb); + sp->xvec[sp->len++] = xs; + sp->olen++; xo = xfrm_offload(skb); xo->flags = CRYPTO_DONE; @@ -372,10 +376,11 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features) { + struct sec_path *sp = skb_sec_path(skb); struct xfrm_state *x; - if (skb->sp && skb->sp->len) { - x = skb->sp->xvec[0]; + if (sp && sp->len) { + x = sp->xvec[0]; if (x && x->xso.offload_handle) return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 3078491cc0d0..1539cf3de5dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -45,7 +45,9 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, if (err) return err; + mutex_lock(&mdev->mlx5e_res.td.list_lock); list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); return 0; } @@ -53,8 +55,10 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir) { + mutex_lock(&mdev->mlx5e_res.td.list_lock); mlx5_core_destroy_tir(mdev, tir->tirn); list_del(&tir->list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); } static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, @@ -114,6 +118,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) } INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); + mutex_init(&mdev->mlx5e_res.td.list_lock); return 0; @@ -141,15 +146,17 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_tir *tir; - int err = -ENOMEM; + int err = 0; u32 tirn = 0; int inlen; void *in; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + if (!in) { + err = -ENOMEM; goto out; + } if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, @@ -157,6 +164,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + mutex_lock(&mdev->mlx5e_res.td.list_lock); list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { tirn = tir->tirn; err = mlx5_core_modify_tir(mdev, tirn, in, inlen); @@ -168,6 +176,7 @@ out: kvfree(in); if (err) netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 722998d68564..554672edf8c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1126,9 +1126,7 @@ static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv) priv->channels.params.tx_min_inline_mode) goto out; - if (mlx5e_open_channels(priv, &new_channels)) - goto out; - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + mlx5e_safe_switch_channels(priv, &new_channels, NULL); out: mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index f480763dcd0d..78dc8fe2a83c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -63,86 +63,158 @@ struct ptys2ethtool_config { __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); }; -static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER]; +static +struct ptys2ethtool_config ptys2legacy_ethtool_table[MLX5E_LINK_MODES_NUMBER]; +static +struct ptys2ethtool_config ptys2ext_ethtool_table[MLX5E_EXT_LINK_MODES_NUMBER]; -#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, ...) \ +#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, table, ...) 
\ ({ \ struct ptys2ethtool_config *cfg; \ const unsigned int modes[] = { __VA_ARGS__ }; \ - unsigned int i; \ - cfg = &ptys2ethtool_table[reg_]; \ + unsigned int i, bit, idx; \ + cfg = &ptys2##table##_ethtool_table[reg_]; \ bitmap_zero(cfg->supported, \ __ETHTOOL_LINK_MODE_MASK_NBITS); \ bitmap_zero(cfg->advertised, \ __ETHTOOL_LINK_MODE_MASK_NBITS); \ for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ - __set_bit(modes[i], cfg->supported); \ - __set_bit(modes[i], cfg->advertised); \ + bit = modes[i] % 64; \ + idx = modes[i] / 64; \ + __set_bit(bit, &cfg->supported[idx]); \ + __set_bit(bit, &cfg->advertised[idx]); \ } \ }) void mlx5e_build_ptys2ethtool_map(void) { - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, + memset(ptys2legacy_ethtool_table, 0, sizeof(ptys2legacy_ethtool_table)); + memset(ptys2ext_ethtool_table, 0, sizeof(ptys2ext_ethtool_table)); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, legacy, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, legacy, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, legacy, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, legacy, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, legacy, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, legacy, ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, 
legacy, ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, legacy, ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, legacy, ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, legacy, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, legacy, ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, legacy, ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, legacy, ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, legacy, ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy, ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy, ETHTOOL_LINK_MODE_10000baseT_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, 
legacy, ETHTOOL_LINK_MODE_25000baseCR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, legacy, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, legacy, ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, legacy, ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, legacy, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_SGMII_100M, ext, + ETHTOOL_LINK_MODE_100baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_X_SGMII, ext, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_5GBASE_R, ext, + ETHTOOL_LINK_MODE_5000baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_XFI_XAUI_1, ext, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_XLAUI_4_XLPPI_4, ext, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GAUI_1_25GBASE_CR_KR, ext, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2, + ext, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + 
ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR, ext, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_CAUI_4_100GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_2_100GBASE_CR2_KR2, ext, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_4_200GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT); +} + +static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, + struct ptys2ethtool_config **arr, + u32 *size) +{ + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + + *arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + *size = ext ? 
ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); } -static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { - "rx_cqe_moder", - "tx_cqe_moder", - "rx_cqe_compress", - "rx_striding_rq", - "rx_no_csum_complete", +typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); + +struct pflag_desc { + char name[ETH_GSTRING_LEN]; + mlx5e_pflag_handler handler; }; +static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS]; + int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { int i, num_stats = 0; @@ -153,7 +225,7 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) num_stats += mlx5e_stats_grps[i].get_num_stats(priv); return num_stats; case ETH_SS_PRIV_FLAGS: - return ARRAY_SIZE(mlx5e_priv_flags); + return MLX5E_NUM_PFLAGS; case ETH_SS_TEST: return mlx5e_self_test_num(priv); /* fallthrough */ @@ -183,8 +255,9 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_PRIV_FLAGS: - for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++) - strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]); + for (i = 0; i < MLX5E_NUM_PFLAGS; i++) + strcpy(data + i * ETH_GSTRING_LEN, + mlx5e_priv_flags[i].name); break; case ETH_SS_TEST: @@ -296,11 +369,7 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, goto unlock; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto unlock; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); unlock: mutex_unlock(&priv->state_lock); @@ -352,32 +421,32 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, new_channels.params = priv->channels.params; new_channels.params.num_channels = count; - if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = 
new_channels.params; + if (!netif_is_rxfh_configured(priv->netdev)) + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); goto out; } - /* Create fresh channels with new parameters */ - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto out; - arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE; if (arfs_enabled) mlx5e_arfs_disable(priv); + if (!netif_is_rxfh_configured(priv->netdev)) + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); + /* Switch to new channels, set new parameters and close old ones */ - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (arfs_enabled) { - err = mlx5e_arfs_enable(priv); - if (err) + int err2 = mlx5e_arfs_enable(priv); + + if (err2) netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n", - __func__, err); + __func__, err2); } out: @@ -503,12 +572,7 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, goto out; } - /* open fresh channels with new coal parameters */ - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto out; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); out: mutex_unlock(&priv->state_lock); @@ -523,27 +587,37 @@ static int mlx5e_set_coalesce(struct net_device *netdev, return mlx5e_ethtool_set_coalesce(priv, coal); } -static void ptys2ethtool_supported_link(unsigned long *supported_modes, +static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, + unsigned long *supported_modes, u32 eth_proto_cap) { unsigned long proto_cap = eth_proto_cap; + struct ptys2ethtool_config *table; + u32 max_size; int proto; - for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) + mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size); + for_each_set_bit(proto, &proto_cap, max_size) bitmap_or(supported_modes, supported_modes, - 
ptys2ethtool_table[proto].supported, + table[proto].supported, __ETHTOOL_LINK_MODE_MASK_NBITS); } static void ptys2ethtool_adver_link(unsigned long *advertising_modes, - u32 eth_proto_cap) + u32 eth_proto_cap, bool ext) { unsigned long proto_cap = eth_proto_cap; + struct ptys2ethtool_config *table; + u32 max_size; int proto; - for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) + table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + max_size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); + + for_each_set_bit(proto, &proto_cap, max_size) bitmap_or(advertising_modes, advertising_modes, - ptys2ethtool_table[proto].advertised, + table[proto].advertised, __ETHTOOL_LINK_MODE_MASK_NBITS); } @@ -693,13 +767,14 @@ static void get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, struct ethtool_link_ksettings *link_ksettings) { + struct mlx5e_priv *priv = netdev_priv(netdev); u32 speed = SPEED_UNKNOWN; u8 duplex = DUPLEX_UNKNOWN; if (!netif_carrier_ok(netdev)) goto out; - speed = mlx5e_port_ptys2speed(eth_proto_oper); + speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper); if (!speed) { speed = SPEED_UNKNOWN; goto out; @@ -712,22 +787,22 @@ out: link_ksettings->base.duplex = duplex; } -static void get_supported(u32 eth_proto_cap, +static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap, struct ethtool_link_ksettings *link_ksettings) { unsigned long *supported = link_ksettings->link_modes.supported; + ptys2ethtool_supported_link(mdev, supported, eth_proto_cap); - ptys2ethtool_supported_link(supported, eth_proto_cap); ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); } -static void get_advertising(u32 eth_proto_cap, u8 tx_pause, - u8 rx_pause, - struct ethtool_link_ksettings *link_ksettings) +static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause, + struct ethtool_link_ksettings *link_ksettings, + bool ext) { unsigned long *advertising = 
link_ksettings->link_modes.advertising; + ptys2ethtool_adver_link(advertising, eth_proto_cap, ext); - ptys2ethtool_adver_link(advertising, eth_proto_cap); if (rx_pause) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); if (tx_pause ^ rx_pause) @@ -777,18 +852,18 @@ static u8 get_connector_port(u32 eth_proto, u8 connector_type) return PORT_OTHER; } -static void get_lp_advertising(u32 eth_proto_lp, +static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, struct ethtool_link_ksettings *link_ksettings) { unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); - ptys2ethtool_adver_link(lp_advertising, eth_proto_lp); + ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); } -static int mlx5e_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *link_ksettings) +int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; u32 rx_pause = 0; @@ -800,31 +875,50 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, u8 an_disable_admin; u8 an_status; u8 connector_type; + bool admin_ext; + bool ext; int err; err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (err) { - netdev_err(netdev, "%s: query port ptys failed: %d\n", + netdev_err(priv->netdev, "%s: query port ptys failed: %d\n", __func__, err); goto err_query_regs; } + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_admin); + /* Fields: eth_proto_admin and ext_eth_proto_admin are + * mutually exclusive. Hence try reading legacy advertising + * when extended advertising is zero. 
+ * admin_ext indicates how eth_proto_admin should be + * interpreted + */ + admin_ext = ext; + if (ext && !eth_proto_admin) { + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, false, + eth_proto_admin); + admin_ext = false; + } - eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); - an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); - an_status = MLX5_GET(ptys_reg, out, an_status); - connector_type = MLX5_GET(ptys_reg, out, connector_type); + eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); + an_status = MLX5_GET(ptys_reg, out, an_status); + connector_type = MLX5_GET(ptys_reg, out, connector_type); mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); - get_supported(eth_proto_cap, link_ksettings); - get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings); - get_speed_duplex(netdev, eth_proto_oper, link_ksettings); + get_supported(mdev, eth_proto_cap, link_ksettings); + get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings, + admin_ext); + get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? 
eth_proto_oper : eth_proto_cap; @@ -832,7 +926,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, connector_type); ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin, connector_type); - get_lp_advertising(eth_proto_lp, link_ksettings); + get_lp_advertising(mdev, eth_proto_lp, link_ksettings); if (an_status == MLX5_AN_COMPLETE) ethtool_link_ksettings_add_link_mode(link_ksettings, @@ -843,9 +937,12 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Autoneg); - if (get_fec_supported_advertised(mdev, link_ksettings)) - netdev_dbg(netdev, "%s: FEC caps query failed: %d\n", + err = get_fec_supported_advertised(mdev, link_ksettings); + if (err) { + netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n", __func__, err); + err = 0; /* don't fail caps query because of FEC error */ + } if (!an_disable_admin) ethtool_link_ksettings_add_link_mode(link_ksettings, @@ -855,12 +952,22 @@ err_query_regs: return err; } +static int mlx5e_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); +} + static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) { u32 i, ptys_modes = 0; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (bitmap_intersects(ptys2ethtool_table[i].advertised, + if (*ptys2legacy_ethtool_table[i].advertised == 0) + continue; + if (bitmap_intersects(ptys2legacy_ethtool_table[i].advertised, link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) ptys_modes |= MLX5E_PROT_MASK(i); @@ -869,14 +976,34 @@ static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) return ptys_modes; } -static int mlx5e_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *link_ksettings) +static u32 mlx5e_ethtool2ptys_ext_adver_link(const unsigned long *link_modes) 
+{ + u32 i, ptys_modes = 0; + unsigned long modes[2]; + + for (i = 0; i < MLX5E_EXT_LINK_MODES_NUMBER; ++i) { + if (*ptys2ext_ethtool_table[i].advertised == 0) + continue; + memset(modes, 0, sizeof(modes)); + bitmap_and(modes, ptys2ext_ethtool_table[i].advertised, + link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS); + + if (modes[0] == ptys2ext_ethtool_table[i].advertised[0] && + modes[1] == ptys2ext_ethtool_table[i].advertised[1]) + ptys_modes |= MLX5E_PROT_MASK(i); + } + return ptys_modes; +} + +int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct ethtool_link_ksettings *link_ksettings) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - u32 eth_proto_cap, eth_proto_admin; + struct mlx5e_port_eth_proto eproto; bool an_changes = false; u8 an_disable_admin; + bool ext_supported; + bool ext_requested; u8 an_disable_cap; bool an_disable; u32 link_modes; @@ -884,54 +1011,66 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev, u32 speed; int err; - speed = link_ksettings->base.speed; + u32 (*ethtool2ptys_adver_func)(const unsigned long *adver); - link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? - mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : - mlx5e_port_speed2linkmodes(speed); +#define MLX5E_PTYS_EXT ((1ULL << ETHTOOL_LINK_MODE_50000baseKR_Full_BIT) - 1) + + ext_requested = !!(link_ksettings->link_modes.advertising[0] > + MLX5E_PTYS_EXT || + link_ksettings->link_modes.advertising[1]); + ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + ext_requested &= ext_supported; - err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN); + speed = link_ksettings->base.speed; + ethtool2ptys_adver_func = ext_requested ? 
+ mlx5e_ethtool2ptys_ext_adver_link : + mlx5e_ethtool2ptys_adver_link; + err = mlx5_port_query_eth_proto(mdev, 1, ext_requested, &eproto); if (err) { - netdev_err(netdev, "%s: query port eth proto cap failed: %d\n", + netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); goto out; } + link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? + ethtool2ptys_adver_func(link_ksettings->link_modes.advertising) : + mlx5e_port_speed2linkmodes(mdev, speed); - link_modes = link_modes & eth_proto_cap; + link_modes = link_modes & eproto.cap; if (!link_modes) { - netdev_err(netdev, "%s: Not supported link mode(s) requested", + netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", __func__); err = -EINVAL; goto out; } - err = mlx5_query_port_proto_admin(mdev, &eth_proto_admin, MLX5_PTYS_EN); - if (err) { - netdev_err(netdev, "%s: query port eth proto admin failed: %d\n", - __func__, err); - goto out; - } - - mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status, - &an_disable_cap, &an_disable_admin); + mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap, + &an_disable_admin); an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE; an_changes = ((!an_disable && an_disable_admin) || (an_disable && !an_disable_admin)); - if (!an_changes && link_modes == eth_proto_admin) + if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_requested); mlx5_toggle_port_link(mdev); out: return err; } +static int mlx5e_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); +} + u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv) { - return sizeof(priv->channels.params.toeplitz_hash_key); + return sizeof(priv->rss_params.toeplitz_hash_key); } 
static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) @@ -957,50 +1096,27 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rss_params *rss = &priv->rss_params; if (indir) - memcpy(indir, priv->channels.params.indirection_rqt, - sizeof(priv->channels.params.indirection_rqt)); + memcpy(indir, rss->indirection_rqt, + sizeof(rss->indirection_rqt)); if (key) - memcpy(key, priv->channels.params.toeplitz_hash_key, - sizeof(priv->channels.params.toeplitz_hash_key)); + memcpy(key, rss->toeplitz_hash_key, + sizeof(rss->toeplitz_hash_key)); if (hfunc) - *hfunc = priv->channels.params.rss_hfunc; + *hfunc = rss->hfunc; return 0; } -static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) -{ - void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - struct mlx5_core_dev *mdev = priv->mdev; - int ctxlen = MLX5_ST_SZ_BYTES(tirc); - int tt; - - MLX5_SET(modify_tir_in, in, bitmask.hash, 1); - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); - mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); - } - - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) - return; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); - mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen); - } -} - static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rss_params *rss = &priv->rss_params; int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); bool hash_changed = false; void *in; @@ -1016,15 +1132,14 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(&priv->state_lock); - if (hfunc != ETH_RSS_HASH_NO_CHANGE && - hfunc != 
priv->channels.params.rss_hfunc) { - priv->channels.params.rss_hfunc = hfunc; + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) { + rss->hfunc = hfunc; hash_changed = true; } if (indir) { - memcpy(priv->channels.params.indirection_rqt, indir, - sizeof(priv->channels.params.indirection_rqt)); + memcpy(rss->indirection_rqt, indir, + sizeof(rss->indirection_rqt)); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { u32 rqtn = priv->indir_rqt.rqtn; @@ -1032,7 +1147,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, .is_rss = true, { .rss = { - .hfunc = priv->channels.params.rss_hfunc, + .hfunc = rss->hfunc, .channels = &priv->channels, }, }, @@ -1043,10 +1158,9 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, } if (key) { - memcpy(priv->channels.params.toeplitz_hash_key, key, - sizeof(priv->channels.params.toeplitz_hash_key)); - hash_changed = hash_changed || - priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP; + memcpy(rss->toeplitz_hash_key, key, + sizeof(rss->toeplitz_hash_key)); + hash_changed = hash_changed || rss->hfunc == ETH_RSS_HASH_TOP; } if (hash_changed) @@ -1150,25 +1264,31 @@ static int mlx5e_set_tunable(struct net_device *dev, return err; } -static void mlx5e_get_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) +void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; int err; err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause, &pauseparam->tx_pause); if (err) { - netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n", + netdev_err(priv->netdev, "%s: mlx5_query_port_pause failed:0x%x\n", __func__, err); } } -static int mlx5e_set_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) +static void mlx5e_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct 
mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_ethtool_get_pauseparam(priv, pauseparam); +} + +int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; int err; @@ -1179,13 +1299,21 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, pauseparam->rx_pause ? 1 : 0, pauseparam->tx_pause ? 1 : 0); if (err) { - netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n", + netdev_err(priv->netdev, "%s: mlx5_set_port_pause failed:0x%x\n", __func__, err); } return err; } +static int mlx5e_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_pauseparam(priv, pauseparam); +} + int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info) { @@ -1458,7 +1586,7 @@ static int mlx5e_get_module_info(struct net_device *netdev, break; case MLX5_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH; break; default: netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", @@ -1505,8 +1633,6 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, return 0; } -typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); - static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, bool is_rx_cq) { @@ -1515,7 +1641,6 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, struct mlx5e_channels new_channels = {}; bool mode_changed; u8 cq_period_mode, current_cq_period_mode; - int err = 0; cq_period_mode = enable ? 
MLX5_CQ_PERIOD_MODE_START_FROM_CQE : @@ -1543,12 +1668,7 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, return 0; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - return err; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); - return 0; + return mlx5e_safe_switch_channels(priv, &new_channels, NULL); } static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) @@ -1581,11 +1701,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val return 0; } - err = mlx5e_open_channels(priv, &new_channels); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (err) return err; - mlx5e_switch_priv_channels(priv, &new_channels, NULL); mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n", MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF"); @@ -1618,7 +1737,6 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; - int err; if (enable) { if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) @@ -1640,12 +1758,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) return 0; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - return err; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); - return 0; + return mlx5e_safe_switch_channels(priv, &new_channels, NULL); } static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) @@ -1655,7 +1768,8 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) struct mlx5e_channel *c; int i; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || + priv->channels.params.xdp_prog) return 0; for (i = 0; i < channels->num; i++) { @@ -1669,23 +1783,54 @@ static int set_pflag_rx_no_csum_complete(struct 
net_device *netdev, bool enable) return 0; } +static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + int err; + + if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)) + return -EOPNOTSUPP; + + new_channels.params = priv->channels.params; + + MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } + + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + return err; +} + +static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = { + { "rx_cqe_moder", set_pflag_rx_cqe_based_moder }, + { "tx_cqe_moder", set_pflag_tx_cqe_based_moder }, + { "rx_cqe_compress", set_pflag_rx_cqe_compress }, + { "rx_striding_rq", set_pflag_rx_striding_rq }, + { "rx_no_csum_complete", set_pflag_rx_no_csum_complete }, + { "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe }, +}; + static int mlx5e_handle_pflag(struct net_device *netdev, u32 wanted_flags, - enum mlx5e_priv_flag flag, - mlx5e_pflag_handler pflag_handler) + enum mlx5e_priv_flag flag) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool enable = !!(wanted_flags & flag); + bool enable = !!(wanted_flags & BIT(flag)); u32 changes = wanted_flags ^ priv->channels.params.pflags; int err; - if (!(changes & flag)) + if (!(changes & BIT(flag))) return 0; - err = pflag_handler(netdev, enable); + err = mlx5e_priv_flags[flag].handler(netdev, enable); if (err) { - netdev_err(netdev, "%s private flag 0x%x failed err %d\n", - enable ? "Enable" : "Disable", flag, err); + netdev_err(netdev, "%s private flag '%s' failed err %d\n", + enable ? 
"Enable" : "Disable", mlx5e_priv_flags[flag].name, err); return err; } @@ -1696,38 +1841,17 @@ static int mlx5e_handle_pflag(struct net_device *netdev, static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) { struct mlx5e_priv *priv = netdev_priv(netdev); + enum mlx5e_priv_flag pflag; int err; mutex_lock(&priv->state_lock); - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_CQE_BASED_MODER, - set_pflag_rx_cqe_based_moder); - if (err) - goto out; - - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_TX_CQE_BASED_MODER, - set_pflag_tx_cqe_based_moder); - if (err) - goto out; - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_CQE_COMPRESS, - set_pflag_rx_cqe_compress); - if (err) - goto out; - - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_STRIDING_RQ, - set_pflag_rx_striding_rq); - if (err) - goto out; - - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, - set_pflag_rx_no_csum_complete); + for (pflag = 0; pflag < MLX5E_NUM_PFLAGS; pflag++) { + err = mlx5e_handle_pflag(netdev, pflags, pflag); + if (err) + break; + } -out: mutex_unlock(&priv->state_lock); /* Need to fix some features.. 
*/ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index c18dcebe1462..4421c10f58ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -771,6 +771,112 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) INIT_LIST_HEAD(&priv->fs.ethtool.rules); } +static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type) +{ + switch (flow_type) { + case TCP_V4_FLOW: + return MLX5E_TT_IPV4_TCP; + case TCP_V6_FLOW: + return MLX5E_TT_IPV6_TCP; + case UDP_V4_FLOW: + return MLX5E_TT_IPV4_UDP; + case UDP_V6_FLOW: + return MLX5E_TT_IPV6_UDP; + case AH_V4_FLOW: + return MLX5E_TT_IPV4_IPSEC_AH; + case AH_V6_FLOW: + return MLX5E_TT_IPV6_IPSEC_AH; + case ESP_V4_FLOW: + return MLX5E_TT_IPV4_IPSEC_ESP; + case ESP_V6_FLOW: + return MLX5E_TT_IPV6_IPSEC_ESP; + case IPV4_FLOW: + return MLX5E_TT_IPV4; + case IPV6_FLOW: + return MLX5E_TT_IPV6; + default: + return MLX5E_NUM_INDIR_TIRS; + } +} + +static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, + struct ethtool_rxnfc *nfc) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + enum mlx5e_traffic_types tt; + u8 rx_hash_field = 0; + void *in; + + tt = flow_type_to_traffic_type(nfc->flow_type); + if (tt == MLX5E_NUM_INDIR_TIRS) + return -EINVAL; + + /* RSS does not support anything other than hashing to queues + * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest + * port. 
+ */ + if (nfc->flow_type != TCP_V4_FLOW && + nfc->flow_type != TCP_V6_FLOW && + nfc->flow_type != UDP_V4_FLOW && + nfc->flow_type != UDP_V6_FLOW) + return -EOPNOTSUPP; + + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EOPNOTSUPP; + + if (nfc->data & RXH_IP_SRC) + rx_hash_field |= MLX5_HASH_FIELD_SEL_SRC_IP; + if (nfc->data & RXH_IP_DST) + rx_hash_field |= MLX5_HASH_FIELD_SEL_DST_IP; + if (nfc->data & RXH_L4_B_0_1) + rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_SPORT; + if (nfc->data & RXH_L4_B_2_3) + rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mutex_lock(&priv->state_lock); + + if (rx_hash_field == priv->rss_params.rx_hash_fields[tt]) + goto out; + + priv->rss_params.rx_hash_fields[tt] = rx_hash_field; + mlx5e_modify_tirs_hash(priv, in, inlen); + +out: + mutex_unlock(&priv->state_lock); + kvfree(in); + return 0; +} + +static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, + struct ethtool_rxnfc *nfc) +{ + enum mlx5e_traffic_types tt; + u32 hash_field = 0; + + tt = flow_type_to_traffic_type(nfc->flow_type); + if (tt == MLX5E_NUM_INDIR_TIRS) + return -EINVAL; + + hash_field = priv->rss_params.rx_hash_fields[tt]; + nfc->data = 0; + + if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP) + nfc->data |= RXH_IP_SRC; + if (hash_field & MLX5_HASH_FIELD_SEL_DST_IP) + nfc->data |= RXH_IP_DST; + if (hash_field & MLX5_HASH_FIELD_SEL_L4_SPORT) + nfc->data |= RXH_L4_B_0_1; + if (hash_field & MLX5_HASH_FIELD_SEL_L4_DPORT) + nfc->data |= RXH_L4_B_2_3; + + return 0; +} + int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { int err = 0; @@ -783,6 +889,9 @@ int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) case ETHTOOL_SRXCLSRLDEL: err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); break; + case ETHTOOL_SRXFH: + err = mlx5e_set_rss_hash_opt(priv, cmd); + break; default: err = -EOPNOTSUPP; break; @@ -810,6 +919,9 @@ int 
mlx5e_get_rxnfc(struct net_device *dev, case ETHTOOL_GRXCLSRLALL: err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); break; + case ETHTOOL_GRXFH: + err = mlx5e_get_rss_hash_opt(priv, info); + break; default: err = -EOPNOTSUPP; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b70cb6fd164c..46157e2a1e5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -35,6 +35,7 @@ #include <linux/mlx5/fs.h> #include <net/vxlan.h> #include <linux/bpf.h> +#include <linux/if_bridge.h> #include <net/page_pool.h> #include "eswitch.h" #include "en.h" @@ -49,6 +50,9 @@ #include "lib/clock.h" #include "en/port.h" #include "en/xdp.h" +#include "lib/eq.h" +#include "en/monitor_stats.h" +#include "en/reporter.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -59,6 +63,7 @@ struct mlx5e_rq_param { struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; + bool is_mpw; }; struct mlx5e_cq_param { @@ -168,8 +173,7 @@ static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) return order_base_2(mlx5e_rx_get_linear_frag_sz(params)); - return MLX5E_MPWQE_STRIDE_SZ(mdev, - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); + return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, @@ -228,7 +232,7 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) MLX5_WQ_TYPE_CYCLIC; } -static void mlx5e_update_carrier(struct mlx5e_priv *priv) +void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u8 port_state; @@ -267,7 +271,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) mlx5e_stats_grps[i].update_stats(priv); } -static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) +void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) { int 
i; @@ -298,33 +302,35 @@ void mlx5e_queue_update_stats(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->update_stats_work); } -static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, - enum mlx5_dev_event event, unsigned long param) +static int async_event(struct notifier_block *nb, unsigned long event, void *data) { - struct mlx5e_priv *priv = vpriv; + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + struct mlx5_eqe *eqe = data; - if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) - return; + if (event != MLX5_EVENT_TYPE_PORT_CHANGE) + return NOTIFY_DONE; - switch (event) { - case MLX5_DEV_EVENT_PORT_UP: - case MLX5_DEV_EVENT_PORT_DOWN: + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: queue_work(priv->wq, &priv->update_carrier_work); break; default: - break; + return NOTIFY_DONE; } + + return NOTIFY_OK; } static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { - set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); + priv->events_nb.notifier_call = async_event; + mlx5_notifier_register(priv->mdev, &priv->events_nb); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { - clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); - synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC)); + mlx5_notifier_unregister(priv->mdev, &priv->events_nb); } static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, @@ -945,7 +951,11 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (params->rx_dim_enabled) __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - if (params->pflags & MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) + /* We disable csum_complete when XDP is enabled since + * XDP programs might manipulate packets which will render + * skb->checksum incorrect. 
+ */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); return 0; @@ -988,18 +998,42 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq) { - kvfree(sq->db.xdpi); + kvfree(sq->db.xdpi_fifo.xi); + kvfree(sq->db.wqe_info); +} + +static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) +{ + struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + xdpi_fifo->xi = kvzalloc_node(sizeof(*xdpi_fifo->xi) * dsegs_per_wq, + GFP_KERNEL, numa); + if (!xdpi_fifo->xi) + return -ENOMEM; + + xdpi_fifo->pc = &sq->xdpi_fifo_pc; + xdpi_fifo->cc = &sq->xdpi_fifo_cc; + xdpi_fifo->mask = dsegs_per_wq - 1; + + return 0; } static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int err; - sq->db.xdpi = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.xdpi)), - GFP_KERNEL, numa); - if (!sq->db.xdpi) { - mlx5e_free_xdpsq_db(sq); + sq->db.wqe_info = kvzalloc_node(sizeof(*sq->db.wqe_info) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.wqe_info) return -ENOMEM; + + err = mlx5e_alloc_xdpsq_fifo(sq, numa); + if (err) { + mlx5e_free_xdpsq_db(sq); + return err; } return 0; @@ -1131,7 +1165,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } -static void mlx5e_sq_recover(struct work_struct *work); +static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, struct mlx5e_params *params, @@ -1153,7 +1187,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; - INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover); + INIT_WORK(&sq->recover_work, 
mlx5e_tx_err_cqe_work); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) @@ -1241,15 +1275,8 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, return err; } -struct mlx5e_modify_sq_param { - int curr_state; - int next_state; - bool rl_update; - int rl_index; -}; - -static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, - struct mlx5e_modify_sq_param *p) +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) { void *in; void *sqc; @@ -1347,17 +1374,7 @@ err_free_txqsq: return err; } -static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) -{ - WARN_ONCE(sq->cc != sq->pc, - "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", - sq->sqn, sq->cc, sq->pc); - sq->cc = 0; - sq->dma_fifo_cc = 0; - sq->pc = 0; -} - -static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); @@ -1366,7 +1383,7 @@ static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) netif_tx_start_queue(sq->txq); } -static inline void netif_tx_disable_queue(struct netdev_queue *txq) +void mlx5e_tx_disable_queue(struct netdev_queue *txq) { __netif_tx_lock_bh(txq); netif_tx_stop_queue(txq); @@ -1382,7 +1399,7 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) /* prevent netif_tx_wake_queue */ napi_synchronize(&c->napi); - netif_tx_disable_queue(sq->txq); + mlx5e_tx_disable_queue(sq->txq); /* last doorbell out, godspeed .. 
*/ if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { @@ -1402,6 +1419,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) struct mlx5_rate_limit rl = {0}; cancel_work_sync(&sq->dim.work); + cancel_work_sync(&sq->recover_work); mlx5e_destroy_sq(mdev, sq->sqn); if (sq->rate_limit) { rl.rate = sq->rate_limit; @@ -1411,105 +1429,12 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) mlx5e_free_txqsq(sq); } -static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) { - unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, + recover_work); - while (time_before(jiffies, exp_time)) { - if (sq->cc == sq->pc) - return 0; - - msleep(20); - } - - netdev_err(sq->channel->netdev, - "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", - sq->sqn, sq->cc, sq->pc); - - return -ETIMEDOUT; -} - -static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) -{ - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - struct mlx5e_modify_sq_param msp = {0}; - int err; - - msp.curr_state = curr_state; - msp.next_state = MLX5_SQC_STATE_RST; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); - return err; - } - - memset(&msp, 0, sizeof(msp)); - msp.curr_state = MLX5_SQC_STATE_RST; - msp.next_state = MLX5_SQC_STATE_RDY; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); - return err; - } - - return 0; -} - -static void mlx5e_sq_recover(struct work_struct *work) -{ - struct mlx5e_txqsq_recover *recover = - container_of(work, struct mlx5e_txqsq_recover, - recover_work); - struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq, - recover); - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = 
sq->channel->netdev; - u8 state; - int err; - - err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); - if (err) { - netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", - sq->sqn, err); - return; - } - - if (state != MLX5_RQC_STATE_ERR) { - netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); - return; - } - - netif_tx_disable_queue(sq->txq); - - if (mlx5e_wait_for_sq_flush(sq)) - return; - - /* If the interval between two consecutive recovers per SQ is too - * short, don't recover to avoid infinite loop of ERR_CQE -> recover. - * If we reached this state, there is probably a bug that needs to be - * fixed. let's keep the queue close and let tx timeout cleanup. - */ - if (jiffies_to_msecs(jiffies - recover->last_recover) < - MLX5E_SQ_RECOVER_MIN_INTERVAL) { - netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n", - sq->sqn); - return; - } - - /* At this point, no new packets will arrive from the stack as TXQ is - * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all - * pending WQEs. SQ can safely reset the SQ. 
- */ - if (mlx5e_sq_to_ready(sq, state)) - return; - - mlx5e_reset_txqsq_cc_pc(sq); - sq->stats->recover++; - recover->last_recover = jiffies; - mlx5e_activate_txqsq(sq); + mlx5e_tx_reporter_err_cqe(sq); } static int mlx5e_open_icosq(struct mlx5e_channel *c, @@ -1558,11 +1483,8 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_xdpsq *sq, bool is_redirect) { - unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT; struct mlx5e_create_sq_param csp = {}; - unsigned int inline_hdr_sz = 0; int err; - int i; err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect); if (err) @@ -1573,30 +1495,40 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; - if (is_redirect) - set_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); if (err) goto err_free_xdpsq; - if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - inline_hdr_sz = MLX5E_XDP_MIN_INLINE; - ds_cnt++; - } + mlx5e_set_xmit_fp(sq, param->is_mpw); + + if (!param->is_mpw) { + unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT; + unsigned int inline_hdr_sz = 0; + int i; + + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + ds_cnt++; + } - /* Pre initialize fixed WQE fields */ - for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - struct mlx5_wqe_data_seg *dseg; + /* Pre initialize fixed WQE fields */ + for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[i]; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; - cseg->qpn_ds = cpu_to_be32((sq->sqn 
<< 8) | ds_cnt); - eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); - dseg->lkey = sq->mkey_be; + dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); + dseg->lkey = sq->mkey_be; + + wi->num_wqebbs = 1; + wi->num_ds = 1; + } } return 0; @@ -1608,7 +1540,7 @@ err_free_xdpsq: return err; } -static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) +static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) { struct mlx5e_channel *c = sq->channel; @@ -1616,7 +1548,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) napi_synchronize(&c->napi); mlx5e_destroy_sq(c->mdev, sq->sqn); - mlx5e_free_xdpsq_descs(sq); + mlx5e_free_xdpsq_descs(sq, rq); mlx5e_free_xdpsq(sq); } @@ -1769,11 +1701,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) mlx5e_free_cq(cq); } -static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) -{ - return cpumask_first(priv->mdev->priv.irq_info[ix].mask); -} - static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1919,14 +1846,37 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +static int mlx5e_alloc_xps_cpumask(struct mlx5e_channel *c, + struct mlx5e_params *params) +{ + int num_comp_vectors = mlx5_comp_vectors_count(c->mdev); + int irq; + + if (!zalloc_cpumask_var(&c->xps_cpumask, GFP_KERNEL)) + return -ENOMEM; + + for (irq = c->ix; irq < num_comp_vectors; irq += params->num_channels) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(c->mdev, irq)); + + cpumask_set_cpu(cpu, c->xps_cpumask); + } + + return 0; +} + +static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c) +{ + free_cpumask_var(c->xps_cpumask); +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct mlx5e_channel_param 
*cparam, struct mlx5e_channel **cp) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); struct net_dim_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; - int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; unsigned int irq; int err; @@ -1951,9 +1901,12 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; - c->irq_desc = irq_to_desc(irq); + err = mlx5e_alloc_xps_cpumask(c, params); + if (err) + goto err_free_channel; + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); @@ -2009,7 +1962,7 @@ err_close_rq: err_close_xdp_sq: if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq); + mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); err_close_sqs: mlx5e_close_sqs(c); @@ -2036,6 +1989,9 @@ err_close_icosq_cq: err_napi_del: netif_napi_del(&c->napi); + mlx5e_free_xps_cpumask(c); + +err_free_channel: kvfree(c); return err; @@ -2048,7 +2004,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); - netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); + netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) @@ -2062,10 +2018,10 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) static void mlx5e_close_channel(struct mlx5e_channel *c) { - mlx5e_close_xdpsq(&c->xdpsq); + mlx5e_close_xdpsq(&c->xdpsq, NULL); mlx5e_close_rq(&c->rq); if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq); + mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); napi_disable(&c->napi); @@ -2076,6 +2032,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); netif_napi_del(&c->napi); + 
mlx5e_free_xps_cpumask(c); kvfree(c); } @@ -2232,6 +2189,8 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, void *cqc = param->cqc; MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); + if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, @@ -2308,6 +2267,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, mlx5e_build_sq_param_common(priv, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, @@ -2346,6 +2306,10 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } + if (!IS_ERR_OR_NULL(priv->tx_reporter)) + devlink_health_reporter_state_update(priv->tx_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); + kvfree(cparam); return 0; @@ -2510,7 +2474,7 @@ static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz, if (rrp.rss.hfunc == ETH_RSS_HASH_XOR) ix = mlx5e_bits_invert(i, ilog2(sz)); - ix = priv->channels.params.indirection_rqt[ix]; + ix = priv->rss_params.indirection_rqt[ix]; rqn = rrp.rss.channels->c[ix]->rq.rqn; } else { rqn = rrp.rqn; @@ -2593,7 +2557,7 @@ static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, { .rss = { .channels = chs, - .hfunc = chs->params.rss_hfunc, + .hfunc = priv->rss_params.hfunc, } }, }; @@ -2613,6 +2577,54 @@ static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) mlx5e_redirect_rqts(priv, drop_rrp); } +static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = { + [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = 
MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, + [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, +}; + +struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt) +{ + return tirc_default_config[tt]; +} + static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) { if (!params->lro_en) @@ -2628,116 +2640,68 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout); } -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, - enum mlx5e_traffic_types tt, +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, + const struct mlx5e_tirc_config *ttconfig, void *tirc, bool inner) { void *hfso = inner ? 
MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) : MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); -#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP) - -#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP |\ - MLX5_HASH_FIELD_SEL_L4_SPORT |\ - MLX5_HASH_FIELD_SEL_L4_DPORT) - -#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP |\ - MLX5_HASH_FIELD_SEL_IPSEC_SPI) - - MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc)); - if (params->rss_hfunc == ETH_RSS_HASH_TOP) { + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc)); + if (rss_params->hfunc == ETH_RSS_HASH_TOP) { void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); - memcpy(rss_key, params->toeplitz_hash_key, len); + memcpy(rss_key, rss_params->toeplitz_hash_key, len); } + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + ttconfig->l3_prot_type); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + ttconfig->l4_prot_type); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + ttconfig->rx_hash_fields); +} - switch (tt) { - case MLX5E_TT_IPV4_TCP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_TCP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV6_TCP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_TCP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV4_UDP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - 
MLX5_L4_PROT_TYPE_UDP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV6_UDP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_UDP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV4_IPSEC_AH: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; +static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig, + enum mlx5e_traffic_types tt, + u32 rx_hash_fields) +{ + *ttconfig = tirc_default_config[tt]; + ttconfig->rx_hash_fields = rx_hash_fields; +} - case MLX5E_TT_IPV6_IPSEC_AH: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; +void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) +{ + void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); + struct mlx5e_rss_params *rss = &priv->rss_params; + struct mlx5_core_dev *mdev = priv->mdev; + int ctxlen = MLX5_ST_SZ_BYTES(tirc); + struct mlx5e_tirc_config ttconfig; + int tt; - case MLX5E_TT_IPV4_IPSEC_ESP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; + MLX5_SET(modify_tir_in, in, bitmask.hash, 1); - case MLX5E_TT_IPV6_IPSEC_ESP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_update_rx_hash_fields(&ttconfig, tt, + rss->rx_hash_fields[tt]); + mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false); 
+ mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); + } - case MLX5E_TT_IPV4: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP); - break; + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; - case MLX5E_TT_IPV6: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP); - break; - default: - WARN_ONCE(true, "%s: bad traffic type!\n", __func__); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_update_rx_hash_fields(&ttconfig, tt, + rss->rx_hash_fields[tt]); + mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true); + mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, + inlen); } } @@ -2794,7 +2758,8 @@ static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); + mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, + &tirc_default_config[tt], tirc, true); } static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, @@ -2825,7 +2790,7 @@ static void mlx5e_query_mtu(struct mlx5_core_dev *mdev, *mtu = MLX5E_HW2SW_MTU(params, hw_mtu); } -static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) +int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) { struct mlx5e_params *params = &priv->channels.params; struct net_device *netdev = priv->netdev; @@ -2903,9 +2868,10 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_build_tx2sq_maps(priv); mlx5e_activate_channels(&priv->channels); + mlx5e_xdp_tx_enable(priv); netif_tx_start_all_queues(priv->netdev); - if (MLX5_ESWITCH_MANAGER(priv->mdev)) + if (mlx5e_is_vport_rep(priv)) mlx5e_add_sqs_fwd_rules(priv); mlx5e_wait_channels_min_rx_wqes(&priv->channels); @@ -2916,7 
+2882,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { mlx5e_redirect_rqts_to_drop(priv); - if (MLX5_ESWITCH_MANAGER(priv->mdev)) + if (mlx5e_is_vport_rep(priv)) mlx5e_remove_sqs_fwd_rules(priv); /* FIXME: This is a W/A only for tx timeout watch dog false alarm when @@ -2924,16 +2890,18 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) */ netif_tx_stop_all_queues(priv->netdev); netif_tx_disable(priv->netdev); + mlx5e_xdp_tx_disable(priv); mlx5e_deactivate_channels(&priv->channels); } -void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - mlx5e_fp_hw_modify hw_modify) +static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify) { struct net_device *netdev = priv->netdev; int new_num_txqs; int carrier_ok; + new_num_txqs = new_chs->num * new_chs->params.num_tc; carrier_ok = netif_carrier_ok(netdev); @@ -2959,6 +2927,28 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, netif_carrier_on(netdev); } +int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify) +{ + int err; + + err = mlx5e_open_channels(priv, new_chs); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, new_chs, hw_modify); + return 0; +} + +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) +{ + struct mlx5e_channels new_channels = {}; + + new_channels.params = priv->channels.params; + return mlx5e_safe_switch_channels(priv, &new_channels, NULL); +} + void mlx5e_timestamp_init(struct mlx5e_priv *priv) { priv->tstamp.tx_type = HWTSTAMP_TX_OFF; @@ -3168,10 +3158,11 @@ err_close_tises: return err; } -void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) +static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; + mlx5e_tx_reporter_destroy(priv); for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); } @@ -3186,7 +3177,9 @@ static void 
mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); + + mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, + &tirc_default_config[tt], tirc, false); } static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) @@ -3372,13 +3365,12 @@ static int mlx5e_setup_tc_mqprio(struct net_device *netdev, goto out; } - err = mlx5e_open_channels(priv, &new_channels); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (err) goto out; priv->max_opened_tc = max_t(u8, priv->max_opened_tc, new_channels.params.num_tc); - mlx5e_switch_priv_channels(priv, &new_channels, NULL); out: mutex_unlock(&priv->state_lock); return err; @@ -3391,11 +3383,14 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, { switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, cls_flower, flags); + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, cls_flower, flags); + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, cls_flower, flags); + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); default: return -EOPNOTSUPP; } @@ -3408,7 +3403,8 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); + return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | + MLX5E_TC_NIC_OFFLOAD); default: return -EOPNOTSUPP; } @@ -3451,16 +3447,39 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, } } -static void +void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) +{ 
+ int i; + + for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) { + struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; + struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; + int j; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + + for (j = 0; j < priv->max_opened_tc; j++) { + struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_dropped += sq_stats->dropped; + } + } +} + +void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_sw_stats *sstats = &priv->stats.sw; struct mlx5e_vport_stats *vstats = &priv->stats.vport; struct mlx5e_pport_stats *pstats = &priv->stats.pport; - /* update HW stats in background for next time */ - mlx5e_queue_update_stats(priv); + if (!mlx5e_monitor_counter_supported(priv)) { + /* update HW stats in background for next time */ + mlx5e_queue_update_stats(priv); + } if (mlx5e_is_uplink_rep(priv)) { stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); @@ -3468,12 +3487,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); } else { - mlx5e_grp_sw_update_stats(priv); - stats->rx_packets = sstats->rx_packets; - stats->rx_bytes = sstats->rx_bytes; - stats->tx_packets = sstats->tx_packets; - stats->tx_bytes = sstats->tx_bytes; - stats->tx_dropped = sstats->tx_queue_dropped; + mlx5e_fold_sw_stats64(priv, stats); } stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; @@ -3566,11 +3580,7 @@ static int set_feature_lro(struct net_device *netdev, bool enable) goto out; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto out; - - mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro); + err = 
mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_modify_tirs_lro); out: mutex_unlock(&priv->state_lock); return err; @@ -3593,7 +3603,7 @@ static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); - if (!enable && mlx5e_tc_num_filters(priv)) { + if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; @@ -3767,7 +3777,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (params->xdp_prog && !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", - new_mtu, MLX5E_XDP_MAX_MTU); + new_mtu, mlx5e_xdp_max_mtu(params)); err = -EINVAL; goto out; } @@ -3788,11 +3798,10 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, goto out; } - err = mlx5e_open_channels(priv, &new_channels); + err = mlx5e_safe_switch_channels(priv, &new_channels, set_mtu_cb); if (err) goto out; - mlx5e_switch_priv_channels(priv, &new_channels, set_mtu_cb); netdev->mtu = new_channels.params.sw_mtu; out: @@ -3895,7 +3904,7 @@ static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } #ifdef CONFIG_MLX5_ESWITCH -static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3932,8 +3941,8 @@ static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting) return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting); } -static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, - int max_tx_rate) +int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3974,8 +3983,8 @@ static int 
mlx5e_set_vf_link_state(struct net_device *dev, int vf, mlx5_ifla_link2vport(link_state)); } -static int mlx5e_get_vf_config(struct net_device *dev, - int vf, struct ifla_vf_info *ivi) +int mlx5e_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3988,8 +3997,8 @@ static int mlx5e_get_vf_config(struct net_device *dev, return 0; } -static int mlx5e_get_vf_stats(struct net_device *dev, - int vf, struct ifla_vf_stats *vf_stats) +int mlx5e_get_vf_stats(struct net_device *dev, + int vf, struct ifla_vf_stats *vf_stats) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -4050,8 +4059,7 @@ static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add) queue_work(priv->wq, &vxlan_work->work); } -static void mlx5e_add_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4064,8 +4072,7 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1); } -static void mlx5e_del_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4115,9 +4122,9 @@ out: return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } -static netdev_features_t mlx5e_features_check(struct sk_buff *skb, - struct net_device *netdev, - netdev_features_t features) +netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4137,31 +4144,13 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } -static bool mlx5e_tx_timeout_eq_recover(struct 
net_device *dev, - struct mlx5e_txqsq *sq) -{ - struct mlx5_eq *eq = sq->cq.mcq.eq; - u32 eqe_count; - - netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", - eq->eqn, eq->cons_index, eq->irqn); - - eqe_count = mlx5_eq_poll_irq_disabled(eq); - if (!eqe_count) - return false; - - netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn); - sq->channel->stats->eq_rearm++; - return true; -} - static void mlx5e_tx_timeout_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, tx_timeout_work); - struct net_device *dev = priv->netdev; - bool reopen_channels = false; - int i, err; + bool report_failed = false; + int err; + int i; rtnl_lock(); mutex_lock(&priv->state_lock); @@ -4170,34 +4159,24 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) goto unlock; for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); + struct netdev_queue *dev_queue = + netdev_get_tx_queue(priv->netdev, i); struct mlx5e_txqsq *sq = priv->txq2sq[i]; if (!netif_xmit_stopped(dev_queue)) continue; - netdev_err(dev, - "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", - i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, - jiffies_to_usecs(jiffies - dev_queue->trans_start)); - - /* If we recover a lost interrupt, most likely TX timeout will - * be resolved, skip reopening channels - */ - if (!mlx5e_tx_timeout_eq_recover(dev, sq)) { - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - reopen_channels = true; - } + if (mlx5e_tx_reporter_timeout(sq)) + report_failed = true; } - if (!reopen_channels) + if (!report_failed) goto unlock; - mlx5e_close_locked(dev); - err = mlx5e_open_locked(dev); + err = mlx5e_safe_reopen_channels(priv); if (err) netdev_err(priv->netdev, - "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, 
err(%d).\n", err); unlock: @@ -4233,7 +4212,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", - new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU); + new_channels.params.sw_mtu, + mlx5e_xdp_max_mtu(&new_channels.params)); return -EINVAL; } @@ -4342,6 +4322,61 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mode, setting; + int err; + + err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); + if (err) + return err; + mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, + mode, + 0, 0, nlflags, filter_mask, NULL); +} + +static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, + u16 flags, struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct nlattr *attr, *br_spec; + u16 mode = BRIDGE_MODE_UNDEF; + u8 setting; + int rem; + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; + + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + + if (nla_len(attr) < sizeof(mode)) + return -EINVAL; + + mode = nla_get_u16(attr); + if (mode > BRIDGE_MODE_VEPA) + return -EINVAL; + + break; + } + + if (mode == BRIDGE_MODE_UNDEF) + return -EINVAL; + + setting = (mode == BRIDGE_MODE_VEPA) ? 
1 : 0; + return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting); +} +#endif + const struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -4368,6 +4403,9 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif #ifdef CONFIG_MLX5_ESWITCH + .ndo_bridge_setlink = mlx5e_bridge_setlink, + .ndo_bridge_getlink = mlx5e_bridge_getlink, + /* SRIOV E-Switch NDOs */ .ndo_set_vf_mac = mlx5e_set_vf_mac, .ndo_set_vf_vlan = mlx5e_set_vf_vlan, @@ -4377,8 +4415,6 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, - .ndo_has_offload_stats = mlx5e_has_offload_stats, - .ndo_get_offload_stats = mlx5e_get_offload_stats, #endif }; @@ -4524,15 +4560,23 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, mlx5e_init_rq_type_params(mdev, params); } -void mlx5e_build_rss_params(struct mlx5e_params *params) +void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, + u16 num_channels) { - params->rss_hfunc = ETH_RSS_HASH_XOR; - netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(params->indirection_rqt, - MLX5E_INDIR_RQT_SIZE, params->num_channels); + enum mlx5e_traffic_types tt; + + rss_params->hfunc = ETH_RSS_HASH_TOP; + netdev_rss_key_fill(rss_params->toeplitz_hash_key, + sizeof(rss_params->toeplitz_hash_key)); + mlx5e_build_default_indir_rqt(rss_params->indirection_rqt, + MLX5E_INDIR_RQT_SIZE, num_channels); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + rss_params->rx_hash_fields[tt] = + tirc_default_config[tt].rx_hash_fields; } void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu) { @@ -4548,6 +4592,10 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : 
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + /* XDP SQ */ + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, + MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)); + /* set CQE compression */ params->rx_cqe_compress_def = false; if (MLX5_CAP_GEN(mdev, cqe_compression) && @@ -4581,7 +4629,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); /* RSS */ - mlx5e_build_rss_params(params); + mlx5e_build_rss_params(rss_params, params->num_channels); } static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) @@ -4596,12 +4644,6 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) } } -#if IS_ENABLED(CONFIG_MLX5_ESWITCH) -static const struct switchdev_ops mlx5e_switchdev_ops = { - .switchdev_port_attr_get = mlx5e_attr_get, -}; -#endif - static void mlx5e_build_nic_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4711,12 +4753,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; mlx5e_set_netdev_dev_addr(netdev); - -#if IS_ENABLED(CONFIG_MLX5_ESWITCH) - if (MLX5_ESWITCH_MANAGER(mdev)) - netdev->switchdev_ops = &mlx5e_switchdev_ops; -#endif - mlx5e_ipsec_build_netdev(priv); mlx5e_tls_build_netdev(priv); } @@ -4754,14 +4790,16 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rss_params *rss = &priv->rss_params; int err; err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv); if (err) return err; - mlx5e_build_nic_params(mdev, &priv->channels.params, - mlx5e_get_netdev_max_channels(netdev), netdev->mtu); + mlx5e_build_nic_params(mdev, rss, &priv->channels.params, + mlx5e_get_netdev_max_channels(netdev), + netdev->mtu); mlx5e_timestamp_init(priv); @@ -4867,6 +4905,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); #endif + 
mlx5e_tx_reporter_create(priv); return 0; } @@ -4891,9 +4930,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_lag_add(mdev, netdev); mlx5e_enable_async_events(priv); - - if (MLX5_ESWITCH_MANAGER(priv->mdev)) - mlx5e_register_vport_reps(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_init(priv); if (netdev->reg_state != NETREG_REGISTERED) return; @@ -4927,8 +4965,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); - if (MLX5_ESWITCH_MANAGER(priv->mdev)) - mlx5e_unregister_vport_reps(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_cleanup(priv); mlx5e_disable_async_events(priv); mlx5_lag_remove(mdev); @@ -4981,7 +5019,7 @@ int mlx5e_netdev_init(struct net_device *netdev, netif_carrier_off(netdev); #ifdef CONFIG_MLX5_EN_ARFS - netdev->rx_cpu_rmap = mdev->rmap; + netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev); #endif return 0; @@ -5036,7 +5074,7 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) if (priv->channels.params.num_channels > max_nch) { mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); priv->channels.params.num_channels = max_nch; - mlx5e_build_default_indir_rqt(priv->channels.params.indirection_rqt, + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, max_nch); } @@ -5125,7 +5163,6 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) static void *mlx5e_add(struct mlx5_core_dev *mdev) { struct net_device *netdev; - void *rpriv = NULL; void *priv; int err; int nch; @@ -5135,20 +5172,18 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) return NULL; #ifdef CONFIG_MLX5_ESWITCH - if (MLX5_ESWITCH_MANAGER(mdev)) { - rpriv = mlx5e_alloc_nic_rep_priv(mdev); - if (!rpriv) { - mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n"); - return NULL; - } + if (MLX5_ESWITCH_MANAGER(mdev) && + mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + 
mlx5e_rep_register_vport_reps(mdev); + return mdev; } #endif nch = mlx5e_get_max_num_channels(mdev); - netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, rpriv); + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, NULL); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); - goto err_free_rpriv; + return NULL; } priv = netdev_priv(netdev); @@ -5174,30 +5209,26 @@ err_detach: mlx5e_detach(mdev, priv); err_destroy_netdev: mlx5e_destroy_netdev(priv); -err_free_rpriv: - kfree(rpriv); return NULL; } static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) { - struct mlx5e_priv *priv = vpriv; - void *ppriv = priv->ppriv; + struct mlx5e_priv *priv; +#ifdef CONFIG_MLX5_ESWITCH + if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev) { + mlx5e_rep_unregister_vport_reps(mdev); + return; + } +#endif + priv = vpriv; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_delete_app(priv); #endif unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(priv); - kfree(ppriv); -} - -static void *mlx5e_get_netdev(void *vpriv) -{ - struct mlx5e_priv *priv = vpriv; - - return priv->netdev; } static struct mlx5_interface mlx5e_interface = { @@ -5205,9 +5236,7 @@ static struct mlx5_interface mlx5e_interface = { .remove = mlx5e_remove, .attach = mlx5e_attach, .detach = mlx5e_detach, - .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, - .get_dev = mlx5e_get_netdev, }; void mlx5e_init(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 820fe85100b0..a66b6ed80b30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -42,14 +42,26 @@ #include "en.h" #include "en_rep.h" #include "en_tc.h" +#include "en/tc_tun.h" #include "fs_core.h" +#include "lib/port_tun.h" -#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \ - max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) +#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ + 
max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) #define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; +struct mlx5e_rep_indr_block_priv { + struct net_device *netdev; + struct mlx5e_rep_priv *rpriv; + + struct list_head list; +}; + +static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev); + static void mlx5e_rep_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { @@ -99,7 +111,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev, } } -static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) +static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -122,29 +134,45 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) vport_stats->tx_bytes = vf_stats.rx_bytes; } -static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) +static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats *s = &priv->stats.sw; - struct mlx5e_rq_stats *rq_stats; - struct mlx5e_sq_stats *sq_stats; - int i, j; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct rtnl_link_stats64 *vport_stats; - memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->channels.num; i++) { - struct mlx5e_channel *c = priv->channels.c[i]; + mlx5e_grp_802_3_update_stats(priv); - rq_stats = c->rq.stats; + vport_stats = &priv->stats.vf_vport; - s->rx_packets += rq_stats->packets; - s->rx_bytes += rq_stats->bytes; + vport_stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); + vport_stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); + vport_stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); + vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); +} - for (j = 0; j < priv->channels.params.num_tc; j++) { - sq_stats = c->sq[j].stats; +static void 
mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; - s->tx_packets += sq_stats->packets; - s->tx_bytes += sq_stats->bytes; - } - } + if (rep->vport == MLX5_VPORT_UPLINK) + mlx5e_uplink_rep_update_hw_counters(priv); + else + mlx5e_vf_rep_update_hw_counters(priv); +} + +static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + struct rtnl_link_stats64 stats64 = {}; + + memset(s, 0, sizeof(*s)); + mlx5e_fold_sw_stats64(priv, &stats64); + + s->rx_packets = stats64.rx_packets; + s->rx_bytes = stats64.rx_bytes; + s->tx_packets = stats64.tx_packets; + s->tx_bytes = stats64.tx_bytes; + s->tx_queue_dropped = stats64.tx_dropped; } static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, @@ -157,8 +185,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, return; mutex_lock(&priv->state_lock); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_rep_update_sw_counters(priv); + mlx5e_rep_update_sw_counters(priv); mlx5e_rep_update_hw_counters(priv); mutex_unlock(&priv->state_lock); @@ -257,6 +284,22 @@ static int mlx5e_rep_set_channels(struct net_device *dev, return 0; } +static int mlx5e_rep_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal); +} + +static int mlx5e_rep_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal); +} + static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -271,7 +314,39 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev) return mlx5e_ethtool_get_rxfh_indir_size(priv); } -static const struct ethtool_ops mlx5e_rep_ethtool_ops = { +static void 
mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_ethtool_get_pauseparam(priv, pauseparam); +} + +static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_pauseparam(priv, pauseparam); +} + +static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); +} + +static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); +} + +static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = { .get_drvinfo = mlx5e_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -281,27 +356,58 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .set_ringparam = mlx5e_rep_set_ringparam, .get_channels = mlx5e_rep_get_channels, .set_channels = mlx5e_rep_set_channels, + .get_coalesce = mlx5e_rep_get_coalesce, + .set_coalesce = mlx5e_rep_set_coalesce, .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, }; -int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) +static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { + .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_rep_get_strings, + .get_sset_count = mlx5e_rep_get_sset_count, + .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, + .get_ringparam = mlx5e_rep_get_ringparam, + .set_ringparam = mlx5e_rep_set_ringparam, + .get_channels = mlx5e_rep_get_channels, + 
.set_channels = mlx5e_rep_set_channels, + .get_coalesce = mlx5e_rep_get_coalesce, + .set_coalesce = mlx5e_rep_set_coalesce, + .get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings, + .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings, + .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, + .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, + .get_pauseparam = mlx5e_uplink_rep_get_pauseparam, + .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, +}; + +static int mlx5e_rep_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_upper = NULL; + struct mlx5e_priv *uplink_priv = NULL; + struct net_device *uplink_dev; if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = ETH_ALEN; - ether_addr_copy(attr->u.ppid.id, rep->hw_id); - break; - default: - return -EOPNOTSUPP; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + if (uplink_dev) { + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + uplink_priv = netdev_priv(uplink_dev); + } + + ppid->id_len = ETH_ALEN; + if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { + ether_addr_copy(ppid->id, uplink_upper->dev_addr); + } else { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + ether_addr_copy(ppid->id, rep->hw_id); } return 0; @@ -474,6 +580,10 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { ether_addr_copy(e->h_dest, ha); ether_addr_copy(eth->h_dest, ha); + /* Update the encap source mac, in case that we delete + * the flows when encap source mac changed. 
+ */ + ether_addr_copy(eth->h_source, e->route_dev->dev_addr); mlx5e_tc_encap_flows_add(priv, e); } @@ -519,6 +629,186 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) neigh_release(n); } +static struct mlx5e_rep_indr_block_priv * +mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + struct mlx5e_rep_indr_block_priv *cb_priv; + + /* All callback list access should be protected by RTNL. */ + ASSERT_RTNL(); + + list_for_each_entry(cb_priv, + &rpriv->uplink_priv.tc_indr_block_priv_list, + list) + if (cb_priv->netdev == netdev) + return cb_priv; + + return NULL; +} + +static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_rep_indr_block_priv *cb_priv, *temp; + struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list; + + list_for_each_entry_safe(cb_priv, temp, head, list) { + mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev); + kfree(cb_priv); + } +} + +static int +mlx5e_rep_indr_offload(struct net_device *netdev, + struct tc_cls_flower_offload *flower, + struct mlx5e_rep_indr_block_priv *indr_priv) +{ + struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); + int flags = MLX5E_TC_EGRESS | MLX5E_TC_ESW_OFFLOAD; + int err = 0; + + switch (flower->command) { + case TC_CLSFLOWER_REPLACE: + err = mlx5e_configure_flower(netdev, priv, flower, flags); + break; + case TC_CLSFLOWER_DESTROY: + err = mlx5e_delete_flower(netdev, priv, flower, flags); + break; + case TC_CLSFLOWER_STATS: + err = mlx5e_stats_flower(netdev, priv, flower, flags); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_indr_offload(priv->netdev, type_data, priv); + default: + return -EOPNOTSUPP; + } +} + +static int 
+mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, + struct mlx5e_rep_priv *rpriv, + struct tc_block_offload *f) +{ + struct mlx5e_rep_indr_block_priv *indr_priv; + int err = 0; + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (indr_priv) + return -EEXIST; + + indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); + if (!indr_priv) + return -ENOMEM; + + indr_priv->netdev = netdev; + indr_priv->rpriv = rpriv; + list_add(&indr_priv->list, + &rpriv->uplink_priv.tc_indr_block_priv_list); + + err = tcf_block_cb_register(f->block, + mlx5e_rep_indr_setup_block_cb, + indr_priv, indr_priv, f->extack); + if (err) { + list_del(&indr_priv->list); + kfree(indr_priv); + } + + return err; + case TC_BLOCK_UNBIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (!indr_priv) + return -ENOENT; + + tcf_block_cb_unregister(f->block, + mlx5e_rep_indr_setup_block_cb, + indr_priv); + list_del(&indr_priv->list); + kfree(indr_priv); + + return 0; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static +int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv, + type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + int err; + + err = __tc_indr_block_cb_register(netdev, rpriv, + mlx5e_rep_indr_setup_tc_cb, + rpriv); + if (err) { + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n", + netdev_name(netdev), err); + } + return err; +} + +static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + 
__tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, + rpriv); +} + +static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + uplink_priv.netdevice_nb); + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + if (!mlx5e_tc_tun_device_to_offload(priv, netdev)) + return NOTIFY_OK; + + switch (event) { + case NETDEV_REGISTER: + mlx5e_rep_indr_register_block(rpriv, netdev); + break; + case NETDEV_UNREGISTER: + mlx5e_rep_indr_unregister_block(rpriv, netdev); + break; + } + return NOTIFY_OK; +} + static struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, struct mlx5e_neigh *m_neigh); @@ -755,14 +1045,23 @@ static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv, int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; struct mlx5e_neigh_hash_entry *nhe; int err; + err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type); + if (err) + return err; nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); if (!nhe) { err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); - if (err) + if (err) { + mlx5_tun_entropy_refcount_dec(tun_entropy, + e->reformat_type); return err; + } } list_add(&e->encap_list, &nhe->encap_list); return 0; @@ -771,6 +1070,9 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; struct mlx5e_neigh_hash_entry *nhe; 
list_del(&e->encap_list); @@ -778,9 +1080,10 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, if (list_empty(&nhe->encap_list)) mlx5e_rep_neigh_entry_destroy(priv, nhe); + mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); } -static int mlx5e_rep_open(struct net_device *dev) +static int mlx5e_vf_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -794,7 +1097,8 @@ static int mlx5e_rep_open(struct net_device *dev) if (!mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_UP)) + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_UP)) netif_carrier_on(dev); unlock: @@ -802,7 +1106,7 @@ unlock: return err; } -static int mlx5e_rep_close(struct net_device *dev) +static int mlx5e_vf_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -812,7 +1116,8 @@ static int mlx5e_rep_close(struct net_device *dev) mutex_lock(&priv->state_lock); mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN); + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); ret = mlx5e_close_locked(dev); mutex_unlock(&priv->state_lock); return ret; @@ -824,9 +1129,18 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; + unsigned int fn; int ret; - ret = snprintf(buf, len, "%d", rep->vport - 1); + fn = PCI_FUNC(priv->mdev->pdev->devfn); + if (fn >= MLX5_MAX_PORTS) + return -EOPNOTSUPP; + + if (rep->vport == MLX5_VPORT_UPLINK) + ret = snprintf(buf, len, "p%d", fn); + else + ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1); + if (ret >= len) return -EOPNOTSUPP; @@ -839,24 +1153,14 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, { switch (cls_flower->command) { case 
TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, cls_flower, flags); + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, cls_flower, flags); + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, cls_flower, flags); - default: - return -EOPNOTSUPP; - } -} - -static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data, - void *cb_priv) -{ - struct mlx5e_priv *priv = cb_priv; - - switch (type) { - case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS); + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); default: return -EOPNOTSUPP; } @@ -869,7 +1173,8 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | + MLX5E_TC_ESW_OFFLOAD); default: return -EOPNOTSUPP; } @@ -908,43 +1213,23 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep; if (!MLX5_ESWITCH_MANAGER(priv->mdev)) return false; - rep = rpriv->rep; - if (esw->mode == SRIOV_OFFLOADS && - rep && rep->vport == FDB_UPLINK_VPORT) - return true; - - return false; -} - -static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep; - - if (!MLX5_ESWITCH_MANAGER(priv->mdev)) + if (!rpriv) /* non vport rep mlx5e instances don't use this field */ return false; rep = rpriv->rep; - if (rep && rep->vport != FDB_UPLINK_VPORT) - return true; - - return false; + return (rep->vport == 
MLX5_VPORT_UPLINK); } -bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) +static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) { - struct mlx5e_priv *priv = netdev_priv(dev); - switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: - if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv)) return true; } @@ -956,22 +1241,13 @@ mlx5e_get_sw_stats64(const struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_sw_stats *sstats = &priv->stats.sw; - - mlx5e_rep_update_sw_counters(priv); - - stats->rx_packets = sstats->rx_packets; - stats->rx_bytes = sstats->rx_bytes; - stats->tx_packets = sstats->tx_packets; - stats->tx_bytes = sstats->tx_bytes; - - stats->tx_dropped = sstats->tx_queue_dropped; + mlx5e_fold_sw_stats64(priv, stats); return 0; } -int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, - void *sp) +static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@ -982,7 +1258,7 @@ int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, } static void -mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -991,37 +1267,104 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); } -static const struct switchdev_ops mlx5e_rep_switchdev_ops = { - .switchdev_port_attr_get = mlx5e_attr_get, -}; - -static int mlx5e_change_rep_mtu(struct net_device *netdev, int new_mtu) +static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu) { return mlx5e_change_mtu(netdev, new_mtu, NULL); } -static const struct net_device_ops mlx5e_netdev_ops_rep = { - .ndo_open = mlx5e_rep_open, - .ndo_stop = 
mlx5e_rep_close, +static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu) +{ + return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu); +} + +static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr) +{ + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + ether_addr_copy(netdev->dev_addr, saddr->sa_data); + return 0; +} + +static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) +{ + netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n"); + + if (vlan != 0) + return -EOPNOTSUPP; + + /* allow setting 0-vid for compatibility with libvirt */ + return 0; +} + +static const struct net_device_ops mlx5e_netdev_ops_vf_rep = { + .ndo_open = mlx5e_vf_rep_open, + .ndo_stop = mlx5e_vf_rep_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, + .ndo_setup_tc = mlx5e_rep_setup_tc, + .ndo_get_stats64 = mlx5e_vf_rep_get_stats, + .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, + .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, + .ndo_change_mtu = mlx5e_vf_rep_change_mtu, + .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, +}; + +static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, + .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_stats64 = mlx5e_rep_get_stats, - .ndo_has_offload_stats = mlx5e_has_offload_stats, - .ndo_get_offload_stats = mlx5e_get_offload_stats, - .ndo_change_mtu = mlx5e_change_rep_mtu, + .ndo_get_stats64 = mlx5e_get_stats, + .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, + .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, + .ndo_change_mtu = mlx5e_uplink_rep_change_mtu, + .ndo_udp_tunnel_add = 
mlx5e_add_vxlan_port, + .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, + .ndo_features_check = mlx5e_features_check, + .ndo_set_vf_mac = mlx5e_set_vf_mac, + .ndo_set_vf_rate = mlx5e_set_vf_rate, + .ndo_get_vf_config = mlx5e_get_vf_config, + .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, + .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, }; -static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, u16 mtu) +bool mlx5e_eswitch_rep(struct net_device *netdev) +{ + if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep || + netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep) + return true; + + return false; +} + +static void mlx5e_build_rep_params(struct net_device *netdev) { + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params *params; + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + params = &priv->channels.params; params->hard_mtu = MLX5E_ETH_HARD_MTU; - params->sw_mtu = mtu; - params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE; + params->sw_mtu = netdev->mtu; + + /* SQ */ + if (rep->vport == MLX5_VPORT_UPLINK) + params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + else + params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; /* RQ */ mlx5e_build_rq_params(mdev, params); @@ -1035,24 +1378,36 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); /* RSS */ - mlx5e_build_rss_params(params); + mlx5e_build_rss_params(&priv->rss_params, params->num_channels); } static void mlx5e_build_rep_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_core_dev *mdev = priv->mdev; - u16 max_mtu; - netdev->netdev_ops = &mlx5e_netdev_ops_rep; + if (rep->vport == MLX5_VPORT_UPLINK) { + SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev); + netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; + /* we want a persistent mac for the uplink rep */ + mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr); + netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; +#endif + } else { + netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep; + eth_hw_addr_random(netdev); + netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops; + } netdev->watchdog_timeo = 15 * HZ; - netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; - - netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; - netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; + netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; netdev->hw_features |= NETIF_F_HW_TC; netdev->hw_features |= NETIF_F_SG; @@ -1063,13 +1418,10 @@ static void 
mlx5e_build_rep_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM; - netdev->features |= netdev->hw_features; - - eth_hw_addr_random(netdev); + if (rep->vport != MLX5_VPORT_UPLINK) + netdev->features |= NETIF_F_VLAN_CHALLENGED; - netdev->min_mtu = ETH_MIN_MTU; - mlx5_query_port_max_mtu(mdev, &max_mtu, 1); - netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); + netdev->features |= netdev->hw_features; } static int mlx5e_init_rep(struct mlx5_core_dev *mdev, @@ -1086,7 +1438,7 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev, priv->channels.params.num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS; - mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu); + mlx5e_build_rep_params(netdev); mlx5e_build_rep_netdev(netdev); mlx5e_timestamp_init(priv); @@ -1209,94 +1561,193 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) { - int err; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv; + int tc, err; err = mlx5e_create_tises(priv); if (err) { mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); return err; } + + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { + uplink_priv = &rpriv->uplink_priv; + + INIT_LIST_HEAD(&uplink_priv->unready_flows); + + /* init shared tc flow table */ + err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + if (err) + goto destroy_tises; + + mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev); + + /* init indirect block notifications */ + INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); + uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event; + err = register_netdevice_notifier(&uplink_priv->netdevice_nb); + if (err) { + mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n"); + goto tc_esw_cleanup; + } + } + return 0; + +tc_esw_cleanup: + mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht); +destroy_tises: + for (tc 
= 0; tc < priv->profile->max_tc; tc++) + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); + return err; } -static const struct mlx5e_profile mlx5e_rep_profile = { - .init = mlx5e_init_rep, - .cleanup = mlx5e_cleanup_rep, - .init_rx = mlx5e_init_rep_rx, - .cleanup_rx = mlx5e_cleanup_rep_rx, - .init_tx = mlx5e_init_rep_tx, - .cleanup_tx = mlx5e_cleanup_nic_tx, - .update_stats = mlx5e_rep_update_hw_counters, - .update_carrier = NULL, - .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, - .max_tc = 1, -}; +static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int tc; -/* e-Switch vport representors */ + for (tc = 0; tc < priv->profile->max_tc; tc++) + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); -static int -mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { + /* clean indirect TC block notifications */ + unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb); + mlx5e_rep_indr_clean_block_privs(rpriv); + + /* delete shared tc flow table */ + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); + } +} + +static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 max_mtu; - int err; + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); +} - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - err = mlx5e_add_sqs_fwd_rules(priv); - if (err) - return err; +static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + + if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { + 
struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + queue_work(priv->wq, &priv->update_carrier_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; } - err = mlx5e_rep_neigh_init(rpriv); - if (err) - goto err_remove_sqs; + if (event == MLX5_DEV_EVENT_PORT_AFFINITY) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; - /* init shared tc flow table */ - err = mlx5e_tc_esw_init(&rpriv->tc_ht); - if (err) - goto err_neigh_cleanup; + queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work); - return 0; + return NOTIFY_OK; + } -err_neigh_cleanup: - mlx5e_rep_neigh_cleanup(rpriv); -err_remove_sqs: - mlx5e_remove_sqs_fwd_rules(priv); - return err; + return NOTIFY_DONE; } -static void -mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep) +static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + u16 max_mtu; + + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); + mlx5e_set_dev_port_mtu(priv); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_remove_sqs_fwd_rules(priv); + INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work, + mlx5e_tc_reoffload_flows_work); - /* clean uplink offloaded TC rules, delete shared tc flow table */ - mlx5e_tc_esw_cleanup(&rpriv->tc_ht); + mlx5_lag_add(mdev, netdev); + priv->events_nb.notifier_call = uplink_rep_async_event; + mlx5_notifier_register(mdev, &priv->events_nb); +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_initialize(priv); + mlx5e_dcbnl_init_app(priv); +#endif +} - mlx5e_rep_neigh_cleanup(rpriv); +static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) +{ + struct 
mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_delete_app(priv); +#endif + mlx5_notifier_unregister(mdev, &priv->events_nb); + cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); + mlx5_lag_remove(mdev); } +static const struct mlx5e_profile mlx5e_vf_rep_profile = { + .init = mlx5e_init_rep, + .cleanup = mlx5e_cleanup_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_rep_tx, + .enable = mlx5e_vf_rep_enable, + .update_stats = mlx5e_vf_rep_update_hw_counters, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .max_tc = 1, +}; + +static const struct mlx5e_profile mlx5e_uplink_rep_profile = { + .init = mlx5e_init_rep, + .cleanup = mlx5e_cleanup_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_rep_tx, + .enable = mlx5e_uplink_rep_enable, + .disable = mlx5e_uplink_rep_disable, + .update_stats = mlx5e_uplink_rep_update_hw_counters, + .update_carrier = mlx5e_update_carrier, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .max_tc = MLX5E_MAX_NUM_TC, +}; + +/* e-Switch vport representors */ static int mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { - struct mlx5e_rep_priv *uplink_rpriv; + const struct mlx5e_profile *profile; struct mlx5e_rep_priv *rpriv; struct net_device *netdev; - struct mlx5e_priv *upriv; int nch, err; rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); if (!rpriv) return -ENOMEM; + /* rpriv->rep to be looked up when profile->init() is called */ + rpriv->rep = rep; + nch = mlx5e_get_max_num_channels(dev); - netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, nch, rpriv); + profile = (rep->vport == MLX5_VPORT_UPLINK) ? 
&mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", rep->vport); @@ -1305,15 +1756,20 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) } rpriv->netdev = netdev; - rpriv->rep = rep; rep->rep_if[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); + if (rep->vport == MLX5_VPORT_UPLINK) { + err = mlx5e_create_mdev_resources(dev); + if (err) + goto err_destroy_netdev; + } + err = mlx5e_attach_netdev(netdev_priv(netdev)); if (err) { pr_warn("Failed to attach representor netdev for vport %d\n", rep->vport); - goto err_destroy_netdev; + goto err_destroy_mdev_resources; } err = mlx5e_rep_neigh_init(rpriv); @@ -1323,32 +1779,25 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } - uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH); - upriv = netdev_priv(uplink_rpriv->netdev); - err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); - if (err) - goto err_neigh_cleanup; - err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_egdev_cleanup; + goto err_neigh_cleanup; } return 0; -err_egdev_cleanup: - tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); - err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); err_detach_netdev: mlx5e_detach_netdev(netdev_priv(netdev)); +err_destroy_mdev_resources: + if (rep->vport == MLX5_VPORT_UPLINK) + mlx5e_destroy_mdev_resources(dev); + err_destroy_netdev: mlx5e_destroy_netdev(netdev_priv(netdev)); kfree(rpriv); @@ -1361,18 +1810,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_rep_priv *uplink_rpriv; void 
*ppriv = priv->ppriv; - struct mlx5e_priv *upriv; unregister_netdev(netdev); - uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, - REP_ETH); - upriv = netdev_priv(uplink_rpriv->netdev); - tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); + if (rep->vport == MLX5_VPORT_UPLINK) + mlx5e_destroy_mdev_resources(priv->mdev); mlx5e_destroy_netdev(priv); kfree(ppriv); /* mlx5e_rep_priv */ } @@ -1386,72 +1830,21 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } -static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - - for (vport = 1; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH); - } -} - -static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv) +void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { - struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH); -} - -void mlx5e_register_vport_reps(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if; - struct mlx5e_rep_priv *rpriv; - - rpriv = priv->ppriv; - rpriv->netdev = priv->netdev; + struct mlx5_eswitch_rep_if rep_if = {}; - rep_if.load = mlx5e_nic_rep_load; - rep_if.unload = mlx5e_nic_rep_unload; + rep_if.load = mlx5e_vport_rep_load; + rep_if.unload = 
mlx5e_vport_rep_unload; rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - rep_if.priv = rpriv; - INIT_LIST_HEAD(&rpriv->vport_sqs_list); - mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/ - - mlx5e_rep_register_vf_vports(priv); /* VFs vports */ -} - -void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */ - mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/ + mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); } -void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev) +void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5e_rep_priv *rpriv; - - rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); - if (!rpriv) - return NULL; - rpriv->rep = &esw->offloads.vport_reps[0]; - return rpriv; + mlx5_eswitch_unregister_vport_reps(esw, REP_ETH); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 844d32d5c29f..83b573b1abac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -37,6 +37,7 @@ #include <linux/rhashtable.h> #include "eswitch.h" #include "en.h" +#include "lib/port_tun.h" #ifdef CONFIG_MLX5_ESWITCH struct mlx5e_neigh_update_table { @@ -53,13 +54,38 @@ struct mlx5e_neigh_update_table { unsigned long min_interval; /* jiffies */ }; +struct mlx5_rep_uplink_priv { + /* Filters DB - instantiated by the uplink representor and shared by + * the uplink's VFs + */ + struct rhashtable tc_ht; + + /* indirect block callbacks are invoked on bind/unbind events + * on registered higher level devices (e.g. 
tunnel devices) + * + * tc_indr_block_priv_list is used to look up indirect callback + * private data + * + * netdevice_nb is the netdev events notifier - used to register + * tunnel devices for block events + * + */ + struct list_head tc_indr_block_priv_list; + struct notifier_block netdevice_nb; + + struct mlx5_tun_entropy tun_entropy; + + struct list_head unready_flows; + struct work_struct reoffload_flows_work; +}; + struct mlx5e_rep_priv { struct mlx5_eswitch_rep *rep; struct mlx5e_neigh_update_table neigh_update; struct net_device *netdev; struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; - struct rhashtable tc_ht; /* valid for uplink rep */ + struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ }; static inline @@ -128,7 +154,10 @@ struct mlx5e_encap_entry { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; + struct net_device *route_dev; int tunnel_type; + int tunnel_hlen; + int reformat_type; u8 flags; char *encap_header; int encap_size; @@ -140,16 +169,12 @@ struct mlx5e_rep_sq { }; void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev); -void mlx5e_register_vport_reps(struct mlx5e_priv *priv); -void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv); +void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev); +void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev); bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); -int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp); -bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); - -int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, @@ -158,12 +183,17 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv 
*priv, struct mlx5e_encap_entry *e); void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +bool mlx5e_eswitch_rep(struct net_device *netdev); + #else /* CONFIG_MLX5_ESWITCH */ -static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {} -static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {} static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {} #endif +static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv) +{ + return (MLX5_ESWITCH_MANAGER(priv->mdev) && priv->ppriv); +} #endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0b5ef6d4e815..c3b3002ff62f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -52,40 +52,45 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) return config->rx_filter == HWTSTAMP_FILTER_ALL; } -static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc, - void *data) +static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq, + u32 cqcc, void *data) { - u32 ci = mlx5_cqwq_ctr2ix(&cq->wq, cqcc); + u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc); - memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64)); + memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64)); } static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, u32 cqcc) + struct mlx5_cqwq *wq, + u32 cqcc) { - mlx5e_read_cqe_slot(cq, cqcc, &cq->title); - cq->decmprs_left = be32_to_cpu(cq->title.byte_cnt); - cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter); + struct mlx5e_cq_decomp *cqd = &rq->cqd; + struct mlx5_cqe64 *title = &cqd->title; + + mlx5e_read_cqe_slot(wq, cqcc, title); + cqd->left = 
be32_to_cpu(title->byte_cnt); + cqd->wqe_counter = be16_to_cpu(title->wqe_counter); rq->stats->cqe_compress_blks++; } -static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc) +static inline void mlx5e_read_mini_arr_slot(struct mlx5_cqwq *wq, + struct mlx5e_cq_decomp *cqd, + u32 cqcc) { - mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr); - cq->mini_arr_idx = 0; + mlx5e_read_cqe_slot(wq, cqcc, cqd->mini_arr); + cqd->mini_arr_idx = 0; } -static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) +static inline void mlx5e_cqes_update_owner(struct mlx5_cqwq *wq, int n) { - struct mlx5_cqwq *wq = &cq->wq; - + u32 cqcc = wq->cc; u8 op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1; u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc); u32 wq_sz = mlx5_cqwq_get_size(wq); u32 ci_top = min_t(u32, wq_sz, ci + n); for (; ci < ci_top; ci++, n--) { - struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); cqe->op_own = op_own; } @@ -93,7 +98,7 @@ static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) if (unlikely(ci == wq_sz)) { op_own = !op_own; for (ci = 0; ci < n; ci++) { - struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); cqe->op_own = op_own; } @@ -101,68 +106,79 @@ static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) } static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, u32 cqcc) + struct mlx5_cqwq *wq, + u32 cqcc) { - cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt; - cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum; - cq->title.op_own &= 0xf0; - cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz); - cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter); + struct mlx5e_cq_decomp *cqd = &rq->cqd; + struct mlx5_mini_cqe8 *mini_cqe = &cqd->mini_arr[cqd->mini_arr_idx]; + struct mlx5_cqe64 *title = &cqd->title; + + 
title->byte_cnt = mini_cqe->byte_cnt; + title->check_sum = mini_cqe->checksum; + title->op_own &= 0xf0; + title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz); + title->wqe_counter = cpu_to_be16(cqd->wqe_counter); if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - cq->decmprs_wqe_counter += - mpwrq_get_cqe_consumed_strides(&cq->title); + cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title); else - cq->decmprs_wqe_counter = - mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1); + cqd->wqe_counter = + mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cqd->wqe_counter + 1); } static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, u32 cqcc) + struct mlx5_cqwq *wq, + u32 cqcc) { - mlx5e_decompress_cqe(rq, cq, cqcc); - cq->title.rss_hash_type = 0; - cq->title.rss_hash_result = 0; + struct mlx5e_cq_decomp *cqd = &rq->cqd; + + mlx5e_decompress_cqe(rq, wq, cqcc); + cqd->title.rss_hash_type = 0; + cqd->title.rss_hash_result = 0; } static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, + struct mlx5_cqwq *wq, int update_owner_only, int budget_rem) { - u32 cqcc = cq->wq.cc + update_owner_only; + struct mlx5e_cq_decomp *cqd = &rq->cqd; + u32 cqcc = wq->cc + update_owner_only; u32 cqe_count; u32 i; - cqe_count = min_t(u32, cq->decmprs_left, budget_rem); + cqe_count = min_t(u32, cqd->left, budget_rem); for (i = update_owner_only; i < cqe_count; - i++, cq->mini_arr_idx++, cqcc++) { - if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE) - mlx5e_read_mini_arr_slot(cq, cqcc); + i++, cqd->mini_arr_idx++, cqcc++) { + if (cqd->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE) + mlx5e_read_mini_arr_slot(wq, cqd, cqcc); - mlx5e_decompress_cqe_no_hash(rq, cq, cqcc); - rq->handle_rx_cqe(rq, &cq->title); + mlx5e_decompress_cqe_no_hash(rq, wq, cqcc); + rq->handle_rx_cqe(rq, &cqd->title); } - mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc); - cq->wq.cc = cqcc; - cq->decmprs_left -= cqe_count; + mlx5e_cqes_update_owner(wq, 
cqcc - wq->cc); + wq->cc = cqcc; + cqd->left -= cqe_count; rq->stats->cqe_compress_pkts += cqe_count; return cqe_count; } static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, + struct mlx5_cqwq *wq, int budget_rem) { - mlx5e_read_title_slot(rq, cq, cq->wq.cc); - mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1); - mlx5e_decompress_cqe(rq, cq, cq->wq.cc); - rq->handle_rx_cqe(rq, &cq->title); - cq->mini_arr_idx++; + struct mlx5e_cq_decomp *cqd = &rq->cqd; + u32 cc = wq->cc; - return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; + mlx5e_read_title_slot(rq, wq, cc); + mlx5e_read_mini_arr_slot(wq, cqd, cc + 1); + mlx5e_decompress_cqe(rq, wq, cc); + rq->handle_rx_cqe(rq, &cqd->title); + cqd->mini_arr_idx++; + + return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1; } static inline bool mlx5e_page_is_reserved(struct page *page) @@ -369,7 +385,7 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, static inline void mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, struct mlx5e_dma_info *dma_info, - int offset_from, int offset_to, u32 headlen) + int offset_from, u32 headlen) { const void *from = page_address(dma_info->page) + offset_from; /* Aligning len to sizeof(long) optimizes memcpy performance */ @@ -377,24 +393,7 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len, DMA_FROM_DEVICE); - skb_copy_to_linear_data_offset(skb, offset_to, from, len); -} - -static inline void -mlx5e_copy_skb_header_mpwqe(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_dma_info *dma_info, - u32 offset, u32 headlen) -{ - u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); - - mlx5e_copy_skb_header(pdev, skb, dma_info, offset, 0, headlen_pg); - - if (unlikely(offset + headlen > PAGE_SIZE)) { - dma_info++; - mlx5e_copy_skb_header(pdev, skb, dma_info, 0, headlen_pg, - headlen - headlen_pg); - } + skb_copy_to_linear_data(skb, from, 
len); } static void @@ -554,9 +553,9 @@ static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, mlx5_cqwq_pop(&cq->wq); - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { netdev_WARN_ONCE(cq->channel->netdev, - "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own); + "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); return; } @@ -693,7 +692,14 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, { *proto = ((struct ethhdr *)skb->data)->h_proto; *proto = __vlan_get_protocol(skb, *proto, network_depth); - return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6)); + + if (*proto == htons(ETH_P_IP)) + return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr)); + + if (*proto == htons(ETH_P_IPV6)) + return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr)); + + return false; } static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) @@ -713,23 +719,76 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) rq->stats->ecn_mark += !!rc; } -static u32 mlx5e_get_fcs(const struct sk_buff *skb) +static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) { - const void *fcs_bytes; - u32 _fcs_bytes; + void *ip_p = skb->data + network_depth; + + return (proto == htons(ETH_P_IP)) ? 
((struct iphdr *)ip_p)->protocol : + ((struct ipv6hdr *)ip_p)->nexthdr; +} + +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) - fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, - ETH_FCS_LEN, &_fcs_bytes); +#define MAX_PADDING 8 - return __get_unaligned_cpu32(fcs_bytes); +static void +tail_padding_csum_slow(struct sk_buff *skb, int offset, int len, + struct mlx5e_rq_stats *stats) +{ + stats->csum_complete_tail_slow++; + skb->csum = csum_block_add(skb->csum, + skb_checksum(skb, offset, len, 0), + offset); } -static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) +static void +tail_padding_csum(struct sk_buff *skb, int offset, + struct mlx5e_rq_stats *stats) { - void *ip_p = skb->data + network_depth; + u8 tail_padding[MAX_PADDING]; + int len = skb->len - offset; + void *tail; - return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol : - ((struct ipv6hdr *)ip_p)->nexthdr; + if (unlikely(len > MAX_PADDING)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + tail = skb_header_pointer(skb, offset, len, tail_padding); + if (unlikely(!tail)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + stats->csum_complete_tail++; + skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset); +} + +static void +mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) +{ + struct ipv6hdr *ip6; + struct iphdr *ip4; + int pkt_len; + + switch (proto) { + case htons(ETH_P_IP): + ip4 = (struct iphdr *)(skb->data + network_depth); + pkt_len = network_depth + ntohs(ip4->tot_len); + break; + case htons(ETH_P_IPV6): + ip6 = (struct ipv6hdr *)(skb->data + network_depth); + pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len); + break; + default: + return; + } + + if (likely(pkt_len >= skb->len)) + return; + + tail_padding_csum(skb, pkt_len, stats); } static inline void mlx5e_handle_csum(struct net_device *netdev, @@ 
-751,7 +810,19 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) + /* True when explicitly set via priv flag, or XDP prog is loaded */ + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) + goto csum_unnecessary; + + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to be + * CHECKSUM_UNNECESSARY even if they are not padded. + */ + if (short_frame(skb->len)) goto csum_unnecessary; if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { @@ -768,18 +839,15 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum = csum_partial(skb->data + ETH_HLEN, network_depth - ETH_HLEN, skb->csum); - if (unlikely(netdev->features & NETIF_F_RXFCS)) - skb->csum = csum_block_add(skb->csum, - (__force __wsum)mlx5e_get_fcs(skb), - skb->len - ETH_FCS_LEN); + + mlx5e_skb_padding_csum(skb, network_depth, proto, stats); stats->csum_complete++; return; } csum_unnecessary: if (likely((cqe->hds_ip_ext & CQE_L3_OK) && - ((cqe->hds_ip_ext & CQE_L4_OK) || - (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) { + (cqe->hds_ip_ext & CQE_L4_OK))) { skb->ip_summed = CHECKSUM_UNNECESSARY; if (cqe_is_tunneled(cqe)) { skb->csum_level = 1; @@ -898,7 +966,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, prefetchw(va); /* xdp_frame data area */ prefetch(data); - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; return NULL; } @@ -930,7 +998,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, u16 byte_cnt = cqe_bcnt - headlen; struct sk_buff *skb; - if (unlikely((cqe->op_own >> 4) != 
MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; return NULL; } @@ -960,8 +1028,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, } /* copy header */ - mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, - 0, headlen); + mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -1083,8 +1150,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w di++; } /* copy header */ - mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, head_di, - head_offset, headlen); + mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -1154,7 +1220,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi->consumed_strides += cstrides; - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; goto mpwrq_cqe_out; } @@ -1190,17 +1256,17 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5_cqwq *cqwq = &cq->wq; struct mlx5_cqe64 *cqe; int work_done = 0; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return 0; - if (cq->decmprs_left) - work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); + if (rq->cqd.left) + work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget); - cqe = mlx5_cqwq_get_cqe(&cq->wq); + cqe = mlx5_cqwq_get_cqe(cqwq); if (!cqe) { if (unlikely(work_done)) goto out; @@ -1210,28 +1276,21 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) do { if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { work_done += - 
mlx5e_decompress_cqes_start(rq, cq, + mlx5e_decompress_cqes_start(rq, cqwq, budget - work_done); continue; } - mlx5_cqwq_pop(&cq->wq); + mlx5_cqwq_pop(cqwq); rq->handle_rx_cqe(rq, cqe); - } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); out: - if (xdpsq->doorbell) { - mlx5e_xmit_xdp_doorbell(xdpsq); - xdpsq->doorbell = false; - } - - if (xdpsq->redirect_flush) { - xdp_do_flush_map(); - xdpsq->redirect_flush = false; - } + if (rq->xdp_prog) + mlx5e_xdp_rx_poll_complete(rq); - mlx5_cqwq_update_db_record(&cq->wq); + mlx5_cqwq_update_db_record(cqwq); /* ensure cq space is freed before enabling more cqes */ wmb(); @@ -1292,8 +1351,14 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, skb->protocol = *((__be16 *)(skb->data)); - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + if (netdev->features & NETIF_F_RXCSUM) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + stats->csum_complete++; + } else { + skb->ip_summed = CHECKSUM_NONE; + stats->csum_none++; + } if (unlikely(mlx5e_rx_hw_stamp(tstamp))) skb_hwtstamps(skb)->hwtstamp = @@ -1312,7 +1377,6 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, skb->dev = netdev; - stats->csum_complete++; stats->packets++; stats->bytes += cqe_bcnt; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 4337afd610d7..b75aa8b8bf04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -30,6 +30,7 @@ * SOFTWARE. 
*/ +#include "lib/mlx5.h" #include "en.h" #include "en_accel/ipsec.h" #include "en_accel/tls.h" @@ -58,6 +59,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, @@ -126,9 +129,9 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } -void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) +static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats temp, *s = &temp; + struct mlx5e_sw_stats *s = &priv->stats.sw; int i; memset(s, 0, sizeof(*s)); @@ -150,6 +153,8 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_complete_tail += rq_stats->csum_complete_tail; + s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow; s->rx_csum_unnecessary += rq_stats->csum_unnecessary; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; @@ -211,8 +216,6 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_cqes += sq_stats->cqes; } } - - memcpy(&priv->stats.sw, s, sizeof(*s)); } static const struct counter_desc q_stats_desc[] = { @@ -480,7 +483,10 @@ static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) +#define 
MLX5_BASIC_PPCNT_SUPPORTED(mdev) \ + (MLX5_CAP_GEN(mdev, pcam_reg) ? MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1) + +void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -488,6 +494,9 @@ static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); void *out; + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + MLX5_SET(ppcnt_reg, in, local_port, 1); out = pstats->IEEE_802_3_counters; MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); @@ -600,6 +609,9 @@ static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv) int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); void *out; + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + MLX5_SET(ppcnt_reg, in, local_port, 1); out = pstats->RFC_2819_counters; MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); @@ -934,7 +946,7 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { }; static const struct counter_desc pport_pfc_stall_stats_desc[] = { - { "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) }, + { "tx_pause_storm_warning_events", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) }, { "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) }, }; @@ -1075,6 +1087,9 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) int prio; void *out; + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + MLX5_SET(ppcnt_reg, in, local_port, 1); MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { @@ -1086,13 +1101,13 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) } static const struct counter_desc mlx5e_pme_status_desc[] = { - { "module_unplug", 8 }, + { "module_unplug", sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED }, }; static const struct counter_desc mlx5e_pme_error_desc[] = { - { 
"module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */ - { "module_high_temp", 48 }, /* high temperature */ - { "module_bad_shorted", 56 }, /* bad or shorted cable/module */ + { "module_bus_stuck", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK }, + { "module_high_temp", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE }, + { "module_bad_shorted", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE }, }; #define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc) @@ -1120,15 +1135,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { - struct mlx5_priv *mlx5_priv = &priv->mdev->priv; + struct mlx5_pme_stats pme_stats; int i; + mlx5_get_pme_stats(priv->mdev, &pme_stats); + for (i = 0; i < NUM_PME_STATUS_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters, + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters, mlx5e_pme_status_desc, i); for (i = 0; i < NUM_PME_ERR_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters, mlx5e_pme_error_desc, i); return idx; @@ -1177,6 +1194,8 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 3ff69ddae2d3..16c3b785f282 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -71,6 +71,8 @@ struct mlx5e_sw_stats { u64 rx_csum_unnecessary; u64 rx_csum_none; u64 rx_csum_complete; + u64 rx_csum_complete_tail; + u64 rx_csum_complete_tail_slow; u64 rx_csum_unnecessary_inner; u64 rx_xdp_drop; u64 rx_xdp_redirect; @@ -181,6 +183,8 @@ struct mlx5e_rq_stats { u64 packets; u64 bytes; u64 csum_complete; + u64 csum_complete_tail; + u64 csum_complete_tail_slow; u64 csum_unnecessary; u64 csum_unnecessary_inner; u64 csum_none; @@ -277,6 +281,6 @@ struct mlx5e_stats_grp { extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; extern const int mlx5e_num_stats_grps; -void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv); +void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv); #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9dabe9d4b279..d75dc44eb2ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -38,21 +38,20 @@ #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> -#include <net/switchdev.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> -#include <net/vxlan.h> #include <net/arp.h> #include "en.h" #include "en_rep.h" #include "en_tc.h" #include "eswitch.h" -#include "lib/vxlan.h" #include "fs_core.h" #include "en/port.h" +#include "en/tc_tun.h" +#include "lib/devcom.h" struct mlx5_nic_flow_attr { u32 action; @@ -69,25 +68,56 @@ struct mlx5_nic_flow_attr { enum { MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS, MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS, - MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE), - MLX5E_TC_FLOW_NIC = BIT(MLX5E_TC_FLOW_BASE + 1), - MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2), - 
MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3), - MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4), - MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5), + MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD, + MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD, + MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE), + MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1), + MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2), + MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3), + MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4), + MLX5E_TC_FLOW_NOT_READY = BIT(MLX5E_TC_FLOW_BASE + 5), }; #define MLX5E_TC_MAX_SPLITS 1 +/* Helper struct for accessing a struct containing list_head array. + * Containing struct + * |- Helper array + * [0] Helper item 0 + * |- list_head item 0 + * |- index (0) + * [1] Helper item 1 + * |- list_head item 1 + * |- index (1) + * To access the containing struct from one of the list_head items: + * 1. Get the helper item from the list_head item using + * helper item = + * container_of(list_head item, helper struct type, list_head field) + * 2. Get the containing struct from the helper item and its index in the array: + * containing struct = + * container_of(helper item, containing struct type, helper field[index]) + */ +struct encap_flow_item { + struct list_head list; + int index; +}; + struct mlx5e_tc_flow { struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; u16 flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; - struct list_head encap; /* flows sharing the same encap ID */ + /* Flow can be associated with multiple encap IDs. + * The number of encaps is bounded by the number of supported + * destinations. 
+ */ + struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_tc_flow *peer_flow; struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ struct list_head hairpin; /* flows sharing the same hairpin */ + struct list_head peer; /* flows with peer flow */ + struct list_head unready; /* flows not ready to be offloaded (e.g. due to missing route) */ union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -95,11 +125,13 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { - struct ip_tunnel_info tun_info; + struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; + int max_mod_hdr_actions; void *mod_hdr_actions; - int mirred_ifindex; + int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 @@ -316,7 +348,7 @@ static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) for (i = 0; i < sz; i++) { ix = i; - if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR) + if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR) ix = mlx5e_bits_invert(i, ilog2(sz)); ix = indirection_rqt[ix]; rqn = hp->pair->rqn[ix]; @@ -360,13 +392,15 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) void *tirc; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt); + memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in)); tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); MLX5_SET(tirc, tirc, transport_domain, hp->tdn); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); + mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false); err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]); @@ -569,7 +603,7 @@ static int 
mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct netlink_ext_ack *extack) { - int peer_ifindex = parse_attr->mirred_ifindex; + int peer_ifindex = parse_attr->mirred_ifindex[0]; struct mlx5_hairpin_params params; struct mlx5_core_dev *peer_mdev; struct mlx5e_hairpin_entry *hpe; @@ -802,7 +836,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter); - if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) { + if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } @@ -815,14 +849,15 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, } static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow); + struct mlx5e_tc_flow *flow, int out_index); static int mlx5e_attach_encap(struct mlx5e_priv *priv, - struct ip_tunnel_info *tun_info, + struct mlx5e_tc_flow *flow, struct net_device *mirred_dev, + int out_index, + struct netlink_ext_ack *extack, struct net_device **encap_dev, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack); + bool *encap_valid); static struct mlx5_flow_handle * mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, @@ -836,7 +871,7 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, if (IS_ERR(rule)) return rule; - if (attr->mirror_count) { + if (attr->split_count) { flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); if (IS_ERR(flow->rule[1])) { mlx5_eswitch_del_offloaded_rule(esw, rule, attr); @@ -855,7 +890,7 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, { flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - if (attr->mirror_count) + if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); @@ -871,7 +906,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->esw_attr, 
sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr->mirror_count = 0; + slow_attr->split_count = 0; slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); @@ -888,27 +923,49 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, { memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr->mirror_count = 0; + slow_attr->split_count = 0; slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); flow->flags &= ~MLX5E_TC_FLOW_SLOW; } +static void add_unready_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &rpriv->uplink_priv; + + flow->flags |= MLX5E_TC_FLOW_NOT_READY; + list_add_tail(&flow->unready, &uplink_priv->unready_flows); +} + +static void remove_unready_flow(struct mlx5e_tc_flow *flow) +{ + list_del(&flow->unready); + flow->flags &= ~MLX5E_TC_FLOW_NOT_READY; +} + static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; u32 max_chain = mlx5_eswitch_get_chain_range(esw); struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; u16 max_prio = mlx5_eswitch_get_prio_range(esw); struct net_device *out_dev, *encap_dev = NULL; struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; - int err = 0, encap_err = 0; + bool encap_valid = true; + int err = 0; + int out_index; if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) { NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW"); @@ -927,20 +984,24 
@@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, goto err_max_prio_chain; } - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + int mirred_ifindex; + + if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mirred_ifindex = parse_attr->mirred_ifindex[out_index]; out_dev = __dev_get_by_index(dev_net(priv->netdev), - attr->parse_attr->mirred_ifindex); - encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info, - out_dev, &encap_dev, flow, - extack); - if (encap_err && encap_err != -EAGAIN) { - err = encap_err; + mirred_ifindex); + err = mlx5e_attach_encap(priv, flow, out_dev, out_index, + extack, &encap_dev, &encap_valid); + if (err) goto err_attach_encap; - } + out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; - attr->out_rep[attr->out_count] = rpriv->rep; - attr->out_mdev[attr->out_count++] = out_priv->mdev; + attr->dests[out_index].rep = rpriv->rep; + attr->dests[out_index].mdev = out_priv->mdev; } err = mlx5_eswitch_add_vlan_action(esw, attr); @@ -955,7 +1016,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(esw->dev, true); + counter = mlx5_fc_create(attr->counter_dev, true); if (IS_ERR(counter)) { err = PTR_ERR(counter); goto err_create_counter; @@ -964,10 +1025,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, attr->counter = counter; } - /* we get here if (1) there's no error or when - * (2) there's an encap action and we're on -EAGAIN (no valid neigh) + /* we get here if one of the following takes place: + * (1) there's no error + * (2) there's an encap action and we don't have valid neigh */ - if (encap_err == -EAGAIN) { + if (!encap_valid) { /* continue with goto slow path rule instead */ struct mlx5_esw_flow_attr slow_attr; @@ -984,15 +1046,16 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return 0; err_add_rule: - mlx5_fc_destroy(esw->dev, 
counter); + mlx5_fc_destroy(attr->counter_dev, counter); err_create_counter: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); err_mod_hdr: mlx5_eswitch_del_vlan_action(esw, attr); err_add_vlan: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) - mlx5e_detach_encap(priv, flow); + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + mlx5e_detach_encap(priv, flow, out_index); err_attach_encap: err_max_prio_chain: return err; @@ -1004,6 +1067,13 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5_esw_flow_attr slow_attr; + int out_index; + + if (flow->flags & MLX5E_TC_FLOW_NOT_READY) { + remove_unready_flow(flow); + kvfree(attr->parse_attr); + return; + } if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { if (flow->flags & MLX5E_TC_FLOW_SLOW) @@ -1014,16 +1084,16 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5_eswitch_del_vlan_action(esw, attr); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { - mlx5e_detach_encap(priv, flow); - kvfree(attr->parse_attr); - } + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + mlx5e_detach_encap(priv, flow, out_index); + kvfree(attr->parse_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - mlx5_fc_destroy(esw->dev, attr->counter); + mlx5_fc_destroy(attr->counter_dev, attr->counter); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -1033,10 +1103,12 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr slow_attr, *esw_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; + struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int 
err; - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, e->encap_size, e->encap_header, MLX5_FLOW_NAMESPACE_FDB, &e->encap_id); @@ -1048,11 +1120,31 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(priv); - list_for_each_entry(flow, &e->flows, encap) { + list_for_each_entry(efi, &e->flows, list) { + bool all_flow_encaps_valid = true; + int i; + + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); esw_attr = flow->esw_attr; - esw_attr->encap_id = e->encap_id; spec = &esw_attr->parse_attr->spec; + esw_attr->dests[efi->index].encap_id = e->encap_id; + esw_attr->dests[efi->index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + /* Flow can be associated with multiple encap entries. + * Before offloading the flow verify that all of them have + * a valid neighbour. + */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) + continue; + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) { + all_flow_encaps_valid = false; + break; + } + } + /* Do not offload flows with unresolved neighbors */ + if (!all_flow_encaps_valid) + continue; /* update from slow path rule to encap rule */ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); if (IS_ERR(rule)) { @@ -1075,14 +1167,18 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr slow_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; + struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err; - list_for_each_entry(flow, &e->flows, encap) { + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); spec = &flow->esw_attr->parse_attr->spec; /* update from encap rule to slow path rule */ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr); + /* mark the flow's encap dest as 
non-valid */ + flow->esw_attr->dests[efi->index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1130,9 +1226,12 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) return; list_for_each_entry(e, &nhe->encap_list, encap_list) { + struct encap_flow_item *efi; if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) continue; - list_for_each_entry(flow, &e->flows, encap) { + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, + encaps[efi->index]); if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { counter = mlx5e_tc_get_counter(flow); mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); @@ -1162,11 +1261,11 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) } static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, int out_index) { - struct list_head *next = flow->encap.next; + struct list_head *next = flow->encaps[out_index].list.next; - list_del(&flow->encap); + list_del(&flow->encaps[out_index].list); if (list_empty(next)) { struct mlx5e_encap_entry *e; @@ -1182,177 +1281,137 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, } } -static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow) +static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) - mlx5e_tc_del_fdb_flow(priv, flow); - else - mlx5e_tc_del_nic_flow(priv, flow); + struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; + + if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || + !(flow->flags & MLX5E_TC_FLOW_DUP)) + return; + + mutex_lock(&esw->offloads.peer_mutex); + list_del(&flow->peer); + mutex_unlock(&esw->offloads.peer_mutex); + + flow->flags &= ~MLX5E_TC_FLOW_DUP; + + mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); + kvfree(flow->peer_flow); + flow->peer_flow = NULL; } -static void parse_vxlan_attr(struct mlx5_flow_spec *spec, - 
struct tc_cls_flower_offload *f) +static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { - void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); - void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); + struct mlx5_core_dev *dev = flow->priv->mdev; + struct mlx5_devcom *devcom = dev->priv.devcom; + struct mlx5_eswitch *peer_esw; - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return; + + __mlx5e_tc_del_fdb_peer_flow(flow); + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +} - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->key); - struct flow_dissector_key_keyid *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); - MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, - be32_to_cpu(mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, - be32_to_cpu(key->keyid)); +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + mlx5e_tc_del_fdb_peer_flow(flow); + mlx5e_tc_del_fdb_flow(priv, flow); + } else { + mlx5e_tc_del_nic_flow(priv, flow); } } + static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, + struct net_device *filter_dev, u8 *match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, 
spec->match_criteria, outer_headers); void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_match_control enc_control; + int err; - struct flow_dissector_key_control *enc_control = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_CONTROL, - f->key); - - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->mask); - - /* Full udp dst port must be given */ - if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) - goto vxlan_match_offload_err; - - if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) - parse_vxlan_attr(spec, f); - else { - NL_SET_ERR_MSG_MOD(extack, - "port isn't an offloaded vxlan udp dport"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst)); - return -EOPNOTSUPP; - } - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_dport, ntohs(mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_dport, ntohs(key->dst)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_sport, ntohs(mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_sport, ntohs(key->src)); - } else { /* udp dst port must be given */ -vxlan_match_offload_err: + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, + headers_c, headers_v, match_level); + if (err) { NL_SET_ERR_MSG_MOD(extack, - "IP tunnel decap offload supported only for vxlan, must set UDP dport"); - netdev_warn(priv->netdev, - "IP tunnel decap offload supported only for vxlan, must set UDP dport\n"); - return -EOPNOTSUPP; + "failed to parse tunnel attributes"); + return err; } - if (enc_control->addr_type == 
FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, - f->mask); + flow_rule_match_enc_control(rule, &enc_control); + + if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); MLX5_SET(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(mask->src)); + ntohl(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(key->src)); + ntohl(match.key->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(mask->dst)); + ntohl(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(key->dst)); + ntohl(match.key->dst)); MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, - f->mask); + } else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + flow_rule_match_enc_ipv6_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, 
ipv6)); + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { - struct flow_dissector_key_ip *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IP, - f->key); - struct flow_dissector_key_ip *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + flow_rule_match_enc_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); - if (mask->ttl && + if (match.mask->ttl && 
!MLX5_CAP_ESW_FLOWTABLE_FDB (priv->mdev, ft_field_support.outer_ipv4_ttl)) { @@ -1381,7 +1440,8 @@ vxlan_match_offload_err: static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, - u8 *match_level) + struct net_device *filter_dev, + u8 *match_level, u8 *tunnel_match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1392,12 +1452,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0; *match_level = MLX5_MATCH_NONE; - if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | @@ -1416,23 +1478,21 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_IP))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", - f->dissector->used_keys); + dissector->used_keys); return -EOPNOTSUPP; } - if ((dissector_uses_key(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) || - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) && - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_CONTROL, - f->key); - switch (key->addr_type) { + if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) && + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct 
flow_match_control match; + + flow_rule_match_enc_control(rule, &match); + switch (match.key->addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f)) + if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; break; default: @@ -1448,35 +1508,27 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, inner_headers); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, - ntohs(mask->n_proto)); + ntohs(match.mask->n_proto)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ntohs(key->n_proto)); + ntohs(match.key->n_proto)); - if (mask->n_proto) + if (match.mask->n_proto) *match_level = MLX5_MATCH_L2; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); - if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) { - if (key->vlan_tpid == htons(ETH_P_8021AD)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, @@ -1488,11 +1540,15 @@ static int 
__parse_cls_flower(struct mlx5e_priv *priv, cvlan_tag, 1); } - MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, + match.mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, + match.key->vlan_id); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, + match.mask->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, + match.key->vlan_priority); *match_level = MLX5_MATCH_L2; } @@ -1502,17 +1558,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_L2; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CVLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CVLAN, - f->mask); - if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) { - if (key->vlan_tpid == htons(ETH_P_8021AD)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + struct flow_match_vlan match; + + flow_rule_match_cvlan(rule, &match); + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { MLX5_SET(fte_match_set_misc, misc_c, outer_second_svlan_tag, 1); MLX5_SET(fte_match_set_misc, misc_v, @@ -1525,69 +1578,58 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, - mask->vlan_id); + match.mask->vlan_id); MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, - key->vlan_id); + match.key->vlan_id); MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, - mask->vlan_priority); +
match.mask->vlan_priority); MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, - key->vlan_priority); + match.key->vlan_priority); *match_level = MLX5_MATCH_L2; } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); - struct flow_dissector_key_eth_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + flow_rule_match_eth_addrs(rule, &match); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dmac_47_16), - mask->dst); + match.mask->dst); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dmac_47_16), - key->dst); + match.key->dst); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, smac_47_16), - mask->src); + match.mask->src); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, smac_47_16), - key->src); + match.key->src); - if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst)) + if (!is_zero_ether_addr(match.mask->src) || + !is_zero_ether_addr(match.mask->dst)) *match_level = MLX5_MATCH_L2; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; - struct flow_dissector_key_control *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->mask); - addr_type = key->addr_type; + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; /* the HW doesn't support frag first/later */ - if (mask->flags & FLOW_DIS_FIRST_FRAG) + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) return -EOPNOTSUPP; - if (mask->flags & FLOW_DIS_IS_FRAGMENT) 
{ + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, - key->flags & FLOW_DIS_IS_FRAGMENT); + match.key->flags & FLOW_DIS_IS_FRAGMENT); /* the HW doesn't need L3 inline to match on frag=no */ - if (!(key->flags & FLOW_DIS_IS_FRAGMENT)) + if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) *match_level = MLX5_MATCH_L2; /* *** L2 attributes parsing up to here *** */ else @@ -1595,102 +1637,85 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - ip_proto = key->ip_proto; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + ip_proto = match.key->ip_proto; MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - mask->ip_proto); + match.mask->ip_proto); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - key->ip_proto); + match.key->ip_proto); - if (mask->ip_proto) + if (match.mask->ip_proto) *match_level = MLX5_MATCH_L3; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->mask); + struct flow_match_ipv4_addrs match; + flow_rule_match_ipv4_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), - &mask->src, sizeof(mask->src)); + &match.mask->src, sizeof(match.mask->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 
src_ipv4_src_ipv6.ipv4_layout.ipv4), - &key->src, sizeof(key->src)); + &match.key->src, sizeof(match.key->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &mask->dst, sizeof(mask->dst)); + &match.mask->dst, sizeof(match.mask->dst)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &key->dst, sizeof(key->dst)); + &match.key->dst, sizeof(match.key->dst)); - if (mask->src || mask->dst) + if (match.mask->src || match.mask->dst) *match_level = MLX5_MATCH_L3; } if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->mask); + struct flow_match_ipv6_addrs match; + flow_rule_match_ipv6_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &mask->src, sizeof(mask->src)); + &match.mask->src, sizeof(match.mask->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &key->src, sizeof(key->src)); + &match.key->src, sizeof(match.key->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &mask->dst, sizeof(mask->dst)); + &match.mask->dst, sizeof(match.mask->dst)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &key->dst, sizeof(key->dst)); + &match.key->dst, sizeof(match.key->dst)); - if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY || - ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY) + if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY || + ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY) *match_level = MLX5_MATCH_L3; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) { - struct flow_dissector_key_ip *key = - 
skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->key); - struct flow_dissector_key_ip *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + flow_rule_match_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); - if (mask->ttl && + if (match.mask->ttl && !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_ipv4_ttl)) { NL_SET_ERR_MSG_MOD(extack, @@ -1698,44 +1723,39 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (mask->tos || mask->ttl) + if (match.mask->tos || match.mask->ttl) *match_level = MLX5_MATCH_L3; } /* *** L3 attributes parsing up to here *** */ - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct 
flow_match_ports match; + + flow_rule_match_ports(rule, &match); switch (ip_proto) { case IPPROTO_TCP: MLX5_SET(fte_match_set_lyr_2_4, headers_c, - tcp_sport, ntohs(mask->src)); + tcp_sport, ntohs(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - tcp_sport, ntohs(key->src)); + tcp_sport, ntohs(match.key->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_c, - tcp_dport, ntohs(mask->dst)); + tcp_dport, ntohs(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - tcp_dport, ntohs(key->dst)); + tcp_dport, ntohs(match.key->dst)); break; case IPPROTO_UDP: MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_sport, ntohs(mask->src)); + udp_sport, ntohs(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_sport, ntohs(key->src)); + udp_sport, ntohs(match.key->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_dport, ntohs(mask->dst)); + udp_dport, ntohs(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_dport, ntohs(key->dst)); + udp_dport, ntohs(match.key->dst)); break; default: NL_SET_ERR_MSG_MOD(extack, @@ -1745,26 +1765,20 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, return -EINVAL; } - if (mask->src || mask->dst) + if (match.mask->src || match.mask->dst) *match_level = MLX5_MATCH_L4; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) { - struct flow_dissector_key_tcp *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->key); - struct flow_dissector_key_tcp *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + flow_rule_match_tcp(rule, &match); MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, - ntohs(mask->flags)); + ntohs(match.mask->flags)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, - ntohs(key->flags)); + ntohs(match.key->flags)); - if (mask->flags) + if (match.mask->flags) *match_level = MLX5_MATCH_L4; } @@ -1774,21 
+1788,22 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, + struct net_device *filter_dev) { struct netlink_ext_ack *extack = f->common.extack; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; + u8 match_level, tunnel_match_level = MLX5_MATCH_NONE; struct mlx5_eswitch_rep *rep; - u8 match_level; int err; - err = __parse_cls_flower(priv, spec, f, &match_level); + err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level); if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; - if (rep->vport != FDB_UPLINK_VPORT && + if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && esw->offloads.inline_mode < match_level)) { NL_SET_ERR_MSG_MOD(extack, @@ -1800,10 +1815,12 @@ static int parse_cls_flower(struct mlx5e_priv *priv, } } - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { flow->esw_attr->match_level = match_level; - else + flow->esw_attr->tunnel_match_level = tunnel_match_level; + } else { flow->nic_attr->match_level = match_level; + } return err; } @@ -1816,27 +1833,29 @@ struct pedit_headers { struct udphdr udp; }; +struct pedit_headers_action { + struct pedit_headers vals; + struct pedit_headers masks; + u32 pedits; +}; + static int pedit_header_offsets[] = { - [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), - [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), - [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), - [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), - [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), + [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, 
eth), + [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), }; #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, - struct pedit_headers *masks, - struct pedit_headers *vals) + struct pedit_headers_action *hdrs) { u32 *curr_pmask, *curr_pval; - if (hdr_type >= __PEDIT_HDR_TYPE_MAX) - goto out_err; - - curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset); - curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset); + curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); if (*curr_pmask & mask) /* disallow acting twice on the same location */ goto out_err; @@ -1888,12 +1907,11 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 2, udp.dest, 0), }; -/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, it says how many HW actions were - * actually parsed. +/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at + * max from the SW pedit action. On success, attr->num_mod_hdr_actions + * says how many HW actions were actually parsed. 
*/ -static int offload_pedit_fields(struct pedit_headers *masks, - struct pedit_headers *vals, +static int offload_pedit_fields(struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, struct netlink_ext_ack *extack) { @@ -1908,15 +1926,17 @@ static int offload_pedit_fields(struct pedit_headers *masks, __be16 mask_be16; void *action; - set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET]; - add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD]; - set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET]; - add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD]; + set_masks = &hdrs[0].masks; + add_masks = &hdrs[1].masks; + set_vals = &hdrs[0].vals; + add_vals = &hdrs[1].vals; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - action = parse_attr->mod_hdr_actions; - max_actions = parse_attr->num_mod_hdr_actions; - nactions = 0; + action = parse_attr->mod_hdr_actions + + parse_attr->num_mod_hdr_actions * action_size; + + max_actions = parse_attr->max_mod_hdr_actions; + nactions = parse_attr->num_mod_hdr_actions; for (i = 0; i < ARRAY_SIZE(fields); i++) { f = &fields[i]; @@ -2007,12 +2027,14 @@ static int offload_pedit_fields(struct pedit_headers *masks, } static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, - const struct tc_action *a, int namespace, + struct pedit_headers_action *hdrs, + int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr) { int nkeys, action_size, max_actions; - nkeys = tcf_pedit_nkeys(a); + nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits + + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ @@ -2027,62 +2049,67 @@ static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, if (!parse_attr->mod_hdr_actions) return -ENOMEM; - parse_attr->num_mod_hdr_actions = max_actions; + parse_attr->max_mod_hdr_actions = max_actions; return 0; } static const struct pedit_headers zero_masks = {}; static int parse_tc_pedit_action(struct 
mlx5e_priv *priv, - const struct tc_action *a, int namespace, + const struct flow_action_entry *act, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { - struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks; - int nkeys, i, err = -EOPNOTSUPP; + u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; + int err = -EOPNOTSUPP; u32 mask, val, offset; - u8 cmd, htype; + u8 htype; - nkeys = tcf_pedit_nkeys(a); + htype = act->mangle.htype; + err = -EOPNOTSUPP; /* can't be all optimistic */ - memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); - memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); + if (htype == FLOW_ACT_MANGLE_UNSPEC) { + NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded"); + goto out_err; + } - for (i = 0; i < nkeys; i++) { - htype = tcf_pedit_htype(a, i); - cmd = tcf_pedit_cmd(a, i); - err = -EOPNOTSUPP; /* can't be all optimistic */ + mask = act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset; - if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) { - NL_SET_ERR_MSG_MOD(extack, - "legacy pedit isn't offloaded"); - goto out_err; - } + err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]); + if (err) + goto out_err; - if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) { - NL_SET_ERR_MSG_MOD(extack, "pedit cmd isn't offloaded"); - goto out_err; - } + hdrs[cmd].pedits++; + + return 0; +out_err: + return err; +} - mask = tcf_pedit_mask(a, i); - val = tcf_pedit_val(a, i); - offset = tcf_pedit_offset(a, i); +static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + struct pedit_headers *cmd_masks; + int err; + u8 cmd; - err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]); + if (!parse_attr->mod_hdr_actions) { + err = 
alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); if (err) goto out_err; } - err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); - if (err) - goto out_err; - - err = offload_pedit_fields(masks, vals, parse_attr, extack); + err = offload_pedit_fields(hdrs, parse_attr, extack); if (err < 0) goto out_dealloc_parsed_actions; for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { - cmd_masks = &masks[cmd]; + cmd_masks = &hdrs[cmd].masks; if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field"); @@ -2131,19 +2158,69 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, return true; } +struct ip_ttl_word { + __u8 ttl; + __u8 protocol; + __sum16 check; +}; + +struct ipv6_hoplimit_word { + __be16 payload_len; + __u8 nexthdr; + __u8 hop_limit; +}; + +static bool is_action_keys_supported(const struct flow_action_entry *act) +{ + u32 mask, offset; + u8 htype; + + htype = act->mangle.htype; + offset = act->mangle.offset; + mask = ~act->mangle.mask; + /* For IPv4 & IPv6 header check 4 byte word, + * to determine that modified fields + * are NOT ttl & hop_limit only. 
+ */ + if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { + struct ip_ttl_word *ttl_word = + (struct ip_ttl_word *)&mask; + + if (offset != offsetof(struct iphdr, ttl) || + ttl_word->protocol || + ttl_word->check) { + return true; + } + } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + struct ipv6_hoplimit_word *hoplimit_word = + (struct ipv6_hoplimit_word *)&mask; + + if (offset != offsetof(struct ipv6hdr, payload_len) || + hoplimit_word->payload_len || + hoplimit_word->nexthdr) { + return true; + } + } + return false; +} + static bool modify_header_match_supported(struct mlx5_flow_spec *spec, - struct tcf_exts *exts, + struct flow_action *flow_action, + u32 actions, struct netlink_ext_ack *extack) { - const struct tc_action *a; + const struct flow_action_entry *act; bool modify_ip_header; - LIST_HEAD(actions); - u8 htype, ip_proto; void *headers_v; u16 ethertype; - int nkeys, i; + u8 ip_proto; + int i; + + if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); + else + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); - headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); /* for non-IP we only re-write MACs, so we're okay */ @@ -2151,20 +2228,14 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, goto out_ok; modify_ip_header = false; - tcf_exts_for_each_action(i, a, exts) { - int k; - - if (!is_tcf_pedit(a)) + flow_action_for_each(i, act, flow_action) { + if (act->id != FLOW_ACTION_MANGLE && + act->id != FLOW_ACTION_ADD) continue; - nkeys = tcf_pedit_nkeys(a); - for (k = 0; k < nkeys; k++) { - htype = tcf_pedit_htype(a, k); - if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || - htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { - modify_ip_header = true; - break; - } + if (is_action_keys_supported(act)) { + modify_ip_header = true; + break; } } @@ -2182,7 +2253,7 @@ 
out_ok: } static bool actions_match_supported(struct mlx5e_priv *priv, - struct tcf_exts *exts, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) @@ -2199,7 +2270,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv, return false; if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - return modify_header_match_supported(&parse_attr->spec, exts, + return modify_header_match_supported(&parse_attr->spec, + flow_action, actions, extack); return true; @@ -2219,57 +2291,54 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) return (fsystem_guid == psystem_guid); } -static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, +static int parse_tc_nic_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { struct mlx5_nic_flow_attr *attr = flow->nic_attr; - const struct tc_action *a; - LIST_HEAD(actions); + struct pedit_headers_action hdrs[2] = {}; + const struct flow_action_entry *act; u32 action = 0; int err, i; - if (!tcf_exts_has_actions(exts)) + if (!flow_action_has_entries(flow_action)) return -EINVAL; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - tcf_exts_for_each_action(i, a, exts) { - if (is_tcf_gact_shot(a)) { + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.flow_counter)) action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - continue; - } - - if (is_tcf_pedit(a)) { - err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr, extack); + break; + case FLOW_ACTION_MANGLE: + case FLOW_ACTION_ADD: + err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr, hdrs, extack); if (err) return err; action |= 
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - continue; - } - - if (is_tcf_csum(a)) { + break; + case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, - tcf_csum_update_flags(a), + act->csum_flags, extack)) - continue; + break; return -EOPNOTSUPP; - } - - if (is_tcf_mirred_egress_redirect(a)) { - struct net_device *peer_dev = tcf_mirred_dev(a); + case FLOW_ACTION_REDIRECT: { + struct net_device *peer_dev = act->dev; if (priv->netdev->netdev_ops == peer_dev->netdev_ops && same_hw_devs(priv, netdev_priv(peer_dev))) { - parse_attr->mirred_ifindex = peer_dev->ifindex; + parse_attr->mirred_ifindex[0] = peer_dev->ifindex; flow->flags |= MLX5E_TC_FLOW_HAIRPIN; action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; @@ -2280,11 +2349,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, peer_dev->name); return -EINVAL; } - continue; - } - - if (is_tcf_skbedit_mark(a)) { - u32 mark = tcf_skbedit_mark(a); + } + break; + case FLOW_ACTION_MARK: { + u32 mark = act->mark; if (mark & ~MLX5E_TC_FLOW_ID_MASK) { NL_SET_ERR_MSG_MOD(extack, @@ -2294,70 +2362,47 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->flow_tag = mark; action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - continue; + } + break; + default: + return -EINVAL; } + } - return -EINVAL; + if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { + err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr, hdrs, extack); + if (err) + return err; } attr->action = action; - if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP; return 0; } -static inline int cmp_encap_info(struct ip_tunnel_key *a, - struct ip_tunnel_key *b) -{ - return memcmp(a, b, sizeof(*a)); -} +struct encap_key { + struct ip_tunnel_key *ip_tun_key; + int tunnel_type; +}; 
-static inline int hash_encap_info(struct ip_tunnel_key *key) +static inline int cmp_encap_info(struct encap_key *a, + struct encap_key *b) { - return jhash(key, sizeof(*key), 0); + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || + a->tunnel_type != b->tunnel_type; } -static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct net_device **out_dev, - struct flowi4 *fl4, - struct neighbour **out_n, - u8 *out_ttl) +static inline int hash_encap_info(struct encap_key *key) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *uplink_rpriv; - struct rtable *rt; - struct neighbour *n = NULL; - -#if IS_ENABLED(CONFIG_INET) - int ret; - - rt = ip_route_output_key(dev_net(mirred_dev), fl4); - ret = PTR_ERR_OR_ZERO(rt); - if (ret) - return ret; -#else - return -EOPNOTSUPP; -#endif - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) - *out_dev = uplink_rpriv->netdev; - else - *out_dev = rt->dst.dev; - - if (!(*out_ttl)) - *out_ttl = ip4_dst_hoplimit(&rt->dst); - n = dst_neigh_lookup(&rt->dst, &fl4->daddr); - ip_rt_put(rt); - if (!n) - return -ENOMEM; - - *out_n = n; - return 0; + return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), + key->tunnel_type); } + static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, struct net_device *peer_netdev) { @@ -2366,389 +2411,45 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, peer_priv = netdev_priv(peer_netdev); return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && - (priv->netdev->netdev_ops == peer_netdev->netdev_ops) && - same_hw_devs(priv, peer_priv) && - MLX5_VPORT_MANAGER(peer_priv->mdev) && - (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS)); -} - -static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct net_device 
**out_dev, - struct flowi6 *fl6, - struct neighbour **out_n, - u8 *out_ttl) -{ - struct neighbour *n = NULL; - struct dst_entry *dst; - -#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) - struct mlx5e_rep_priv *uplink_rpriv; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - int ret; - - ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, - fl6); - if (ret < 0) - return ret; - - if (!(*out_ttl)) - *out_ttl = ip6_dst_hoplimit(dst); - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) - *out_dev = uplink_rpriv->netdev; - else - *out_dev = dst->dev; -#else - return -EOPNOTSUPP; -#endif - - n = dst_neigh_lookup(dst, &fl6->daddr); - dst_release(dst); - if (!n) - return -ENOMEM; - - *out_n = n; - return 0; -} - -static void gen_vxlan_header_ipv4(struct net_device *out_dev, - char buf[], int encap_size, - unsigned char h_dest[ETH_ALEN], - u8 tos, u8 ttl, - __be32 daddr, - __be32 saddr, - __be16 udp_dst_port, - __be32 vx_vni) -{ - struct ethhdr *eth = (struct ethhdr *)buf; - struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); - struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr)); - struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); - - memset(buf, 0, encap_size); - - ether_addr_copy(eth->h_dest, h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IP); - - ip->daddr = daddr; - ip->saddr = saddr; - - ip->tos = tos; - ip->ttl = ttl; - ip->protocol = IPPROTO_UDP; - ip->version = 0x4; - ip->ihl = 0x5; - - udp->dest = udp_dst_port; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(vx_vni); -} - -static void gen_vxlan_header_ipv6(struct net_device *out_dev, - char buf[], int encap_size, - unsigned char h_dest[ETH_ALEN], - u8 tos, u8 ttl, - struct in6_addr *daddr, - struct in6_addr 
*saddr, - __be16 udp_dst_port, - __be32 vx_vni) -{ - struct ethhdr *eth = (struct ethhdr *)buf; - struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); - struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr)); - struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); - - memset(buf, 0, encap_size); - - ether_addr_copy(eth->h_dest, h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IPV6); - - ip6_flow_hdr(ip6h, tos, 0); - /* the HW fills up ipv6 payload len */ - ip6h->nexthdr = IPPROTO_UDP; - ip6h->hop_limit = ttl; - ip6h->daddr = *daddr; - ip6h->saddr = *saddr; - - udp->dest = udp_dst_port; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(vx_vni); -} - -static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) -{ - int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; - struct ip_tunnel_key *tun_key = &e->tun_info.key; - struct net_device *out_dev; - struct neighbour *n = NULL; - struct flowi4 fl4 = {}; - u8 nud_state, tos, ttl; - char *encap_header; - int err; - - if (max_encap_size < ipv4_encap_size) { - mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", - ipv4_encap_size, max_encap_size); - return -EOPNOTSUPP; - } - - encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - fl4.flowi4_proto = IPPROTO_UDP; - fl4.fl4_dport = tun_key->tp_dst; - break; - default: - err = -EOPNOTSUPP; - goto free_encap; - } - - tos = tun_key->tos; - ttl = tun_key->ttl; - - fl4.flowi4_tos = tun_key->tos; - fl4.daddr = tun_key->u.ipv4.dst; - fl4.saddr = tun_key->u.ipv4.src; - - err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, - &fl4, &n, &ttl); - if (err) - goto 
free_encap; - - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - e->out_dev = out_dev; - - /* It's importent to add the neigh to the hash table before checking - * the neigh validity state. So if we'll get a notification, in case the - * neigh changes it's validity state, we would find the relevant neigh - * in the hash. - */ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); - if (err) - goto free_encap; - - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - gen_vxlan_header_ipv4(out_dev, encap_header, - ipv4_encap_size, e->h_dest, tos, ttl, - fl4.daddr, - fl4.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); - break; - default: - err = -EOPNOTSUPP; - goto destroy_neigh_entry; - } - e->encap_size = ipv4_encap_size; - e->encap_header = encap_header; - - if (!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - err = -EAGAIN; - goto out; - } - - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, - ipv4_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) - goto destroy_neigh_entry; - - e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - neigh_release(n); - return err; - -destroy_neigh_entry: - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -free_encap: - kfree(encap_header); -out: - if (n) - neigh_release(n); - return err; + mlx5e_eswitch_rep(priv->netdev) && + mlx5e_eswitch_rep(peer_netdev) && + same_hw_devs(priv, peer_priv)); } -static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) -{ - int max_encap_size = 
MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; - struct ip_tunnel_key *tun_key = &e->tun_info.key; - struct net_device *out_dev; - struct neighbour *n = NULL; - struct flowi6 fl6 = {}; - u8 nud_state, tos, ttl; - char *encap_header; - int err; - - if (max_encap_size < ipv6_encap_size) { - mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", - ipv6_encap_size, max_encap_size); - return -EOPNOTSUPP; - } - - encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - fl6.flowi6_proto = IPPROTO_UDP; - fl6.fl6_dport = tun_key->tp_dst; - break; - default: - err = -EOPNOTSUPP; - goto free_encap; - } - - tos = tun_key->tos; - ttl = tun_key->ttl; - - fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); - fl6.daddr = tun_key->u.ipv6.dst; - fl6.saddr = tun_key->u.ipv6.src; - - err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, - &fl6, &n, &ttl); - if (err) - goto free_encap; - - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - e->out_dev = out_dev; - - /* It's importent to add the neigh to the hash table before checking - * the neigh validity state. So if we'll get a notification, in case the - * neigh changes it's validity state, we would find the relevant neigh - * in the hash. 
- */ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); - if (err) - goto free_encap; - - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - gen_vxlan_header_ipv6(out_dev, encap_header, - ipv6_encap_size, e->h_dest, tos, ttl, - &fl6.daddr, - &fl6.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); - break; - default: - err = -EOPNOTSUPP; - goto destroy_neigh_entry; - } - - e->encap_size = ipv6_encap_size; - e->encap_header = encap_header; - - if (!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - err = -EAGAIN; - goto out; - } - - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, - ipv6_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) - goto destroy_neigh_entry; - e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - neigh_release(n); - return err; - -destroy_neigh_entry: - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -free_encap: - kfree(encap_header); -out: - if (n) - neigh_release(n); - return err; -} static int mlx5e_attach_encap(struct mlx5e_priv *priv, - struct ip_tunnel_info *tun_info, + struct mlx5e_tc_flow *flow, struct net_device *mirred_dev, + int out_index, + struct netlink_ext_ack *extack, struct net_device **encap_dev, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) + bool *encap_valid) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - unsigned short family = ip_tunnel_info_af(tun_info); struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct ip_tunnel_key *key = &tun_info->key; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct ip_tunnel_info *tun_info; + struct encap_key key, e_key; struct mlx5e_encap_entry *e; - int tunnel_type, err = 0; + unsigned short family; uintptr_t hash_key; bool found = false; + int err = 0; - /* udp dst port must be set */ - if 
(!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) - goto vxlan_encap_offload_err; - - /* setting udp src port isn't supported */ - if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) { -vxlan_encap_offload_err: - NL_SET_ERR_MSG_MOD(extack, - "must set udp dst port and not set udp src port"); - netdev_warn(priv->netdev, - "must set udp dst port and not set udp src port\n"); - return -EOPNOTSUPP; - } - - if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; - } else { - NL_SET_ERR_MSG_MOD(extack, - "port isn't an offloaded vxlan udp dport"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst)); - return -EOPNOTSUPP; - } + parse_attr = attr->parse_attr; + tun_info = &parse_attr->tun_info[out_index]; + family = ip_tunnel_info_af(tun_info); + key.ip_tun_key = &tun_info->key; + key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev); - hash_key = hash_encap_info(key); + hash_key = hash_encap_info(&key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - if (!cmp_encap_info(&e->tun_info.key, key)) { + e_key.ip_tun_key = &e->tun_info.key; + e_key.tunnel_type = e->tunnel_type; + if (!cmp_encap_info(&e_key, &key)) { found = true; break; } @@ -2763,26 +2464,33 @@ vxlan_encap_offload_err: return -ENOMEM; e->tun_info = *tun_info; - e->tunnel_type = tunnel_type; + err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); + if (err) + goto out_err; + INIT_LIST_HEAD(&e->flows); if (family == AF_INET) - err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e); + err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) - err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e); + err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); - if (err && err != -EAGAIN) + if (err) goto out_err; hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, 
hash_key); attach_flow: - list_add(&flow->encap, &e->flows); + list_add(&flow->encaps[out_index].list, &e->flows); + flow->encaps[out_index].index = out_index; *encap_dev = e->out_dev; - if (e->flags & MLX5_ENCAP_ENTRY_VALID) - attr->encap_id = e->encap_id; - else - err = -EAGAIN; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + attr->dests[out_index].encap_id = e->encap_id; + attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + *encap_valid = true; + } else { + *encap_valid = false; + } return err; @@ -2792,7 +2500,7 @@ out_err: } static int parse_tc_vlan_action(struct mlx5e_priv *priv, - const struct tc_action *a, + const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, u32 *action) { @@ -2801,7 +2509,8 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, if (vlan_idx >= MLX5_FS_VLAN_DEPTH) return -EOPNOTSUPP; - if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { + switch (act->id) { + case FLOW_ACTION_VLAN_POP: if (vlan_idx) { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, MLX5_FS_VLAN_DEPTH)) @@ -2811,10 +2520,11 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, } else { *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; } - } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { - attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a); - attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a); - attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a); + break; + case FLOW_ACTION_VLAN_PUSH: + attr->vlan_vid[vlan_idx] = act->vlan.vid; + attr->vlan_prio[vlan_idx] = act->vlan.prio; + attr->vlan_proto[vlan_idx] = act->vlan.proto; if (!attr->vlan_proto[vlan_idx]) attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); @@ -2826,13 +2536,15 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; } else { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && - (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) || - tcf_vlan_push_prio(a))) + (act->vlan.proto != htons(ETH_P_8021Q) || + act->vlan.prio)) return 
-EOPNOTSUPP; *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; } - } else { /* action is TCA_VLAN_ACT_MODIFY */ + break; + default: + /* action is FLOW_ACT_VLAN_MANGLE */ return -EOPNOTSUPP; } @@ -2841,59 +2553,63 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, return 0; } -static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, +static int parse_tc_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct ip_tunnel_info *info = NULL; - const struct tc_action *a; - LIST_HEAD(actions); + const struct ip_tunnel_info *info = NULL; + const struct flow_action_entry *act; bool encap = false; u32 action = 0; int err, i; - if (!tcf_exts_has_actions(exts)) + if (!flow_action_has_entries(flow_action)) return -EINVAL; attr->in_rep = rpriv->rep; attr->in_mdev = priv->mdev; - tcf_exts_for_each_action(i, a, exts) { - if (is_tcf_gact_shot(a)) { + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; - continue; - } - - if (is_tcf_pedit(a)) { - err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB, - parse_attr, extack); + break; + case FLOW_ACTION_MANGLE: + case FLOW_ACTION_ADD: + err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, + parse_attr, hdrs, extack); if (err) return err; action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - attr->mirror_count = attr->out_count; - continue; - } - - if (is_tcf_csum(a)) { + attr->split_count = attr->out_count; + break; + case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, - tcf_csum_update_flags(a), - extack)) - continue; + act->csum_flags, extack)) + break; 
return -EOPNOTSUPP; - } - - if (is_tcf_mirred_egress_redirect(a) || is_tcf_mirred_egress_mirror(a)) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_MIRRED: { struct mlx5e_priv *out_priv; struct net_device *out_dev; - out_dev = tcf_mirred_dev(a); + out_dev = act->dev; + if (!out_dev) { + /* out_dev is NULL when filters with + * non-existing mirred device are replayed to + * the driver. + */ + return -EINVAL; + } if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { NL_SET_ERR_MSG_MOD(extack, @@ -2903,23 +2619,47 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EOPNOTSUPP; } - if (switchdev_port_same_parent_id(priv->netdev, - out_dev) || + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + if (netdev_port_same_parent_id(priv->netdev, + out_dev) || is_merged_eswitch_dev(priv, out_dev)) { - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); + + if (uplink_upper && + netif_is_lag_master(uplink_upper) && + uplink_upper == out_dev) + out_dev = uplink_dev; + + if (!mlx5e_eswitch_rep(out_dev)) + return -EOPNOTSUPP; + out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; - attr->out_rep[attr->out_count] = rpriv->rep; - attr->out_mdev[attr->out_count++] = out_priv->mdev; + attr->dests[attr->out_count].rep = rpriv->rep; + attr->dests[attr->out_count].mdev = out_priv->mdev; + attr->out_count++; } else if (encap) { - parse_attr->mirred_ifindex = out_dev->ifindex; - parse_attr->tun_info = *info; + parse_attr->mirred_ifindex[attr->out_count] = + out_dev->ifindex; + parse_attr->tun_info[attr->out_count] = *info; + encap = false; attr->parse_attr = parse_attr; - action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - 
MLX5_FLOW_CONTEXT_ACTION_COUNT; - /* attr->out_rep is resolved when we handle encap */ + attr->dests[attr->out_count].flags |= + MLX5_ESW_DEST_ENCAP; + attr->out_count++; + /* attr->dests[].rep is resolved when we + * handle encap + */ + } else if (parse_attr->filter_dev != priv->netdev) { + /* All mlx5 devices are called to configure + * high level device filters. Therefore, the + * *attempt* to install a filter on invalid + * eswitch should not trigger an explicit error + */ + return -EINVAL; } else { NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); @@ -2927,36 +2667,29 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, priv->netdev->name, out_dev->name); return -EINVAL; } - continue; - } - - if (is_tcf_tunnel_set(a)) { - info = tcf_tunnel_info(a); + } + break; + case FLOW_ACTION_TUNNEL_ENCAP: + info = act->tunnel; if (info) encap = true; else return -EOPNOTSUPP; - attr->mirror_count = attr->out_count; - continue; - } - - if (is_tcf_vlan(a)) { - err = parse_tc_vlan_action(priv, a, attr, &action); + break; + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_POP: + err = parse_tc_vlan_action(priv, act, attr, &action); if (err) return err; - attr->mirror_count = attr->out_count; - continue; - } - - if (is_tcf_tunnel_release(a)) { + attr->split_count = attr->out_count; + break; + case FLOW_ACTION_TUNNEL_DECAP: action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; - continue; - } - - if (is_tcf_gact_goto_chain(a)) { - u32 dest_chain = tcf_gact_goto_chain_index(a); + break; + case FLOW_ACTION_GOTO: { + u32 dest_chain = act->chain_index; u32 max_chain = mlx5_eswitch_get_chain_range(esw); if (dest_chain <= attr->chain) { @@ -2969,15 +2702,23 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = dest_chain; - - continue; + break; + } + default: + return -EINVAL; } + } - return -EINVAL; + if 
(hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { + err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB, + parse_attr, hdrs, extack); + if (err) + return err; } attr->action = action; - if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP; if (attr->dest_chain) { @@ -2988,7 +2729,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } - if (attr->mirror_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { + if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { NL_SET_ERR_MSG_MOD(extack, "current firmware doesn't support split rule for port mirroring"); netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n"); @@ -3007,6 +2748,11 @@ static void get_flags(int flags, u16 *flow_flags) if (flags & MLX5E_TC_EGRESS) __flow_flags |= MLX5E_TC_FLOW_EGRESS; + if (flags & MLX5E_TC_ESW_OFFLOAD) + __flow_flags |= MLX5E_TC_FLOW_ESWITCH; + if (flags & MLX5E_TC_NIC_OFFLOAD) + __flow_flags |= MLX5E_TC_FLOW_NIC; + *flow_flags = __flow_flags; } @@ -3017,18 +2763,39 @@ static const struct rhashtable_params tc_ht_params = { .automatic_shrinking = true, }; -static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *uplink_rpriv; - if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) { + if (flags & MLX5E_TC_ESW_OFFLOAD) { uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - return &uplink_rpriv->tc_ht; - } else + return &uplink_rpriv->uplink_priv.tc_ht; + } else /* NIC offload */ return &priv->fs.tc.ht; } +static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) +{ + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + bool 
is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && + flow->flags & MLX5E_TC_FLOW_INGRESS; + bool act_is_encap = !!(attr->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); + bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS); + + if (!esw_paired) + return false; + + if ((mlx5_lag_is_sriov(attr->in_mdev) || + mlx5_lag_is_multipath(attr->in_mdev)) && + (is_rep_ingress || act_is_encap)) + return true; + + return false; +} + static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, struct tc_cls_flower_offload *f, u16 flow_flags, @@ -3050,10 +2817,6 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, flow->flags = flow_flags; flow->priv = priv; - err = parse_cls_flower(priv, flow, &parse_attr->spec, f); - if (err) - goto err_free; - *__flow = flow; *__parse_attr = parse_attr; @@ -3065,12 +2828,39 @@ err_free: return err; } -static int -mlx5e_add_fdb_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, - u16 flow_flags, - struct mlx5e_tc_flow **__flow) +static void +mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, + struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct tc_cls_flower_offload *f, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + esw_attr->parse_attr = parse_attr; + esw_attr->chain = f->common.chain_index; + esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; + + esw_attr->in_rep = in_rep; + esw_attr->in_mdev = in_mdev; + + if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == + MLX5_COUNTER_SOURCE_ESWITCH) + esw_attr->counter_dev = in_mdev; + else + esw_attr->counter_dev = priv->mdev; +} + +static struct mlx5e_tc_flow * +__mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct net_device *filter_dev, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev) { + struct flow_rule *rule = 
tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -3083,28 +2873,118 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto out; - flow->esw_attr->chain = f->common.chain_index; - flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; - err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, extack); + parse_attr->filter_dev = filter_dev; + mlx5e_flow_esw_attr_init(flow->esw_attr, + priv, parse_attr, + f, in_rep, in_mdev); + + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); if (err) goto err_free; - err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); + err = parse_tc_fdb_actions(priv, &rule->action, parse_attr, flow, extack); if (err) goto err_free; - if (!(flow->esw_attr->action & - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)) - kvfree(parse_attr); + err = mlx5e_tc_add_fdb_flow(priv, flow, extack); + if (err) { + if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) + goto err_free; - *__flow = flow; + add_unready_flow(flow); + } - return 0; + return flow; err_free: kfree(flow); kvfree(parse_attr); out: + return ERR_PTR(err); +} + +static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, + struct mlx5e_tc_flow *flow, + u16 flow_flags) +{ + struct mlx5e_priv *priv = flow->priv, *peer_priv; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_rep_priv *peer_urpriv; + struct mlx5e_tc_flow *peer_flow; + struct mlx5_core_dev *in_mdev; + int err = 0; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return -ENODEV; + + peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); + peer_priv = netdev_priv(peer_urpriv->netdev); + + /* in_mdev is assigned of which the packet originated from. 
+ * So packets redirected to uplink use the same mdev of the + * original flow and packets redirected from uplink use the + * peer mdev. + */ + if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK) + in_mdev = peer_priv->mdev; + else + in_mdev = priv->mdev; + + parse_attr = flow->esw_attr->parse_attr; + peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags, + parse_attr->filter_dev, + flow->esw_attr->in_rep, in_mdev); + if (IS_ERR(peer_flow)) { + err = PTR_ERR(peer_flow); + goto out; + } + + flow->peer_flow = peer_flow; + flow->flags |= MLX5E_TC_FLOW_DUP; + mutex_lock(&esw->offloads.peer_mutex); + list_add_tail(&flow->peer, &esw->offloads.peer_flows); + mutex_unlock(&esw->offloads.peer_mutex); + +out: + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return err; +} + +static int +mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct net_device *filter_dev, + struct mlx5e_tc_flow **__flow) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *in_rep = rpriv->rep; + struct mlx5_core_dev *in_mdev = priv->mdev; + struct mlx5e_tc_flow *flow; + int err; + + flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, + in_mdev); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + if (is_peer_flow_needed(flow)) { + err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags); + if (err) { + mlx5e_tc_del_fdb_flow(priv, flow); + goto out; + } + } + + *__flow = flow; + + return 0; + +out: return err; } @@ -3112,8 +2992,10 @@ static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, u16 flow_flags, + struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -3130,7 +3012,13 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (err) goto out; - err = 
parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack); + parse_attr->filter_dev = filter_dev; + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); + if (err) + goto err_free; + + err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack); if (err) goto err_free; @@ -3155,6 +3043,7 @@ static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags, + struct net_device *filter_dev, struct mlx5e_tc_flow **flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -3167,18 +3056,20 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, return -EOPNOTSUPP; if (esw && esw->mode == SRIOV_OFFLOADS) - err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow); + err = mlx5e_add_fdb_flow(priv, f, flow_flags, + filter_dev, flow); else - err = mlx5e_add_nic_flow(priv, f, flow_flags, flow); + err = mlx5e_add_nic_flow(priv, f, flow_flags, + filter_dev, flow); return err; } -int mlx5e_configure_flower(struct mlx5e_priv *priv, +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { struct netlink_ext_ack *extack = f->common.extack; - struct rhashtable *tc_ht = get_tc_ht(priv); + struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; int err = 0; @@ -3192,7 +3083,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, goto out; } - err = mlx5e_tc_add_flow(priv, f, flags, &flow); + err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); if (err) goto out; @@ -3220,10 +3111,10 @@ static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) return false; } -int mlx5e_delete_flower(struct mlx5e_priv *priv, +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { - struct rhashtable *tc_ht = get_tc_ht(priv); + struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); @@ 
-3239,30 +3130,57 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, return 0; } -int mlx5e_stats_flower(struct mlx5e_priv *priv, +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { - struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; + struct rhashtable *tc_ht = get_tc_ht(priv, flags); + struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; struct mlx5_fc *counter; - u64 bytes; - u64 packets; - u64 lastuse; + u64 lastuse = 0; + u64 packets = 0; + u64 bytes = 0; flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); if (!flow || !same_flow_direction(flow, flags)) return -EINVAL; - if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) - return 0; + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + counter = mlx5e_tc_get_counter(flow); + if (!counter) + return 0; - counter = mlx5e_tc_get_counter(flow); - if (!counter) - return 0; + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + } + + /* Under multipath it's possible for one rule to be currently + * un-offloaded while the other rule is offloaded. 
+ */ + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + goto out; - mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + if ((flow->flags & MLX5E_TC_FLOW_DUP) && + (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) { + u64 bytes2; + u64 packets2; + u64 lastuse2; - tcf_exts_stats_update(f->exts, bytes, packets, lastuse); + counter = mlx5e_tc_get_counter(flow->peer_flow); + if (!counter) + goto no_peer_counter; + mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); + + bytes += bytes2; + packets += packets2; + lastuse = max_t(u64, lastuse, lastuse2); + } + +no_peer_counter: + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +out: + flow_stats_update(&f->stats, bytes, packets, lastuse); return 0; } @@ -3350,7 +3268,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) if (tc->netdevice_nb.notifier_call) unregister_netdevice_notifier(&tc->netdevice_nb); - rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); + rhashtable_destroy(&tc->ht); if (!IS_ERR_OR_NULL(tc->t)) { mlx5_destroy_flow_table(tc->t); @@ -3368,9 +3286,32 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); } -int mlx5e_tc_num_filters(struct mlx5e_priv *priv) +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) { - struct rhashtable *tc_ht = get_tc_ht(priv); + struct rhashtable *tc_ht = get_tc_ht(priv, flags); return atomic_read(&tc_ht->nelems); } + +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) +{ + struct mlx5e_tc_flow *flow, *tmp; + + list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer) + __mlx5e_tc_del_fdb_peer_flow(flow); +} + +void mlx5e_tc_reoffload_flows_work(struct work_struct *work) +{ + struct mlx5_rep_uplink_priv *rpriv = + container_of(work, struct mlx5_rep_uplink_priv, + reoffload_flows_work); + struct mlx5e_tc_flow *flow, *tmp; + + rtnl_lock(); + list_for_each_entry_safe(flow, tmp, 
&rpriv->unready_flows, unready) { + if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) + remove_unready_flow(flow); + } + rtnl_unlock(); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 49436bf3b80a..f62e81902d27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -42,7 +42,9 @@ enum { MLX5E_TC_INGRESS = BIT(0), MLX5E_TC_EGRESS = BIT(1), - MLX5E_TC_LAST_EXPORTED_BIT = 1, + MLX5E_TC_NIC_OFFLOAD = BIT(2), + MLX5E_TC_ESW_OFFLOAD = BIT(3), + MLX5E_TC_LAST_EXPORTED_BIT = 3, }; int mlx5e_tc_nic_init(struct mlx5e_priv *priv); @@ -51,12 +53,12 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); -int mlx5e_configure_flower(struct mlx5e_priv *priv, +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); -int mlx5e_delete_flower(struct mlx5e_priv *priv, +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); -int mlx5e_stats_flower(struct mlx5e_priv *priv, +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); struct mlx5e_encap_entry; @@ -68,12 +70,14 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5e_neigh_hash_entry; void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); -int mlx5e_tc_num_filters(struct mlx5e_priv *priv); +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags); + +void mlx5e_tc_reoffload_flows_work(struct work_struct *work); #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} -static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; } +static 
inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) { return 0; } #endif #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 6dacaeba2fbf..25a8f8260c14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -127,7 +127,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, else #endif if (skb_vlan_tag_present(skb)) - up = skb->vlan_tci >> VLAN_PRIO_SHIFT; + up = skb_vlan_tag_get_prio(skb); /* channel_ix can be larger than num_channels since * dev->num_real_tx_queues = num_channels * num_tc @@ -148,12 +148,8 @@ static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb) { - struct flow_keys keys; - if (skb_transport_header_was_set(skb)) return skb_transport_offset(skb); - else if (skb_flow_dissect_flow_keys(skb, &keys, 0)) - return keys.control.thoff; else return mlx5e_skb_l2_header_offset(skb); } @@ -172,15 +168,8 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, hlen += VLAN_HLEN; break; case MLX5_INLINE_MODE_IP: - /* When transport header is set to zero, it means no transport - * header. When transport header is set to 0xff's, it means - * transport header wasn't set. 
- */ - if (skb_transport_offset(skb)) { - hlen = mlx5e_skb_l3_header_offset(skb); - break; - } - /* fall through */ + hlen = mlx5e_skb_l3_header_offset(skb); + break; case MLX5_INLINE_MODE_L2: default: hlen = mlx5e_skb_l2_header_offset(skb); @@ -387,8 +376,14 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); if (unlikely(contig_wqebbs_room < num_wqebbs)) { +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5_wqe_eth_seg cur_eth = wqe->eth; +#endif mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); mlx5e_sq_fetch_wqe(sq, &wqe, &pi); +#ifdef CONFIG_MLX5_EN_IPSEC + wqe->eth = cur_eth; +#endif } /* fill wqe */ @@ -459,9 +454,10 @@ static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq, u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq); netdev_err(sq->channel->netdev, - "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", - sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome, - err_cqe->vendor_err_synd); + "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", + sq->cq.mcq.cqn, ci, sq->sqn, + get_cqe_opcode((struct mlx5_cqe64 *)err_cqe), + err_cqe->syndrome, err_cqe->vendor_err_synd); mlx5_dump_err_cqe(sq->cq.mdev, err_cqe); } @@ -507,13 +503,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wqe_counter = be16_to_cpu(cqe->wqe_counter); - if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) { + if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { mlx5e_dump_error_cqe(sq, (struct mlx5_err_cqe *)cqe); queue_work(cq->channel->priv->wq, - &sq->recover.recover_work); + &sq->recover_work); } stats->cqe_err++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 85d517360157..b4af5e19f6ac 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -76,6 +76,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_rq *rq = &c->rq; bool busy = false; int work_done = 0; int i; @@ -85,17 +86,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); - busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL); if (c->xdp) - busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq); + busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq); if (likely(budget)) { /* budget=0 means: don't poll rx rings */ - work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); + work_done = mlx5e_poll_rx_cq(&rq->cq, budget); busy |= work_done == budget; } - busy |= c->rq.post_wqes(&c->rq); + busy |= c->rq.post_wqes(rq); if (busy) { if (likely(mlx5e_channel_no_affinity_change(c))) @@ -115,9 +116,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_cq_arm(&c->sq[i].cq); } - mlx5e_handle_rx_dim(&c->rq); + mlx5e_handle_rx_dim(rq); - mlx5e_cq_arm(&c->rq.cq); + mlx5e_cq_arm(&rq->cq); mlx5e_cq_arm(&c->icosq.cq); mlx5e_cq_arm(&c->xdpsq.cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index c1e1a16a9b07..bb6e5b5d9681 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -31,20 +31,23 @@ */ #include <linux/interrupt.h> +#include <linux/notifier.h> #include <linux/module.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/eq.h> #include <linux/mlx5/cmd.h> #ifdef CONFIG_RFS_ACCEL #include <linux/cpu_rmap.h> #endif #include "mlx5_core.h" +#include "lib/eq.h" #include "fpga/core.h" #include "eswitch.h" #include "lib/clock.h" #include 
"diag/fw_tracer.h" enum { - MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), MLX5_EQE_OWNER_INIT_VAL = 0x1, }; @@ -55,14 +58,32 @@ enum { }; enum { - MLX5_NUM_SPARE_EQE = 0x80, - MLX5_NUM_ASYNC_EQE = 0x1000, - MLX5_NUM_CMD_EQE = 32, - MLX5_NUM_PF_DRAIN = 64, + MLX5_EQ_DOORBEL_OFFSET = 0x40, }; -enum { - MLX5_EQ_DOORBEL_OFFSET = 0x40, +struct mlx5_irq_info { + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; + void *context; /* dev_id provided to request_irq */ +}; + +struct mlx5_eq_table { + struct list_head comp_eqs_list; + struct mlx5_eq pages_eq; + struct mlx5_eq cmd_eq; + struct mlx5_eq async_eq; + + struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; + + /* Since CQ DB is stored in async_eq */ + struct mlx5_nb cq_err_nb; + + struct mutex lock; /* sync async eqs creations */ + int num_comp_vectors; + struct mlx5_irq_info *irq_info; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -78,17 +99,6 @@ enum { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) -struct map_eq_in { - u64 mask; - u32 reserved; - u32 unmap_eqn; -}; - -struct cre_des_eq { - u8 reserved[15]; - u8 eqn; -}; - static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -99,213 +109,56 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) -{ - return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); -} - -static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) -{ - struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); - - return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? 
NULL : eqe; -} - -static const char *eqe_type_str(u8 type) -{ - switch (type) { - case MLX5_EVENT_TYPE_COMP: - return "MLX5_EVENT_TYPE_COMP"; - case MLX5_EVENT_TYPE_PATH_MIG: - return "MLX5_EVENT_TYPE_PATH_MIG"; - case MLX5_EVENT_TYPE_COMM_EST: - return "MLX5_EVENT_TYPE_COMM_EST"; - case MLX5_EVENT_TYPE_SQ_DRAINED: - return "MLX5_EVENT_TYPE_SQ_DRAINED"; - case MLX5_EVENT_TYPE_SRQ_LAST_WQE: - return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; - case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: - return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; - case MLX5_EVENT_TYPE_CQ_ERROR: - return "MLX5_EVENT_TYPE_CQ_ERROR"; - case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: - return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; - case MLX5_EVENT_TYPE_PATH_MIG_FAILED: - return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; - case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; - case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; - case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: - return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; - case MLX5_EVENT_TYPE_INTERNAL_ERROR: - return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; - case MLX5_EVENT_TYPE_PORT_CHANGE: - return "MLX5_EVENT_TYPE_PORT_CHANGE"; - case MLX5_EVENT_TYPE_GPIO_EVENT: - return "MLX5_EVENT_TYPE_GPIO_EVENT"; - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: - return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: - return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; - case MLX5_EVENT_TYPE_REMOTE_CONFIG: - return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; - case MLX5_EVENT_TYPE_DB_BF_CONGESTION: - return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; - case MLX5_EVENT_TYPE_STALL_EVENT: - return "MLX5_EVENT_TYPE_STALL_EVENT"; - case MLX5_EVENT_TYPE_CMD: - return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_PAGE_REQUEST: - return "MLX5_EVENT_TYPE_PAGE_REQUEST"; - case MLX5_EVENT_TYPE_PAGE_FAULT: - return "MLX5_EVENT_TYPE_PAGE_FAULT"; - case MLX5_EVENT_TYPE_PPS_EVENT: - return "MLX5_EVENT_TYPE_PPS_EVENT"; - case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: - return 
"MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; - case MLX5_EVENT_TYPE_FPGA_ERROR: - return "MLX5_EVENT_TYPE_FPGA_ERROR"; - case MLX5_EVENT_TYPE_FPGA_QP_ERROR: - return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; - case MLX5_EVENT_TYPE_GENERAL_EVENT: - return "MLX5_EVENT_TYPE_GENERAL_EVENT"; - case MLX5_EVENT_TYPE_DEVICE_TRACER: - return "MLX5_EVENT_TYPE_DEVICE_TRACER"; - default: - return "Unrecognized event"; - } -} - -static enum mlx5_dev_event port_subtype_event(u8 subtype) -{ - switch (subtype) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - return MLX5_DEV_EVENT_PORT_DOWN; - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - return MLX5_DEV_EVENT_PORT_UP; - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - return MLX5_DEV_EVENT_PORT_INITIALIZED; - case MLX5_PORT_CHANGE_SUBTYPE_LID: - return MLX5_DEV_EVENT_LID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - return MLX5_DEV_EVENT_PKEY_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - return MLX5_DEV_EVENT_GUID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - return MLX5_DEV_EVENT_CLIENT_REREG; - } - return -1; -} - -static void eq_update_ci(struct mlx5_eq *eq, int arm) +/* caller must eventually call mlx5_cq_put on the returned cq */ +static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) { - __be32 __iomem *addr = eq->doorbell + (arm ? 
0 : 2); - u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); - - __raw_writel((__force u32)cpu_to_be32(val), addr); - /* We still want ordering, just not swabbing, so add a barrier */ - mb(); -} + struct mlx5_cq_table *table = &eq->cq_table; + struct mlx5_core_cq *cq = NULL; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -static void eqe_pf_action(struct work_struct *work) -{ - struct mlx5_pagefault *pfault = container_of(work, - struct mlx5_pagefault, - work); - struct mlx5_eq *eq = pfault->eq; + rcu_read_lock(); + cq = radix_tree_lookup(&table->tree, cqn); + if (likely(cq)) + mlx5_cq_hold(cq); + rcu_read_unlock(); - mlx5_core_page_fault(eq->dev, pfault); - mempool_free(pfault, eq->pf_ctx.pool); + return cq; } -static void eq_pf_process(struct mlx5_eq *eq) +static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) { - struct mlx5_core_dev *dev = eq->dev; - struct mlx5_eqe_page_fault *pf_eqe; - struct mlx5_pagefault *pfault; + struct mlx5_eq_comp *eq_comp = eq_ptr; + struct mlx5_eq *eq = eq_ptr; struct mlx5_eqe *eqe; int set_ci = 0; + u32 cqn = -1; while ((eqe = next_eqe_sw(eq))) { - pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC); - if (!pfault) { - schedule_work(&eq->pf_ctx.work); - break; - } - + struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. 
+ */ dma_rmb(); - pf_eqe = &eqe->data.page_fault; - pfault->event_subtype = eqe->sub_type; - pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); - - mlx5_core_dbg(dev, - "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", - eqe->sub_type, pfault->bytes_committed); - - switch (eqe->sub_type) { - case MLX5_PFAULT_SUBTYPE_RDMA: - /* RDMA based event */ - pfault->type = - be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; - pfault->token = - be32_to_cpu(pf_eqe->rdma.pftype_token) & - MLX5_24BIT_MASK; - pfault->rdma.r_key = - be32_to_cpu(pf_eqe->rdma.r_key); - pfault->rdma.packet_size = - be16_to_cpu(pf_eqe->rdma.packet_length); - pfault->rdma.rdma_op_len = - be32_to_cpu(pf_eqe->rdma.rdma_op_len); - pfault->rdma.rdma_va = - be64_to_cpu(pf_eqe->rdma.rdma_va); - mlx5_core_dbg(dev, - "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", - pfault->type, pfault->token, - pfault->rdma.r_key); - mlx5_core_dbg(dev, - "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", - pfault->rdma.rdma_op_len, - pfault->rdma.rdma_va); - break; - - case MLX5_PFAULT_SUBTYPE_WQE: - /* WQE based event */ - pfault->type = - (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; - pfault->token = - be32_to_cpu(pf_eqe->wqe.token); - pfault->wqe.wq_num = - be32_to_cpu(pf_eqe->wqe.pftype_wq) & - MLX5_24BIT_MASK; - pfault->wqe.wqe_index = - be16_to_cpu(pf_eqe->wqe.wqe_index); - pfault->wqe.packet_size = - be16_to_cpu(pf_eqe->wqe.packet_length); - mlx5_core_dbg(dev, - "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", - pfault->type, pfault->token, - pfault->wqe.wq_num, - pfault->wqe.wqe_index); - break; - - default: - mlx5_core_warn(dev, - "Unsupported page fault event sub-type: 0x%02hhx\n", - eqe->sub_type); - /* Unsupported page faults should still be - * resolved by the page fault handler - */ + /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */ + cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; + + cq = mlx5_eq_cq_get(eq, cqn); + if (likely(cq)) { + 
++cq->arm_sn; + cq->comp(cq); + mlx5_cq_put(cq); + } else { + mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); } - pfault->eq = eq; - INIT_WORK(&pfault->work, eqe_pf_action); - queue_work(eq->pf_ctx.wq, &pfault->work); - ++eq->cons_index; ++set_ci; + /* The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MLX5_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { eq_update_ci(eq, 0); set_ci = 0; @@ -313,165 +166,41 @@ static void eq_pf_process(struct mlx5_eq *eq) } eq_update_ci(eq, 1); -} - -static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) -{ - struct mlx5_eq *eq = eq_ptr; - unsigned long flags; - if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) { - eq_pf_process(eq); - spin_unlock_irqrestore(&eq->pf_ctx.lock, flags); - } else { - schedule_work(&eq->pf_ctx.work); - } + if (cqn != -1) + tasklet_schedule(&eq_comp->tasklet_ctx.task); return IRQ_HANDLED; } -/* mempool_refill() was proposed but unfortunately wasn't accepted - * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html - * Chip workaround. +/* Some architectures don't latch interrupts when they are disabled, so using + * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to + * avoid losing them. It is not recommended to use it, unless this is the last + * resort. 
*/ -static void mempool_refill(mempool_t *pool) +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) { - while (pool->curr_nr < pool->min_nr) - mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); -} - -static void eq_pf_action(struct work_struct *work) -{ - struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work); - - mempool_refill(eq->pf_ctx.pool); - - spin_lock_irq(&eq->pf_ctx.lock); - eq_pf_process(eq); - spin_unlock_irq(&eq->pf_ctx.lock); -} - -static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name) -{ - spin_lock_init(&pf_ctx->lock); - INIT_WORK(&pf_ctx->work, eq_pf_action); - - pf_ctx->wq = alloc_ordered_workqueue(name, - WQ_MEM_RECLAIM); - if (!pf_ctx->wq) - return -ENOMEM; - - pf_ctx->pool = mempool_create_kmalloc_pool - (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault)); - if (!pf_ctx->pool) - goto err_wq; - - return 0; -err_wq: - destroy_workqueue(pf_ctx->wq); - return -ENOMEM; -} - -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, - u32 wq_num, u8 type, int error) -{ - u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; - - MLX5_SET(page_fault_resume_in, in, opcode, - MLX5_CMD_OP_PAGE_FAULT_RESUME); - MLX5_SET(page_fault_resume_in, in, error, !!error); - MLX5_SET(page_fault_resume_in, in, page_fault_type, type); - MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); - MLX5_SET(page_fault_resume_in, in, token, token); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} -EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); -#endif - -static void general_event_handler(struct mlx5_core_dev *dev, - struct mlx5_eqe *eqe) -{ - switch (eqe->sub_type) { - case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: - if (dev->event) - dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0); - break; - default: - mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", - eqe->sub_type); - } -} - -static void mlx5_temp_warning_event(struct 
mlx5_core_dev *dev, - struct mlx5_eqe *eqe) -{ - u64 value_lsb; - u64 value_msb; - - value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); - value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); - - mlx5_core_warn(dev, - "High temperature on sensors with bit set %llx %llx", - value_msb, value_lsb); -} - -/* caller must eventually call mlx5_cq_put on the returned cq */ -static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) -{ - struct mlx5_cq_table *table = &eq->cq_table; - struct mlx5_core_cq *cq = NULL; - - spin_lock(&table->lock); - cq = radix_tree_lookup(&table->tree, cqn); - if (likely(cq)) - mlx5_cq_hold(cq); - spin_unlock(&table->lock); - - return cq; -} - -static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn) -{ - struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); - - if (unlikely(!cq)) { - mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); - return; - } - - ++cq->arm_sn; - - cq->comp(cq); - - mlx5_cq_put(cq); -} - -static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type) -{ - struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); - - if (unlikely(!cq)) { - mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); - return; - } + u32 count_eqe; - cq->event(cq, event_type); + disable_irq(eq->core.irqn); + count_eqe = eq->core.cons_index; + mlx5_eq_comp_int(eq->core.irqn, eq); + count_eqe = eq->core.cons_index - count_eqe; + enable_irq(eq->core.irqn); - mlx5_cq_put(cq); + return count_eqe; } -static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) +static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) { struct mlx5_eq *eq = eq_ptr; - struct mlx5_core_dev *dev = eq->dev; + struct mlx5_eq_table *eqt; + struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; int set_ci = 0; - u32 cqn = -1; - u32 rsn; - u8 port; + + dev = eq->dev; + eqt = dev->priv.eq_table; while ((eqe = next_eqe_sw(eq))) { /* @@ -480,116 +209,12 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) */ 
dma_rmb(); - mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", - eq->eqn, eqe_type_str(eqe->type)); - switch (eqe->type) { - case MLX5_EVENT_TYPE_COMP: - cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; - mlx5_eq_cq_completion(eq, cqn); - break; - case MLX5_EVENT_TYPE_DCT_DRAINED: - rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; - rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); - mlx5_rsc_event(dev, rsn, eqe->type); - break; - case MLX5_EVENT_TYPE_PATH_MIG: - case MLX5_EVENT_TYPE_COMM_EST: - case MLX5_EVENT_TYPE_SQ_DRAINED: - case MLX5_EVENT_TYPE_SRQ_LAST_WQE: - case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: - case MLX5_EVENT_TYPE_PATH_MIG_FAILED: - case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); - mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n", - eqe_type_str(eqe->type), eqe->type, rsn); - mlx5_rsc_event(dev, rsn, eqe->type); - break; - - case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: - case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: - rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n", - eqe_type_str(eqe->type), eqe->type, rsn); - mlx5_srq_event(dev, rsn, eqe->type); - break; - - case MLX5_EVENT_TYPE_CMD: - mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); - break; - - case MLX5_EVENT_TYPE_PORT_CHANGE: - port = (eqe->data.port.port >> 4) & 0xf; - switch (eqe->sub_type) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - case MLX5_PORT_CHANGE_SUBTYPE_LID: - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - if (dev->event) - dev->event(dev, port_subtype_event(eqe->sub_type), - (unsigned long)port); - break; - default: - mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", - port, 
eqe->sub_type); - } - break; - case MLX5_EVENT_TYPE_CQ_ERROR: - cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; - mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", - cqn, eqe->data.cq_err.syndrome); - mlx5_eq_cq_event(eq, cqn, eqe->type); - break; + if (likely(eqe->type < MLX5_EVENT_TYPE_MAX)) + atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); + else + mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type); - case MLX5_EVENT_TYPE_PAGE_REQUEST: - { - u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id); - s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages); - - mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", - func_id, npages); - mlx5_core_req_pages_handler(dev, func_id, npages); - } - break; - - case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: - mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); - break; - - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: - mlx5_port_module_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_PPS_EVENT: - mlx5_pps_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_FPGA_ERROR: - case MLX5_EVENT_TYPE_FPGA_QP_ERROR: - mlx5_fpga_event(dev, eqe->type, &eqe->data.raw); - break; - - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: - mlx5_temp_warning_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_GENERAL_EVENT: - general_event_handler(dev, eqe); - break; - - case MLX5_EVENT_TYPE_DEVICE_TRACER: - mlx5_fw_tracer_event(dev, eqe); - break; - - default: - mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", - eqe->type, eq->eqn); - break; - } + atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; ++set_ci; @@ -608,30 +233,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) eq_update_ci(eq, 1); - if (cqn != -1) - tasklet_schedule(&eq->tasklet_ctx.task); - return IRQ_HANDLED; } -/* Some architectures don't latch interrupts when they are disabled, so using - * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to - * 
avoid losing them. It is not recommended to use it, unless this is the last - * resort. - */ -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq) -{ - u32 count_eqe; - - disable_irq(eq->irqn); - count_eqe = eq->cons_index; - mlx5_eq_int(eq->irqn, eq); - count_eqe = eq->cons_index - count_eqe; - enable_irq(eq->irqn); - - return count_eqe; -} - static void init_eq_buf(struct mlx5_eq *eq) { struct mlx5_eqe *eqe; @@ -643,39 +247,35 @@ static void init_eq_buf(struct mlx5_eq *eq) } } -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type) +static int +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, + struct mlx5_eq_param *param) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - irq_handler_t handler; + u8 vecidx = param->index; __be64 *pas; void *eqc; int inlen; u32 *in; int err; + if (eq_table->irq_info[vecidx].context) + return -EEXIST; + /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); spin_lock_init(&cq_table->lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); - eq->type = type; - eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); + eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) return err; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (type == MLX5_EQ_TYPE_PF) - handler = mlx5_eq_pf_int; - else -#endif - handler = mlx5_eq_int; - init_eq_buf(eq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + @@ -691,7 +291,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, mlx5_fill_page_array(&eq->buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); - MLX5_SET64(create_eq_in, in, event_bitmask, mask); + MLX5_SET64(create_eq_in, in, event_bitmask, 
param->mask); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); @@ -704,15 +304,17 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_in; - snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", + snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); + eq_table->irq_info[vecidx].context = param->context; + eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, handler, 0, - priv->irq_info[vecidx].name, eq); + err = request_irq(eq->irqn, param->handler, 0, + eq_table->irq_info[vecidx].name, param->context); if (err) goto err_eq; @@ -720,21 +322,6 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_irq; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (type == MLX5_EQ_TYPE_PF) { - err = init_pf_ctx(&eq->pf_ctx, name); - if (err) - goto err_irq; - } else -#endif - { - INIT_LIST_HEAD(&eq->tasklet_ctx.list); - INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); - spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, - (unsigned long)&eq->tasklet_ctx); - } - /* EQs are created in ARMED state */ eq_update_ci(eq, 1); @@ -756,27 +343,25 @@ err_buf: return err; } -int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + struct mlx5_irq_info *irq_info; int err; + irq_info = &eq_table->irq_info[eq->vecidx]; + mlx5_debug_eq_remove(dev, eq); - free_irq(eq->irqn, eq); + + free_irq(eq->irqn, irq_info->context); + irq_info->context = NULL; + err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to 
destroy a previously created eq: eqn %d\n", eq->eqn); synchronize_irq(eq->irqn); - if (eq->type == MLX5_EQ_TYPE_COMP) { - tasklet_disable(&eq->tasklet_ctx.task); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - } else if (eq->type == MLX5_EQ_TYPE_PF) { - cancel_work_sync(&eq->pf_ctx.work); - destroy_workqueue(eq->pf_ctx.wq); - mempool_destroy(eq->pf_ctx.pool); -#endif - } mlx5_buf_free(dev, &eq->buf); return err; @@ -787,9 +372,9 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) struct mlx5_cq_table *table = &eq->cq_table; int err; - spin_lock_irq(&table->lock); + spin_lock(&table->lock); err = radix_tree_insert(&table->tree, cq->cqn, cq); - spin_unlock_irq(&table->lock); + spin_unlock(&table->lock); return err; } @@ -799,9 +384,9 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *tmp; - spin_lock_irq(&table->lock); + spin_lock(&table->lock); tmp = radix_tree_delete(&table->tree, cq->cqn); - spin_unlock_irq(&table->lock); + spin_unlock(&table->lock); if (!tmp) { mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn); @@ -816,28 +401,106 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) return 0; } -int mlx5_eq_init(struct mlx5_core_dev *dev) +int mlx5_eq_table_init(struct mlx5_core_dev *dev) { - int err; + struct mlx5_eq_table *eq_table; + int i, err; - spin_lock_init(&dev->priv.eq_table.lock); + eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL); + if (!eq_table) + return -ENOMEM; + + dev->priv.eq_table = eq_table; err = mlx5_eq_debugfs_init(dev); + if (err) + goto kvfree_eq_table; + mutex_init(&eq_table->lock); + for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) + ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + + return 0; + +kvfree_eq_table: + kvfree(eq_table); + dev->priv.eq_table = NULL; return err; } -void mlx5_eq_cleanup(struct mlx5_core_dev *dev) +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) { 
mlx5_eq_debugfs_cleanup(dev); + kvfree(dev->priv.eq_table); } -int mlx5_start_eqs(struct mlx5_core_dev *dev) +/* Async EQs */ + +static int create_async_eq(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq *eq, struct mlx5_eq_param *param) { - struct mlx5_eq_table *table = &dev->priv.eq_table; - u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + mutex_lock(&eq_table->lock); + if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { + err = -ENOSPC; + goto unlock; + } + + err = create_map_eq(dev, eq, name, param); +unlock: + mutex_unlock(&eq_table->lock); + return err; +} + +static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; + mutex_lock(&eq_table->lock); + err = destroy_unmap_eq(dev, eq); + mutex_unlock(&eq_table->lock); + return err; +} + +static int cq_err_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eq_table *eqt; + struct mlx5_core_cq *cq; + struct mlx5_eqe *eqe; + struct mlx5_eq *eq; + u32 cqn; + + /* type == MLX5_EVENT_TYPE_CQ_ERROR */ + + eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); + eq = &eqt->async_eq; + eqe = data; + + cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; + mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", + cqn, eqe->data.cq_err.syndrome); + + cq = mlx5_eq_cq_get(eq, cqn); + if (unlikely(!cq)) { + mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); + return NOTIFY_OK; + } + + cq->event(cq, type); + + mlx5_cq_put(cq); + + return NOTIFY_OK; +} + +static u64 gather_async_events_mask(struct mlx5_core_dev *dev) +{ + u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; + if (MLX5_VPORT_MANAGER(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); @@ -865,127 +528,524 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) async_event_mask |= (1ull << 
MLX5_EVENT_TYPE_DEVICE_TRACER); - err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, - MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, - "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC); + if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); + + if (mlx5_core_is_ecpf_esw_manager(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + + return async_event_mask; +} + +static int create_async_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_param param = {}; + int err; + + MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); + mlx5_eq_notifier_register(dev, &table->cq_err_nb); + + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_CMD_IDX, + .mask = 1ull << MLX5_EVENT_TYPE_CMD, + .nent = MLX5_NUM_CMD_EQE, + .context = &table->cmd_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); - return err; + goto err0; } mlx5_cmd_use_events(dev); - err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC, - MLX5_NUM_ASYNC_EQE, async_event_mask, - "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC); + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_ASYNC_IDX, + .mask = gather_async_events_mask(dev), + .nent = MLX5_NUM_ASYNC_EQE, + .context = &table->async_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; } - err = mlx5_create_map_eq(dev, &table->pages_eq, - MLX5_EQ_VEC_PAGES, - /* TODO: sriov max_vf + */ 1, - 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", - MLX5_EQ_TYPE_ASYNC); + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PAGEREQ_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, + .nent = /* TODO: sriov max_vf + */ 1, + .context = &table->pages_eq, + .handler = 
mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_create_map_eq(dev, &table->pfault_eq, - MLX5_EQ_VEC_PFAULT, - MLX5_NUM_ASYNC_EQE, - 1 << MLX5_EVENT_TYPE_PAGE_FAULT, - "mlx5_page_fault_eq", - MLX5_EQ_TYPE_PF); - if (err) { - mlx5_core_warn(dev, "failed to create page fault EQ %d\n", - err); - goto err3; - } - } - return err; -err3: - mlx5_destroy_unmap_eq(dev, &table->pages_eq); -#else - return err; -#endif err2: - mlx5_destroy_unmap_eq(dev, &table->async_eq); + destroy_async_eq(dev, &table->async_eq); err1: mlx5_cmd_use_polling(dev); - mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + destroy_async_eq(dev, &table->cmd_eq); +err0: + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; } -void mlx5_stop_eqs(struct mlx5_core_dev *dev) +static void destroy_async_eqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; int err; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); - if (err) - mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n", - err); - } -#endif - - err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); + err = destroy_async_eq(dev, &table->pages_eq); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = mlx5_destroy_unmap_eq(dev, &table->async_eq); + err = destroy_async_eq(dev, &table->async_eq); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); + mlx5_cmd_use_polling(dev); - err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + err = destroy_async_eq(dev, &table->cmd_eq); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); + + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); +} + +struct 
mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) +{ + return &dev->priv.eq_table->async_eq; +} + +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->async_eq.irqn); +} + +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); +} + +/* Generic EQ API for mlx5_core consumers + * Needed For RDMA ODP EQ for now + */ +struct mlx5_eq * +mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq_param *param) +{ + struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); + int err; + + if (!eq) + return ERR_PTR(-ENOMEM); + + err = create_async_eq(dev, name, eq, param); + if (err) { + kvfree(eq); + eq = ERR_PTR(err); + } + + return eq; +} +EXPORT_SYMBOL(mlx5_eq_create_generic); + +int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (IS_ERR(eq)) + return -EINVAL; + + err = destroy_async_eq(dev, eq); + if (err) + goto out; + + kvfree(eq); +out: + return err; } +EXPORT_SYMBOL(mlx5_eq_destroy_generic); -int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - u32 *out, int outlen) +struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc) { - u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0}; + u32 ci = eq->cons_index + cc; + struct mlx5_eqe *eqe; + + eqe = get_eqe(eq, ci & (eq->nent - 1)); + eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + if (eqe) + dma_rmb(); - MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); - MLX5_SET(query_eq_in, in, eq_number, eq->eqn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + return eqe; +} +EXPORT_SYMBOL(mlx5_eq_get_eqe); + +void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 
0 : 2); + u32 val; + + eq->cons_index += cc; + val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} +EXPORT_SYMBOL(mlx5_eq_update_ci); + +/* Completion EQs */ + +static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int irq = pci_irq_vector(mdev->pdev, vecidx); + struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + + if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), + irq_info->mask); + + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irq, irq_info->mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); + + return 0; +} + +static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + struct mlx5_priv *priv = &mdev->priv; + int irq = pci_irq_vector(mdev->pdev, vecidx); + struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(irq_info->mask); +} + +static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) +{ + int err; + int i; + + for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { + err = set_comp_irq_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + clear_comp_irq_affinity_hint(mdev, i); + + return err; +} + +static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) +{ + int i; + + for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) + clear_comp_irq_affinity_hint(mdev, i); +} + +static void destroy_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq, 
*n; + + clear_comp_irqs_affinity_hints(dev); + +#ifdef CONFIG_RFS_ACCEL + if (table->rmap) { + free_irq_cpu_rmap(table->rmap); + table->rmap = NULL; + } +#endif + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + list_del(&eq->list); + if (destroy_unmap_eq(dev, &eq->core)) + mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", + eq->core.eqn); + tasklet_disable(&eq->tasklet_ctx.task); + kfree(eq); + } +} + +static int create_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + char name[MLX5_MAX_IRQ_NAME]; + struct mlx5_eq_comp *eq; + int ncomp_vec; + int nent; + int err; + int i; + + INIT_LIST_HEAD(&table->comp_eqs_list); + ncomp_vec = table->num_comp_vectors; + nent = MLX5_COMP_EQ_SIZE; +#ifdef CONFIG_RFS_ACCEL + table->rmap = alloc_irq_cpu_rmap(ncomp_vec); + if (!table->rmap) + return -ENOMEM; +#endif + for (i = 0; i < ncomp_vec; i++) { + int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + struct mlx5_eq_param param = {}; + + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) { + err = -ENOMEM; + goto clean; + } + + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, + (unsigned long)&eq->tasklet_ctx); + +#ifdef CONFIG_RFS_ACCEL + irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); +#endif + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + param = (struct mlx5_eq_param) { + .index = vecidx, + .mask = 0, + .nent = nent, + .context = &eq->core, + .handler = mlx5_eq_comp_int + }; + err = create_map_eq(dev, &eq->core, name, &param); + if (err) { + kfree(eq); + goto clean; + } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); + /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ + list_add_tail(&eq->list, &table->comp_eqs_list); + } + + err = set_comp_irq_affinity_hints(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc affinity 
hint cpumask\n"); + goto clean; + } + + return 0; + +clean: + destroy_comp_eqs(dev); + return err; +} + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq, *n; + int err = -ENOENT; + int i = 0; + + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + if (i++ == vector) { + *eqn = eq->core.eqn; + *irqn = eq->core.irqn; + err = 0; + break; + } + } + + return err; +} +EXPORT_SYMBOL(mlx5_vector2eqn); + +unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) +{ + return dev->priv.eq_table->num_comp_vectors; +} +EXPORT_SYMBOL(mlx5_comp_vectors_count); + +struct cpumask * +mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) +{ + /* TODO: consider irq_get_affinity_mask(irq) */ + return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; +} +EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); + +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + return dev->priv.eq_table->rmap; +#else + return NULL; +#endif +} + +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + + list_for_each_entry(eq, &table->comp_eqs_list, list) { + if (eq->core.eqn == eqn) + return eq; + } + + return ERR_PTR(-ENOENT); } /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq; + struct mlx5_eq_table *table = dev->priv.eq_table; + int i, max_eqs; + + clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (dev->rmap) { - free_irq_cpu_rmap(dev->rmap); - dev->rmap = NULL; + if (table->rmap) { + free_irq_cpu_rmap(table->rmap); + table->rmap = NULL; } #endif - list_for_each_entry(eq, &table->comp_eqs_list, list) - 
free_irq(eq->irqn, eq); - - free_irq(table->pages_eq.irqn, &table->pages_eq); - free_irq(table->async_eq.irqn, &table->async_eq); - free_irq(table->cmd_eq.irqn, &table->cmd_eq); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) - free_irq(table->pfault_eq.irqn, &table->pfault_eq); -#endif + + mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ + max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; + for (i = max_eqs - 1; i >= 0; i--) { + if (!table->irq_info[i].context) + continue; + free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); + table->irq_info[i].context = NULL; + } + mutex_unlock(&table->lock); + pci_free_irq_vectors(dev->pdev); +} + +static int alloc_irq_vectors(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *table = priv->eq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_EQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_EQ_VEC_COMP_BASE) + return -ENOMEM; + + table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); + if (!table->irq_info) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, + nvec, PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq_info; + } + + table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + + return 0; + +err_free_irq_info: + kfree(table->irq_info); + return err; +} + +static void free_irq_vectors(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + pci_free_irq_vectors(dev->pdev); + kfree(priv->eq_table->irq_info); +} + +int mlx5_eq_table_create(struct mlx5_core_dev *dev) +{ + int err; + + err = alloc_irq_vectors(dev); + if (err) { + mlx5_core_err(dev, "alloc irq vectors failed\n"); + return err; + } + + err = create_async_eqs(dev); + if (err) { + 
mlx5_core_err(dev, "Failed to create async EQs\n"); + goto err_async_eqs; + } + + err = create_comp_eqs(dev); + if (err) { + mlx5_core_err(dev, "Failed to create completion EQs\n"); + goto err_comp_eqs; + } + + return 0; +err_comp_eqs: + destroy_async_eqs(dev); +err_async_eqs: + free_irq_vectors(dev); + return err; +} + +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) +{ + destroy_comp_eqs(dev); + destroy_async_eqs(dev); + free_irq_vectors(dev); +} + +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + if (nb->event_type >= MLX5_EVENT_TYPE_MAX) + return -EINVAL; + + return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); +} + +int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + if (nb->event_type >= MLX5_EVENT_TYPE_MAX) + return -EINVAL; + + return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d004957328f9..8a67fd197b79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -36,10 +36,10 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "mlx5_core.h" +#include "lib/eq.h" #include "eswitch.h" #include "fs_core.h" - -#define UPLINK_VPORT 0xFFFF +#include "ecpf.h" enum { MLX5_ACTION_NONE = 0, @@ -51,7 +51,7 @@ enum { struct vport_addr { struct l2addr_node node; u8 action; - u32 vport; + u16 vport; struct mlx5_flow_handle *flow_rule; bool mpfs; /* UC MAC was added to MPFs */ /* A flag indicating that mac was added due to mc promiscuous vport */ @@ -64,11 +64,36 @@ enum { PROMISC_CHANGE = BIT(3), }; +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw); +static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw); + /* Vport context events 
*/ #define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ MC_ADDR_CHANGE | \ PROMISC_CHANGE) +/* The vport getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. + */ +#define mlx5_esw_for_all_vports(esw, i, vport) \ + for ((i) = MLX5_VPORT_PF; \ + (vport) = &(esw)->vports[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (vport) = &(esw)->vports[i], \ + (i) <= (nvfs); (i)++) + +static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, + u16 vport_num) +{ + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + + WARN_ON(idx > esw->total_vports - 1); + return &esw->vports[idx]; +} + static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { @@ -80,8 +105,7 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -109,12 +133,11 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(modify_esw_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { u32 
in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; @@ -151,7 +174,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, /* E-Switch FDB */ static struct mlx5_flow_handle * -__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, +__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) { int match_header = (is_zero_ether_addr(mac_c) ? 0 : @@ -187,7 +210,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, misc_parameters); mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT); + MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK); MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); } @@ -214,7 +237,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, } static struct mlx5_flow_handle * -esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport) { u8 mac_c[ETH_ALEN]; @@ -223,7 +246,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -236,7 +259,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -246,6 +269,37 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); } +enum { + LEGACY_VEPA_PRIO = 0, + LEGACY_FDB_PRIO, +}; + +static int 
esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + int err; + + root_ns = mlx5_get_fdb_sub_ns(dev, 0); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* num FTE 2, num FG 2 */ + fdb = mlx5_create_auto_grouped_flow_table(root_ns, LEGACY_VEPA_PRIO, + 2, 2, 0, 0); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create VEPA FDB err %d\n", err); + return err; + } + esw->fdb_table.legacy.vepa_fdb = fdb; + + return 0; +} + static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -274,8 +328,8 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) return -ENOMEM; table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - ft_attr.max_fte = table_size; + ft_attr.prio = LEGACY_FDB_PRIO; fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); @@ -334,41 +388,67 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) esw->fdb_table.legacy.promisc_grp = g; out: - if (err) { - if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); - esw->fdb_table.legacy.allmulti_grp = NULL; - } - if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); - esw->fdb_table.legacy.addr_grp = NULL; - } - if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.fdb)) { - mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); - esw->fdb_table.legacy.fdb = NULL; - } - } + if (err) + esw_destroy_legacy_fdb_table(esw); kvfree(flow_group_in); return err; } +static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + esw_debug(esw->dev, "Destroy VEPA Table\n"); + if (!esw->fdb_table.legacy.vepa_fdb) + return; + + 
mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb); + esw->fdb_table.legacy.vepa_fdb = NULL; +} + static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) { + esw_debug(esw->dev, "Destroy FDB Table\n"); if (!esw->fdb_table.legacy.fdb) return; - esw_debug(esw->dev, "Destroy FDB Table\n"); - mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); - mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); - mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + if (esw->fdb_table.legacy.promisc_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + if (esw->fdb_table.legacy.allmulti_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + if (esw->fdb_table.legacy.addr_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); + esw->fdb_table.legacy.fdb = NULL; esw->fdb_table.legacy.addr_grp = NULL; esw->fdb_table.legacy.allmulti_grp = NULL; esw->fdb_table.legacy.promisc_grp = NULL; } +static int esw_create_legacy_table(struct mlx5_eswitch *esw) +{ + int err; + + memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb)); + + err = esw_create_legacy_vepa_table(esw); + if (err) + return err; + + err = esw_create_legacy_fdb_table(esw); + if (err) + esw_destroy_legacy_vepa_table(esw); + + return err; +} + +static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) +{ + esw_cleanup_vepa_rules(esw); + esw_destroy_legacy_fdb_table(esw); + esw_destroy_legacy_vepa_table(esw); +} + /* E-Switch vport UC/MC lists management */ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, struct vport_addr *vaddr); @@ -376,19 +456,19 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err; - /* Skip mlx5_mpfs_add_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action 
+ /* Skip mlx5_mpfs_add_mac for eswitch_managers, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport) + if (esw->manager_vport == vport) goto fdb_add; err = mlx5_mpfs_add_mac(esw->dev, mac); if (err) { esw_warn(esw->dev, - "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n", + "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n", mac, vport, err); return err; } @@ -408,13 +488,13 @@ fdb_add: static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err = 0; - /* Skip mlx5_mpfs_del_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action + /* Skip mlx5_mpfs_del_mac for eswitch managerss, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport || !vaddr->mpfs) + if (!vaddr->mpfs || esw->manager_vport == vport) goto fdb_del; err = mlx5_mpfs_del_mac(esw->dev, mac); @@ -437,17 +517,18 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, struct esw_mc_addr *esw_mc) { u8 *mac = vaddr->node.addr; - u32 vport_idx = 0; + struct mlx5_vport *vport; + u16 i, vport_num; - for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) { - struct mlx5_vport *vport = &esw->vports[vport_idx]; + mlx5_esw_for_all_vports(esw, i, vport) { struct hlist_head *vport_hash = vport->mc_list; struct vport_addr *iter_vaddr = l2addr_hash_find(vport_hash, mac, struct vport_addr); + vport_num = vport->vport; if (IS_ERR_OR_NULL(vport->allmulti_rule) || - vaddr->vport == vport_idx) + vaddr->vport == vport_num) continue; switch (vaddr->action) { case MLX5_ACTION_ADD: @@ -459,14 +540,14 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, if (!iter_vaddr) { esw_warn(esw->dev, "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n", - mac, vport_idx); + mac, vport_num); continue; } - iter_vaddr->vport = vport_idx; + iter_vaddr->vport = vport_num; iter_vaddr->flow_rule = 
esw_fdb_set_vport_rule(esw, mac, - vport_idx); + vport_num); iter_vaddr->mc_promisc = true; break; case MLX5_ACTION_DEL: @@ -484,7 +565,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -498,7 +579,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) return -ENOMEM; esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ - esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); + esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK); /* Add this multicast mac to all the mc promiscuous vports */ update_allmulti_vports(esw, vaddr, esw_mc); @@ -524,7 +605,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -563,9 +644,9 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) /* Apply vport UC/MC list to HW l2 table and FDB table */ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; vport_addr_action vport_addr_add; vport_addr_action vport_addr_del; @@ -598,9 +679,9 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, /* Sync vport UC/MC list from vport context */ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, 
vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; u8 (*mac_list)[ETH_ALEN]; struct l2addr_node *node; @@ -685,9 +766,9 @@ out: /* Sync vport UC/MC list from vport context * Must be called after esw_update_vport_addr_list */ -static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct l2addr_node *node; struct vport_addr *addr; struct hlist_head *hash; @@ -720,11 +801,11 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) } /* Apply vport rx mode to HW FDB table */ -static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, bool promisc, bool mc_promisc) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; - struct mlx5_vport *vport = &esw->vports[vport_num]; if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) goto promisc; @@ -735,7 +816,7 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, if (!allmulti_addr->uplink_rule) allmulti_addr->uplink_rule = esw_fdb_set_vport_allmulti_rule(esw, - UPLINK_VPORT); + MLX5_VPORT_UPLINK); allmulti_addr->refcnt++; } else if (vport->allmulti_rule) { mlx5_del_flow_rules(vport->allmulti_rule); @@ -763,9 +844,9 @@ promisc: } /* Sync vport rx mode from vport context */ -static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int promisc_all = 0; int promisc_uc = 0; int promisc_mc = 0; @@ -1133,13 +1214,6 @@ static int 
esw_vport_ingress_config(struct mlx5_eswitch *esw, int err = 0; u8 *smac_v; - if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) { - mlx5_core_warn(esw->dev, - "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", - vport->vport); - return -EPERM; - } - esw_vport_cleanup_ingress_rules(esw, vport); if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { @@ -1349,8 +1423,8 @@ static void esw_destroy_tsar(struct mlx5_eswitch *esw) static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, u32 initial_max_rate, u32 initial_bw_share) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_vport *vport = &esw->vports[vport_num]; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; int err = 0; @@ -1389,7 +1463,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int err = 0; if (!vport->qos.enabled) @@ -1408,8 +1482,8 @@ static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, u32 max_rate, u32 bw_share) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_vport *vport = &esw->vports[vport_num]; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; u32 bitmask = 0; @@ -1465,15 +1539,22 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, { int vport_num = vport->vport; - if (!vport_num) + if (esw->manager_vport == vport_num) return; mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, vport->info.link_state); - mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, 
vport->info.mac); - mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid); + + /* Host PF has its own mac/guid. */ + if (vport_num) { + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, + vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, + vport->info.node_guid); + } + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, (vport->info.vlan || vport->info.qos)); @@ -1519,10 +1600,10 @@ static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport) mlx5_fc_destroy(dev, vport->egress.drop_counter); } -static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, +static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int enable_events) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + u16 vport_num = vport->vport; mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); @@ -1545,8 +1626,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, vport->enabled_events = enable_events; vport->enabled = true; - /* only PF is trusted by default */ - if (!vport_num) + /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well + * in smartNIC as it's a vport group manager. 
+ */ + if (esw->manager_vport == vport_num || + (!vport_num && mlx5_core_is_ecpf(esw->dev))) vport->info.trusted = true; esw_vport_change_handle_locked(vport); @@ -1556,9 +1640,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, mutex_unlock(&esw->state_lock); } -static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +static void esw_disable_vport(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + u16 vport_num = vport->vport; if (!vport->enabled) return; @@ -1567,7 +1652,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) /* Mark this vport as disabled to discard new events */ vport->enabled = false; - synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC)); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ @@ -1580,10 +1664,11 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) esw_vport_change_handle_locked(vport); vport->enabled_events = 0; esw_vport_disable_qos(esw, vport_num); - if (vport_num && esw->mode == SRIOV_LEGACY) { + if (esw->manager_vport != vport_num && + esw->mode == SRIOV_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, MLX5_VPORT_ADMIN_STATE_DOWN); esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); @@ -1593,12 +1678,29 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) mutex_unlock(&esw->state_lock); } +static int eswitch_vport_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb); + struct mlx5_eqe *eqe = data; + struct mlx5_vport *vport; + u16 vport_num; + + vport_num = be16_to_cpu(eqe->data.vport_change.vport_num); + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (vport->enabled) + 
queue_work(esw->work_queue, &vport->vport_change_handler); + + return NOTIFY_OK; +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) - int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { + int vf_nvports = 0, total_nvports = 0; + struct mlx5_vport *vport; int err; int i, enabled_events; @@ -1615,14 +1717,31 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); + + if (mode == SRIOV_OFFLOADS) { + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); + if (err) + return err; + total_nvports = esw->total_vports; + } else { + vf_nvports = nvfs; + total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); + } + } + esw->mode = mode; + mlx5_lag_update(esw->dev); + if (mode == SRIOV_LEGACY) { - err = esw_create_legacy_fdb_table(esw); + err = esw_create_legacy_table(esw); + if (err) + goto abort; } else { + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - - err = esw_offloads_init(esw, nvfs + 1); + err = esw_offloads_init(esw, vf_nvports, total_nvports); } if (err) @@ -1637,8 +1756,25 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) * 2. FDB/Eswitch is programmed by user space tools */ enabled_events = (mode == SRIOV_LEGACY) ? 
SRIOV_VPORT_EVENTS : 0; - for (i = 0; i <= nvfs; i++) - esw_enable_vport(esw, i, enabled_events); + + /* Enable PF vport */ + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + esw_enable_vport(esw, vport, enabled_events); + + /* Enable ECPF vports */ + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + esw_enable_vport(esw, vport, enabled_events); + } + + /* Enable VF vports */ + mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) + esw_enable_vport(esw, vport, enabled_events); + + if (mode == SRIOV_LEGACY) { + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + } esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", esw->enabled_vports); @@ -1647,8 +1783,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) abort: esw->mode = SRIOV_NONE; - if (mode == SRIOV_OFFLOADS) + if (mode == SRIOV_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); + } return err; } @@ -1656,8 +1794,8 @@ abort: void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + struct mlx5_vport *vport; int old_mode; - int nvports; int i; if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) @@ -1667,10 +1805,12 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw->enabled_vports, esw->mode); mc_promisc = &esw->mc_promisc; - nvports = esw->enabled_vports; - for (i = 0; i < esw->total_vports; i++) - esw_disable_vport(esw, i); + if (esw->mode == SRIOV_LEGACY) + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + + mlx5_esw_for_all_vports(esw, i, vport) + esw_disable_vport(esw, vport); if (mc_promisc && mc_promisc->uplink_rule) mlx5_del_flow_rules(mc_promisc->uplink_rule); @@ -1678,25 +1818,29 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_destroy_tsar(esw); if (esw->mode == SRIOV_LEGACY) - 
esw_destroy_legacy_fdb_table(esw); + esw_destroy_legacy_table(esw); else if (esw->mode == SRIOV_OFFLOADS) - esw_offloads_cleanup(esw, nvports); + esw_offloads_cleanup(esw); old_mode = esw->mode; esw->mode = SRIOV_NONE; - if (old_mode == SRIOV_OFFLOADS) + mlx5_lag_update(esw->dev); + + if (old_mode == SRIOV_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); + } } int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; - int vport_num; - int err; + struct mlx5_vport *vport; + int err, i; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!MLX5_VPORT_MANAGER(dev)) return 0; esw_info(dev, @@ -1710,6 +1854,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) return -ENOMEM; esw->dev = dev; + esw->manager_vport = mlx5_eswitch_manager_vport(dev); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { @@ -1724,6 +1869,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + esw->total_vports = total_vports; + err = esw_offloads_init_reps(esw); if (err) goto abort; @@ -1732,17 +1879,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr_tbl); mutex_init(&esw->state_lock); - for (vport_num = 0; vport_num < total_vports; vport_num++) { - struct mlx5_vport *vport = &esw->vports[vport_num]; - - vport->vport = vport_num; + mlx5_esw_for_all_vports(esw, i, vport) { + vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); } - esw->total_vports = total_vports; esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; @@ -1765,7 +1909,7 @@ abort: void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) { - if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev)) + if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) 
return; esw_info(esw->dev, "cleanup\n"); @@ -1777,23 +1921,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) kfree(esw); } -void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) -{ - struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; - u16 vport_num = be16_to_cpu(vc_eqe->vport_num); - struct mlx5_vport *vport; - - if (!esw) { - pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", - vport_num); - return; - } - - vport = &esw->vports[vport_num]; - if (vport->enabled) - queue_work(esw->work_queue, &vport->vport_change_handler); -} - /* Vport Administration */ #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) @@ -1804,7 +1931,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u64 node_guid; int err = 0; - if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) return -EPERM; if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) return -EINVAL; @@ -1812,13 +1939,10 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - if (evport->info.spoofchk && !is_valid_ether_addr(mac)) { + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) mlx5_core_warn(esw->dev, - "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", + "Set invalid MAC while spoofchk is on, vport(%d)\n", vport); - err = -EPERM; - goto unlock; - } err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); if (err) { @@ -1861,7 +1985,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, err = mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport, link_state); + vport, 1, link_state); if (err) { mlx5_core_warn(esw->dev, "Failed to set vport %d link state, err = %d", @@ -1881,7 +2005,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, { struct mlx5_vport *evport; - if (!MLX5_CAP_GEN(esw->dev, 
vport_group_manager)) + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; @@ -1964,6 +2088,10 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, evport = &esw->vports[vport]; pschk = evport->info.spoofchk; evport->info.spoofchk = spoofchk; + if (pschk && !is_valid_ether_addr(evport->info.mac)) + mlx5_core_warn(esw->dev, + "Spoofchk in set while MAC is invalid, vport(%d)\n", + evport->vport); if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); if (err) @@ -1973,6 +2101,128 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, return err; } +static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.legacy.vepa_uplink_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule); + + if (esw->fdb_table.legacy.vepa_star_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule); + + esw->fdb_table.legacy.vepa_uplink_rule = NULL; + esw->fdb_table.legacy.vepa_star_rule = NULL; +} + +static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, + u8 setting) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + int err = 0; + void *misc; + + if (!setting) { + esw_cleanup_vepa_rules(esw); + return 0; + } + + if (esw->fdb_table.legacy.vepa_uplink_rule) + return 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Uplink rule forward uplink traffic to FDB */ + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = 
esw->fdb_table.legacy.fdb; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } else { + esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; + } + + /* Star rule to forward all traffic to uplink vport */ + memset(spec, 0, sizeof(*spec)); + memset(&dest, 0, sizeof(dest)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = MLX5_VPORT_UPLINK; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } else { + esw->fdb_table.legacy.vepa_star_rule = flow_rule; + } + +out: + kvfree(spec); + if (err) + esw_cleanup_vepa_rules(esw); + return err; +} + +int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) +{ + int err = 0; + + if (!esw) + return -EOPNOTSUPP; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + + mutex_lock(&esw->state_lock); + if (esw->mode != SRIOV_LEGACY) { + err = -EOPNOTSUPP; + goto out; + } + + err = _mlx5_eswitch_set_vepa_locked(esw, setting); + +out: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) +{ + int err = 0; + + if (!esw) + return -EOPNOTSUPP; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + + mutex_lock(&esw->state_lock); + if (esw->mode != SRIOV_LEGACY) { + err = -EOPNOTSUPP; + goto out; + } + + *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 
1 : 0; + +out: + mutex_unlock(&esw->state_lock); + return err; +} + int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, int vport, bool setting) { @@ -2000,8 +2250,7 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) u32 max_guarantee = 0; int i; - for (i = 0; i < esw->total_vports; i++) { - evport = &esw->vports[i]; + mlx5_esw_for_all_vports(esw, i, evport) { if (!evport->enabled || evport->info.min_rate < max_guarantee) continue; max_guarantee = evport->info.min_rate; @@ -2020,8 +2269,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int err; int i; - for (i = 0; i < esw->total_vports; i++) { - evport = &esw->vports[i]; + mlx5_esw_for_all_vports(esw, i, evport) { if (!evport->enabled) continue; vport_min_rate = evport->info.min_rate; @@ -2036,7 +2284,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) if (bw_share == evport->qos.bw_share) continue; - err = esw_vport_qos_config(esw, i, vport_max_rate, + err = esw_vport_qos_config(esw, evport->vport, vport_max_rate, bw_share); if (!err) evport->qos.bw_share = bw_share; @@ -2050,19 +2298,24 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, u32 max_rate, u32 min_rate) { - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - bool min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && - fw_max_bw_share >= MLX5_MIN_BW_SHARE; - bool max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); struct mlx5_vport *evport; + u32 fw_max_bw_share; u32 previous_min_rate; u32 divider; + bool min_rate_supported; + bool max_rate_supported; int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + + fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + max_rate_supported = 
MLX5_CAP_QOS(esw->dev, esw_rate_limit); + if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported)) return -EOPNOTSUPP; @@ -2119,7 +2372,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) return 0; - err = mlx5_query_vport_down_stats(dev, vport_idx, + err = mlx5_query_vport_down_stats(dev, vport_idx, 1, &rx_discard_vport_down, &tx_discard_vport_down); if (err) @@ -2156,8 +2409,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, other_vport, 1); memset(out, 0, outlen); err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); @@ -2219,3 +2471,21 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); + +bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) +{ + if ((dev0->priv.eswitch->mode == SRIOV_NONE && + dev1->priv.eswitch->mode == SRIOV_NONE) || + (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && + dev1->priv.eswitch->mode == SRIOV_OFFLOADS)) + return true; + + return false; +} + +bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + return (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && + dev1->priv.eswitch->mode == SRIOV_OFFLOADS); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index aaafc9f17115..3f3cd32ae60a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -38,6 +38,7 @@ #include <net/devlink.h> #include <linux/mlx5/device.h> #include <linux/mlx5/eswitch.h> +#include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> 
#include "lib/mpfs.h" @@ -49,8 +50,6 @@ #define MLX5_MAX_MC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) -#define FDB_UPLINK_VPORT 0xffff - #define MLX5_MIN_BW_SHARE 1 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ @@ -138,11 +137,16 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *addr_grp; struct mlx5_flow_group *allmulti_grp; struct mlx5_flow_group *promisc_grp; + struct mlx5_flow_table *vepa_fdb; + struct mlx5_flow_handle *vepa_uplink_rule; + struct mlx5_flow_handle *vepa_star_rule; } legacy; struct offloads_fdb { struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *peer_miss_grp; + struct mlx5_flow_handle **peer_miss_rules; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle *miss_rule_uni; struct mlx5_flow_handle *miss_rule_multi; @@ -165,6 +169,8 @@ struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; + struct list_head peer_flows; + struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); u8 inline_mode; @@ -179,8 +185,19 @@ struct esw_mc_addr { /* SRIOV only */ u32 refcnt; }; +struct mlx5_host_work { + struct work_struct work; + struct mlx5_eswitch *esw; +}; + +struct mlx5_host_info { + struct mlx5_nb nb; + u16 num_vfs; +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; + struct mlx5_nb nb; struct mlx5_eswitch_fdb fdb_table; struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; @@ -201,17 +218,19 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; int mode; int nvports; + u16 manager_vport; + struct mlx5_host_info host_info; }; -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw); +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports); void 
esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, @@ -226,6 +245,8 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, int vport_num, bool setting); int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, u32 max_rate, u32 min_rate); +int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting); +int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi); int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, @@ -281,13 +302,17 @@ enum mlx5_flow_match_level { /* current maximum for flow based vport multicasting */ #define MLX5_MAX_FLOW_FWD_VPORTS 2 +enum { + MLX5_ESW_DEST_ENCAP = BIT(0), + MLX5_ESW_DEST_ENCAP_VALID = BIT(1), +}; + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; - struct mlx5_eswitch_rep *out_rep[MLX5_MAX_FLOW_FWD_VPORTS]; - struct mlx5_core_dev *out_mdev[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5_core_dev *in_mdev; + struct mlx5_core_dev *counter_dev; - int mirror_count; + int split_count; int out_count; int action; @@ -296,9 +321,15 @@ struct mlx5_esw_flow_attr { u8 vlan_prio[MLX5_FS_VLAN_DEPTH]; u8 total_vlan; bool vlan_handled; - u32 encap_id; + struct { + u32 flags; + struct mlx5_eswitch_rep *rep; + struct mlx5_core_dev *mdev; + u32 encap_id; + } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; u8 match_level; + u8 tunnel_match_level; struct mlx5_fc *counter; u32 chain; u16 prio; @@ -338,6 +369,11 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct 
mlx5_core_dev *dev MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2); } +bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1); +bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ @@ -348,13 +384,60 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +/* The returned number is valid only when the dev is eswitch manager. */ +static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_ECPF : MLX5_VPORT_PF; +} + +static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) +{ + /* Uplink always locate at the last element of the array.*/ + return esw->total_vports - 1; +} + +static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw) +{ + return esw->total_vports - 2; +} + +static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, + u16 vport_num) +{ + if (vport_num == MLX5_VPORT_ECPF) { + if (!mlx5_ecpf_vport_exists(esw->dev)) + esw_warn(esw->dev, "ECPF vport doesn't exist!\n"); + return mlx5_eswitch_ecpf_idx(esw); + } + + if (vport_num == MLX5_VPORT_UPLINK) + return mlx5_eswitch_uplink_idx(esw); + + return vport_num; +} + +static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, + int index) +{ + if (index == mlx5_eswitch_ecpf_idx(esw) && + mlx5_ecpf_vport_exists(esw->dev)) + return MLX5_VPORT_ECPF; + + if (index == mlx5_eswitch_uplink_idx(esw)) + return MLX5_VPORT_UPLINK; + + return index; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, 
struct mlx5_eqe *eqe) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9eac137790f5..9b2d78ee22b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -39,15 +39,60 @@ #include "eswitch.h" #include "en.h" #include "fs_core.h" +#include "lib/devcom.h" +#include "ecpf.h" +#include "lib/eq.h" enum { FDB_FAST_PATH = 0, FDB_SLOW_PATH }; +/* There are two match-all miss flows, one for unicast dst mac and + * one for multicast. + */ +#define MLX5_ESW_MISS_FLOWS (2) + #define fdb_prio_table(esw, chain, prio, level) \ (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] +#define UPLINK_REP_INDEX 0 + +/* The rep getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. 
+ */ +#define mlx5_esw_for_all_reps(esw, i, rep) \ + for ((i) = MLX5_VPORT_PF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) <= (nvfs); (i)++) + +#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \ + for ((i) = (nvfs); \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) >= MLX5_VPORT_FIRST_VF; (i)--) + +#define mlx5_esw_for_each_vf_vport(esw, vport, nvfs) \ + for ((vport) = MLX5_VPORT_FIRST_VF; \ + (vport) <= (nvfs); (vport)++) + +#define mlx5_esw_for_each_vf_vport_reverse(esw, vport, nvfs) \ + for ((vport) = (nvfs); \ + (vport) >= MLX5_VPORT_FIRST_VF; (vport)--) + +static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, + u16 vport_num) +{ + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + + WARN_ON(idx > esw->total_vports - 1); + return &esw->offloads.vport_reps[idx]; +} + static struct mlx5_flow_table * esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); static void @@ -81,7 +126,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; - bool mirror = !!(attr->mirror_count); + bool split = !!(attr->split_count); struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int j, i = 0; @@ -120,13 +165,21 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, dest[i].ft = ft; i++; } else { - for (j = attr->mirror_count; j < attr->out_count; j++) { + for (j = attr->split_count; j < attr->out_count; j++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->out_rep[j]->vport; + dest[i].vport.num = attr->dests[j].rep->vport; dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); - dest[i].vport.vhca_id_valid = - !!MLX5_CAP_ESW(esw->dev, 
merged_eswitch); + MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + dest[i].vport.flags |= + MLX5_FLOW_DEST_VPORT_VHCA_ID; + if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act.reformat_id = attr->dests[j].encap_id; + dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + dest[i].vport.reformat_id = + attr->dests[j].encap_id; + } i++; } } @@ -151,22 +204,20 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_eswitch_owner_vhca_id); - if (attr->match_level == MLX5_MATCH_NONE) - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - else - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS; - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) - spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + if (attr->tunnel_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + if (attr->match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + } else if (attr->match_level != MLX5_MATCH_NONE) { + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) - flow_act.reformat_id = attr->encap_id; - - fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror); + fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split); if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; @@ -181,7 +232,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; err_add_rule: - esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); + esw_put_prio_table(esw, attr->chain, attr->prio, 
!!split); err_esw_get: if (attr->dest_chain) esw_put_prio_table(esw, attr->dest_chain, 1, 0); @@ -215,12 +266,17 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - for (i = 0; i < attr->mirror_count; i++) { + for (i = 0; i < attr->split_count; i++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->out_rep[i]->vport; + dest[i].vport.num = attr->dests[i].rep->vport; dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->out_mdev[i], vhca_id); - dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { + dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + dest[i].vport.reformat_id = attr->dests[i].encap_id; + } } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = fwd_fdb, @@ -268,7 +324,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr, bool fwd_rule) { - bool mirror = (attr->mirror_count > 0); + bool split = (attr->split_count > 0); mlx5_del_flow_rules(rule); esw->offloads.num_flows--; @@ -277,7 +333,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, esw_put_prio_table(esw, attr->chain, attr->prio, 1); esw_put_prio_table(esw, attr->chain, attr->prio, 0); } else { - esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); + esw_put_prio_table(esw, attr->chain, attr->prio, !!split); if (attr->dest_chain) esw_put_prio_table(esw, attr->dest_chain, 1, 0); } @@ -307,7 +363,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? 
"pop" : "none"); for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { rep = &esw->offloads.vport_reps[vf_vport]; - if (!rep->rep_if[REP_ETH].valid) + if (rep->rep_if[REP_ETH].state != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -325,7 +381,7 @@ esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop) struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL; in_rep = attr->in_rep; - out_rep = attr->out_rep[0]; + out_rep = attr->dests[0].rep; if (push) vport = in_rep; @@ -346,17 +402,17 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, goto out_notsupp; in_rep = attr->in_rep; - out_rep = attr->out_rep[0]; + out_rep = attr->dests[0].rep; - if (push && in_rep->vport == FDB_UPLINK_VPORT) + if (push && in_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; - if (pop && out_rep->vport == FDB_UPLINK_VPORT) + if (pop && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ if (!push && !pop && fwd) - if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT) + if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* protects against (1) setting rules with different vlans to push and @@ -398,7 +454,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) { + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; attr->vlan_handled = true; } @@ -458,7 +514,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--; return 0; @@ -505,7 +561,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, 
int vport, u32 sqn misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); - MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + /* source vport is the esw manager */ + MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); @@ -531,6 +588,134 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } +static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, + struct mlx5_flow_spec *spec, + struct mlx5_flow_destination *dest) +{ + void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest->vport.num = peer_dev->priv.eswitch->manager_vport; + dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); + dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; +} + +static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle **flows; + struct mlx5_flow_handle *flow; + struct mlx5_flow_spec *spec; + /* total vports is the same for both e-switches */ + int nvports = esw->total_vports; + void *misc; + int err, i; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + peer_miss_rules_setup(peer_dev, spec, &dest); + + flows = kvzalloc(nvports * 
sizeof(*flows), GFP_KERNEL); + if (!flows) { + err = -ENOMEM; + goto alloc_flows_err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_pf_flow_err; + } + flows[MLX5_VPORT_PF] = flow; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ecpf_flow_err; + } + flows[mlx5_eswitch_ecpf_idx(esw)] = flow; + } + + mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, i); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_vf_flow_err; + } + flows[i] = flow; + } + + esw->fdb_table.offloads.peer_miss_rules = flows; + + kvfree(spec); + return 0; + +add_vf_flow_err: + nvports = --i; + mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports) + mlx5_del_flow_rules(flows[i]); + + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); +add_ecpf_flow_err: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); +add_pf_flow_err: + esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); + kvfree(flows); +alloc_flows_err: + kvfree(spec); + return err; +} + +static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_handle **flows; + int i; + + flows = esw->fdb_table.offloads.peer_miss_rules; + + mlx5_esw_for_each_vf_vport_reverse(esw, i, 
mlx5_core_max_vfs(esw->dev)) + mlx5_del_flow_rules(flows[i]); + + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); + + kvfree(flows); +} + static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { struct mlx5_flow_act flow_act = {0}; @@ -557,7 +742,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dmac_c[0] = 0x01; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport.num = 0; + dest.vport.num = esw->manager_vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, @@ -801,7 +986,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) esw->fdb_table.offloads.fdb_left[i] = ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; - table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2; + table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + + MLX5_ESW_MISS_FLOWS + esw->total_vports; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. 
@@ -856,6 +1042,34 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.send_to_vport_grp = g; + /* create peer esw miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + esw->total_vports - 1); + ix += esw->total_vports; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err); + goto peer_miss_err; + } + esw->fdb_table.offloads.peer_miss_grp = g; + /* create miss group */ memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -867,7 +1081,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) dmac[0] = 0x01; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + MLX5_ESW_MISS_FLOWS); g = mlx5_create_flow_group(fdb, flow_group_in); if (IS_ERR(g)) { @@ -888,6 +1103,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) miss_rule_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); miss_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); +peer_miss_err: 
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: esw_destroy_offloads_fast_fdb_tables(esw); @@ -907,13 +1124,14 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); esw_destroy_offloads_fast_fdb_tables(esw); } -static int esw_create_offloads_table(struct mlx5_eswitch *esw) +static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; @@ -927,7 +1145,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -EOPNOTSUPP; } - ft_attr.max_fte = dev->priv.sriov.num_vfs + 2; + ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { @@ -947,16 +1165,15 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(offloads->ft_offloads); } -static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; - struct mlx5_priv *priv = &esw->dev->priv; u32 *flow_group_in; void *match_criteria, *misc; int err = 0; - int nvports = priv->sriov.num_vfs + 2; + nvports = nvports + MLX5_ESW_MISS_FLOWS; flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1033,7 +1250,8 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != SRIOV_LEGACY && + 
!mlx5_core_is_ecpf_esw_manager(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't set offloads mode, SRIOV legacy not enabled"); return -EINVAL; @@ -1071,9 +1289,8 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) { int total_vfs = MLX5_TOTAL_VPORTS(esw->dev); struct mlx5_core_dev *dev = esw->dev; - struct mlx5_esw_offload *offloads; struct mlx5_eswitch_rep *rep; - u8 hw_id[ETH_ALEN]; + u8 hw_id[ETH_ALEN], rep_type; int vport; esw->offloads.vport_reps = kcalloc(total_vfs, @@ -1082,75 +1299,203 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) if (!esw->offloads.vport_reps) return -ENOMEM; - offloads = &esw->offloads; mlx5_query_nic_vport_mac_address(dev, 0, hw_id); - for (vport = 0; vport < total_vfs; vport++) { - rep = &offloads->vport_reps[vport]; - - rep->vport = vport; + mlx5_esw_for_all_reps(esw, vport, rep) { + rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport); ether_addr_copy(rep->hw_id, hw_id); - } - offloads->vport_reps[0].vport = FDB_UPLINK_VPORT; + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + rep->rep_if[rep_type].state = REP_UNREGISTERED; + } return 0; } -static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (rep->rep_if[rep_type].state != REP_LOADED) + return; + + rep->rep_if[rep_type].unload(rep); + rep->rep_if[rep_type].state = REP_REGISTERED; +} + +static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int vport; - for (vport = nvports - 1; vport >= 0; vport--) { - rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } - rep->rep_if[rep_type].unload(rep); + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = 
mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); } + + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int i; + + mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports) + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = NUM_REP_TYPES; + + while (rep_type-- > 0) + __unload_reps_vf_vport(esw, nvports, rep_type); +} + +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + __unload_reps_vf_vport(esw, nvports, rep_type); + + /* Special vports must be the last to unload. */ + __unload_reps_special_vport(esw, rep_type); } -static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) +static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = NUM_REP_TYPES; while (rep_type-- > 0) - esw_offloads_unload_reps_type(esw, nvports, rep_type); + __unload_reps_all_vport(esw, nvports, rep_type); } -static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + int err = 0; + + if (rep->rep_if[rep_type].state != REP_REGISTERED) + return 0; + + err = rep->rep_if[rep_type].load(esw->dev, rep); + if (err) + return err; + + rep->rep_if[rep_type].state = REP_LOADED; + + return 0; +} + +static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int vport; int err; - for (vport = 0; vport < nvports; vport++) { - rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + err = __esw_offloads_load_rep(esw, 
rep, rep_type); + if (err) + return err; - err = rep->rep_if[rep_type].load(esw->dev, rep); + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) - goto err_reps; + goto err_pf; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_ecpf; + } + + return 0; + +err_ecpf: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + +err_pf: + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + __esw_offloads_unload_rep(esw, rep, rep_type); + return err; +} + +static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int err, i; + + mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) { + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_vf; } return 0; +err_vf: + __unload_reps_vf_vport(esw, --i, rep_type); + return err; +} + +static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = 0; + int err; + + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + err = __load_reps_vf_vport(esw, nvports, rep_type); + if (err) + goto err_reps; + } + + return err; + err_reps: - esw_offloads_unload_reps_type(esw, vport, rep_type); + while (rep_type-- > 0) + __unload_reps_vf_vport(esw, nvports, rep_type); + return err; +} + +static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + int err; + + /* Special vports must be loaded first. 
*/ + err = __load_reps_special_vport(esw, rep_type); + if (err) + return err; + + err = __load_reps_vf_vport(esw, nvports, rep_type); + if (err) + goto err_vfs; + + return 0; + +err_vfs: + __unload_reps_special_vport(esw, rep_type); return err; } -static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = esw_offloads_load_reps_type(esw, nvports, rep_type); + err = __load_reps_all_vport(esw, nvports, rep_type); if (err) goto err_reps; } @@ -1159,37 +1504,130 @@ static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - esw_offloads_unload_reps_type(esw, nvports, rep_type); + __unload_reps_all_vport(esw, nvports, rep_type); return err; } -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +#define ESW_OFFLOADS_DEVCOM_PAIR (0) +#define ESW_OFFLOADS_DEVCOM_UNPAIR (1) + +static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) { int err; + err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); + if (err) + return err; + + return 0; +} + +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); + +static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) +{ + mlx5e_tc_clean_fdb_peer_flows(esw); + esw_del_fdb_peer_miss_rules(esw); +} + +static int mlx5_esw_offloads_devcom_event(int event, + void *my_data, + void *event_data) +{ + struct mlx5_eswitch *esw = my_data; + struct mlx5_eswitch *peer_esw = event_data; + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + int err; + + switch (event) { + case ESW_OFFLOADS_DEVCOM_PAIR: + err = mlx5_esw_offloads_pair(esw, peer_esw); + if (err) + goto err_out; + + err = mlx5_esw_offloads_pair(peer_esw, esw); + if (err) + goto err_pair; + + mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); + break; + + case 
ESW_OFFLOADS_DEVCOM_UNPAIR: + if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + break; + + mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); + mlx5_esw_offloads_unpair(peer_esw); + mlx5_esw_offloads_unpair(esw); + break; + } + + return 0; + +err_pair: + mlx5_esw_offloads_unpair(esw); + +err_out: + mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d", + event, err); + return err; +} + +static void esw_offloads_devcom_init(struct mlx5_eswitch *esw) +{ + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + + INIT_LIST_HEAD(&esw->offloads.peer_flows); + mutex_init(&esw->offloads.peer_mutex); + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + + mlx5_devcom_register_component(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + mlx5_esw_offloads_devcom_event, + esw); + + mlx5_devcom_send_event(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_PAIR, esw); +} + +static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + + mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_UNPAIR, esw); + + mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +} + +static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) +{ + int err; + + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); err = esw_create_offloads_fdb_tables(esw, nvports); if (err) return err; - err = esw_create_offloads_table(esw); + err = esw_create_offloads_table(esw, nvports); if (err) goto create_ft_err; - err = esw_create_vport_rx_group(esw); + err = esw_create_vport_rx_group(esw, nvports); if (err) goto create_fg_err; - err = esw_offloads_load_reps(esw, nvports); - if (err) - goto err_reps; - return 0; -err_reps: - esw_destroy_vport_rx_group(esw); - create_fg_err: esw_destroy_offloads_table(esw); @@ -1199,6 
+1637,95 @@ create_ft_err: return err; } +static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) +{ + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_table(esw); + esw_destroy_offloads_fdb_tables(esw); +} + +static void esw_host_params_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + int err, num_vf = 0; + + host_work = container_of(work, struct mlx5_host_work, work); + esw = host_work->esw; + + err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); + if (err || num_vf == esw->host_info.num_vfs) + goto out; + + /* Number of VFs can only change from "0 to x" or "x to 0". */ + if (esw->host_info.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + } else { + err = esw_offloads_load_vf_reps(esw, num_vf); + + if (err) + goto out; + } + + esw->host_info.num_vfs = num_vf; + +out: + kfree(host_work); +} + +static int esw_host_params_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_host_work *host_work; + struct mlx5_host_info *host_info; + struct mlx5_eswitch *esw; + + host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); + if (!host_work) + return NOTIFY_DONE; + + host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); + esw = container_of(host_info, struct mlx5_eswitch, host_info); + + host_work->esw = esw; + + INIT_WORK(&host_work->work, esw_host_params_event_handler); + queue_work(esw->work_queue, &host_work->work); + + return NOTIFY_OK; +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports) +{ + int err; + + mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); + + err = esw_offloads_steering_init(esw, total_nvports); + if (err) + return err; + + err = esw_offloads_load_all_reps(esw, vf_nvports); + if (err) + goto err_reps; + + esw_offloads_devcom_init(esw); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, + HOST_PARAMS_CHANGE); + 
mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); + esw->host_info.num_vfs = vf_nvports; + } + + return 0; + +err_reps: + esw_offloads_steering_cleanup(esw); + return err; +} + static int esw_offloads_stop(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { @@ -1215,18 +1742,24 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, } } - /* enable back PF RoCE */ - mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - return err; } -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) +void esw_offloads_cleanup(struct mlx5_eswitch *esw) { - esw_offloads_unload_reps(esw, nvports); - esw_destroy_vport_rx_group(esw); - esw_destroy_offloads_table(esw); - esw_destroy_offloads_fdb_tables(esw); + u16 num_vfs; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); + flush_workqueue(esw->work_queue); + num_vfs = esw->host_info.num_vfs; + } else { + num_vfs = esw->dev->priv.sriov.num_vfs; + } + + esw_offloads_devcom_cleanup(esw); + esw_offloads_unload_all_reps(esw, num_vfs); + esw_offloads_steering_cleanup(esw); } static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) @@ -1315,7 +1848,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == SRIOV_NONE) + if (dev->priv.eswitch->mode == SRIOV_NONE && + !mlx5_core_is_ecpf_esw_manager(dev)) return -EOPNOTSUPP; return 0; @@ -1527,47 +2061,45 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) return 0; } -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *__rep_if, - u8 rep_type) +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep_if *__rep_if, + u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep_if *rep_if; + struct mlx5_eswitch_rep *rep; + int i; - rep_if = 
&offloads->vport_reps[vport_index].rep_if[rep_type]; - - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; + mlx5_esw_for_all_reps(esw, i, rep) { + rep_if = &rep->rep_if[rep_type]; + rep_if->load = __rep_if->load; + rep_if->unload = __rep_if->unload; + rep_if->get_proto_dev = __rep_if->get_proto_dev; + rep_if->priv = __rep_if->priv; - rep_if->valid = true; + rep_if->state = REP_REGISTERED; + } } -EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); +EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, u8 rep_type) +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; + u16 max_vf = mlx5_core_max_vfs(esw->dev); struct mlx5_eswitch_rep *rep; + int i; - rep = &offloads->vport_reps[vport_index]; - - if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) - rep->rep_if[rep_type].unload(rep); + if (esw->mode == SRIOV_OFFLOADS) + __unload_reps_all_vport(esw, max_vf, rep_type); - rep->rep_if[rep_type].valid = false; + mlx5_esw_for_all_reps(esw, i, rep) + rep->rep_if[rep_type].state = REP_UNREGISTERED; } -EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); +EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { -#define UPLINK_REP_INDEX 0 - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - rep = &offloads->vport_reps[UPLINK_REP_INDEX]; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); return rep->rep_if[rep_type].priv; } @@ -1575,15 +2107,11 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, int vport, u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - if (vport == FDB_UPLINK_VPORT) - vport = UPLINK_REP_INDEX; - - rep = 
&offloads->vport_reps[vport]; + rep = mlx5_eswitch_get_rep(esw, vport); - if (rep->rep_if[rep_type].valid && + if (rep->rep_if[rep_type].state == REP_LOADED && rep->rep_if[rep_type].get_proto_dev) return rep->rep_if[rep_type].get_proto_dev(rep); return NULL; @@ -1592,13 +2120,13 @@ EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) { - return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type); + return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type); } EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, int vport) { - return &esw->offloads.vport_reps[vport]; + return mlx5_eswitch_get_rep(esw, vport); } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c new file mode 100644 index 000000000000..5d5864e8df3c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/mlx5.h" + +struct mlx5_event_nb { + struct mlx5_nb nb; + void *ctx; +}; + +/* General events handlers for the low level mlx5_core driver + * + * Other Major feature specific events such as + * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with + * separate notifiers callbacks, specifically by those mlx5 components. 
+ */ +static int any_notifier(struct notifier_block *, unsigned long, void *); +static int temp_warn(struct notifier_block *, unsigned long, void *); +static int port_module(struct notifier_block *, unsigned long, void *); + +/* handler which forwards the event to events->nh, driver notifiers */ +static int forward_event(struct notifier_block *, unsigned long, void *); + +static struct mlx5_nb events_nbs_ref[] = { + /* Events to be proccessed by mlx5_core */ + {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, + {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, + {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, + + /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, + /* QP/WQ resource events to forward */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, + /* SRQ events */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT }, +}; + +struct mlx5_events { + struct 
mlx5_core_dev *dev; + struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; + /* driver notifier chain */ + struct atomic_notifier_head nh; + /* port module events stats */ + struct mlx5_pme_stats pme_stats; +}; + +static const char *eqe_type_str(u8 type) +{ + switch (type) { + case MLX5_EVENT_TYPE_COMP: + return "MLX5_EVENT_TYPE_COMP"; + case MLX5_EVENT_TYPE_PATH_MIG: + return "MLX5_EVENT_TYPE_PATH_MIG"; + case MLX5_EVENT_TYPE_COMM_EST: + return "MLX5_EVENT_TYPE_COMM_EST"; + case MLX5_EVENT_TYPE_SQ_DRAINED: + return "MLX5_EVENT_TYPE_SQ_DRAINED"; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; + case MLX5_EVENT_TYPE_CQ_ERROR: + return "MLX5_EVENT_TYPE_CQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_INTERNAL_ERROR: + return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; + case MLX5_EVENT_TYPE_PORT_CHANGE: + return "MLX5_EVENT_TYPE_PORT_CHANGE"; + case MLX5_EVENT_TYPE_GPIO_EVENT: + return "MLX5_EVENT_TYPE_GPIO_EVENT"; + case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: + return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; + case MLX5_EVENT_TYPE_REMOTE_CONFIG: + return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; + case MLX5_EVENT_TYPE_DB_BF_CONGESTION: + return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; + case MLX5_EVENT_TYPE_STALL_EVENT: + return "MLX5_EVENT_TYPE_STALL_EVENT"; + case MLX5_EVENT_TYPE_CMD: + return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: + return 
"MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; + case MLX5_EVENT_TYPE_PAGE_REQUEST: + return "MLX5_EVENT_TYPE_PAGE_REQUEST"; + case MLX5_EVENT_TYPE_PAGE_FAULT: + return "MLX5_EVENT_TYPE_PAGE_FAULT"; + case MLX5_EVENT_TYPE_PPS_EVENT: + return "MLX5_EVENT_TYPE_PPS_EVENT"; + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; + case MLX5_EVENT_TYPE_FPGA_ERROR: + return "MLX5_EVENT_TYPE_FPGA_ERROR"; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + return "MLX5_EVENT_TYPE_GENERAL_EVENT"; + case MLX5_EVENT_TYPE_MONITOR_COUNTER: + return "MLX5_EVENT_TYPE_MONITOR_COUNTER"; + case MLX5_EVENT_TYPE_DEVICE_TRACER: + return "MLX5_EVENT_TYPE_DEVICE_TRACER"; + default: + return "Unrecognized event"; + } +} + +/* handles all FW events, type == eqe->type */ +static int any_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n", + eqe_type_str(eqe->type), eqe->sub_type); + return NOTIFY_OK; +} + +/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ +static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + u64 value_lsb; + u64 value_msb; + + value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); + value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); + + mlx5_core_warn(events->dev, + "High temperature on sensors with bit set %llx %llx", + value_msb, value_lsb); + + return NOTIFY_OK; +} + +/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ +static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status) +{ + switch (status) { + case 
MLX5_MODULE_STATUS_PLUGGED: + return "Cable plugged"; + case MLX5_MODULE_STATUS_UNPLUGGED: + return "Cable unplugged"; + case MLX5_MODULE_STATUS_ERROR: + return "Cable error"; + case MLX5_MODULE_STATUS_DISABLED: + return "Cable disabled"; + default: + return "Unknown status"; + } +} + +static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error) +{ + switch (error) { + case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: + return "Power budget exceeded"; + case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX: + return "Long Range for non MLNX cable"; + case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: + return "Bus stuck (I2C or data shorted)"; + case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: + return "No EEPROM/retry timeout"; + case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: + return "Enforce part number list"; + case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER: + return "Unknown identifier"; + case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: + return "High Temperature"; + case MLX5_MODULE_EVENT_ERROR_BAD_CABLE: + return "Bad or shorted cable/module"; + case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED: + return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot"; + default: + return "Unknown error"; + } +} + +/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ +static int port_module(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + enum port_module_event_status_type module_status; + enum port_module_event_error_type error_type; + struct mlx5_eqe_port_module *module_event_eqe; + const char *status_str; + u8 module_num; + + module_event_eqe = &eqe->data.port_module; + module_status = module_event_eqe->module_status & + PORT_MODULE_EVENT_MODULE_STATUS_MASK; + error_type = module_event_eqe->error_type & + 
PORT_MODULE_EVENT_ERROR_TYPE_MASK; + + if (module_status < MLX5_MODULE_STATUS_NUM) + events->pme_stats.status_counters[module_status]++; + + if (module_status == MLX5_MODULE_STATUS_ERROR) + if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) + events->pme_stats.error_counters[error_type]++; + + if (!printk_ratelimit()) + return NOTIFY_OK; + + module_num = module_event_eqe->module; + status_str = mlx5_pme_status_to_string(module_status); + if (module_status == MLX5_MODULE_STATUS_ERROR) { + const char *error_str = mlx5_pme_error_to_string(error_type); + + mlx5_core_err(events->dev, + "Port module event[error]: module %u, %s, %s\n", + module_num, status_str, error_str); + } else { + mlx5_core_info(events->dev, + "Port module event: module %u, %s\n", + module_num, status_str); + } + + return NOTIFY_OK; +} + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) +{ + *stats = dev->priv.events->pme_stats; +} + +/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */ +static int forward_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n", + eqe_type_str(eqe->type), eqe->sub_type); + atomic_notifier_call_chain(&events->nh, event, data); + return NOTIFY_OK; +} + +int mlx5_events_init(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); + + if (!events) + return -ENOMEM; + + ATOMIC_INIT_NOTIFIER_HEAD(&events->nh); + events->dev = dev; + dev->priv.events = events; + return 0; +} + +void mlx5_events_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.events); +} + +void mlx5_events_start(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + int i; + + for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) { + 
events->notifiers[i].nb = events_nbs_ref[i]; + events->notifiers[i].ctx = events; + mlx5_eq_notifier_register(dev, &events->notifiers[i].nb); + } +} + +void mlx5_events_stop(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + int i; + + for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) + mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); +} + +int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_register(&events->nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_register); + +int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_unregister(&events->nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_unregister); + +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) +{ + return atomic_notifier_call_chain(&events->nh, event, data); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 8ca1d1949d93..873541ef4c1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -334,7 +334,7 @@ static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn, { u8 opcode, status = 0; - opcode = cqe->op_own >> 4; + opcode = get_cqe_opcode(cqe); switch (opcode) { case MLX5_CQE_REQ_ERR: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index 436a8136f26f..d046d1ec2a86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -36,6 +36,7 @@ #include "mlx5_core.h" #include "lib/mlx5.h" +#include "lib/eq.h" #include "fpga/core.h" #include "fpga/conn.h" @@ -145,6 +146,22 @@ static int mlx5_fpga_device_brb(struct 
mlx5_fpga_device *fdev) return 0; } +static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *); + +static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + +static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) { struct mlx5_fpga_device *fdev = mdev->fpga; @@ -185,6 +202,11 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) if (err) goto out; + MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR); + MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb); + err = mlx5_fpga_conn_device_init(fdev); if (err) goto err_rsvd_gid; @@ -201,6 +223,8 @@ err_conn_init: mlx5_fpga_conn_device_cleanup(fdev); err_rsvd_gid: + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); mlx5_core_unreserve_gids(mdev, max_num_qps); out: spin_lock_irqsave(&fdev->state_lock, flags); @@ -256,6 +280,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) } mlx5_fpga_conn_device_cleanup(fdev); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); mlx5_core_unreserve_gids(mdev, max_num_qps); } @@ -283,13 +310,13 @@ static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome) return "Unknown"; } -void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) +static int mlx5_fpga_event(struct mlx5_fpga_device 
*fdev, + unsigned long event, void *eqe) { - struct mlx5_fpga_device *fdev = mdev->fpga; + void *data = ((struct mlx5_eqe *)eqe)->data.raw; const char *event_name; bool teardown = false; unsigned long flags; - u32 fpga_qpn; u8 syndrome; switch (event) { @@ -300,12 +327,9 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) case MLX5_EVENT_TYPE_FPGA_QP_ERROR: syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome); event_name = mlx5_fpga_qp_syndrome_to_string(syndrome); - fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn); break; default: - mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n", - event); - return; + return NOTIFY_DONE; } spin_lock_irqsave(&fdev->state_lock, flags); @@ -326,4 +350,6 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) */ if (teardown) mlx5_trigger_health_work(fdev->mdev); + + return NOTIFY_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index 3e2355c8df3f..7e2e871dbf83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -35,11 +35,16 @@ #ifdef CONFIG_MLX5_FPGA +#include <linux/mlx5/eq.h> + +#include "lib/eq.h" #include "fpga/cmd.h" /* Represents an Innova device */ struct mlx5_fpga_device { struct mlx5_core_dev *mdev; + struct mlx5_nb fpga_err_nb; + struct mlx5_nb fpga_qp_err_nb; spinlock_t state_lock; /* Protects state transitions */ enum mlx5_fpga_status state; enum mlx5_fpga_image last_admin_image; @@ -82,7 +87,6 @@ int mlx5_fpga_init(struct mlx5_core_dev *mdev); void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev); int mlx5_fpga_device_start(struct mlx5_core_dev *mdev); void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev); -void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data); #else @@ -104,11 +108,6 @@ static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) { } -static inline void 
mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, - void *data) -{ -} - #endif #endif /* __MLX5_FPGA_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 5cf5f2a9d51f..22a2ef111514 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -148,14 +148,16 @@ static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock, return ret; } -static void mlx5_fpga_tls_release_swid(struct idr *idr, - spinlock_t *idr_spinlock, u32 swid) +static void *mlx5_fpga_tls_release_swid(struct idr *idr, + spinlock_t *idr_spinlock, u32 swid) { unsigned long flags; + void *ptr; spin_lock_irqsave(idr_spinlock, flags); - idr_remove(idr, swid); + ptr = idr_remove(idr, swid); spin_unlock_irqrestore(idr_spinlock, flags); + return ptr; } static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, @@ -165,20 +167,12 @@ static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, kfree(buf); } -struct mlx5_teardown_stream_context { - struct mlx5_fpga_tls_command_context cmd; - u32 swid; -}; - static void mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev, struct mlx5_fpga_tls_command_context *cmd, struct mlx5_fpga_dma_buf *resp) { - struct mlx5_teardown_stream_context *ctx = - container_of(cmd, struct mlx5_teardown_stream_context, cmd); - if (resp) { u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); @@ -186,14 +180,6 @@ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, mlx5_fpga_err(fdev, "Teardown stream failed with syndrome = %d", syndrome); - else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx)) - mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr, - &fdev->tls->tx_idr_spinlock, - ctx->swid); - else - mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr, - &fdev->tls->rx_idr_spinlock, - ctx->swid); } mlx5_fpga_tls_put_command_ctx(cmd); } @@ -225,8 +211,14 
@@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, rcu_read_lock(); flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - rcu_read_unlock(); + if (unlikely(!flow)) { + rcu_read_unlock(); + WARN_ONCE(1, "Received NULL pointer for handle\n"); + kfree(buf); + return -EINVAL; + } mlx5_fpga_tls_flow_to_cmd(flow, cmd); + rcu_read_unlock(); MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn)); @@ -238,6 +230,8 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, buf->complete = mlx_tls_kfree_complete; ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf); + if (ret < 0) + kfree(buf); return ret; } @@ -245,7 +239,7 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, void *flow, u32 swid, gfp_t flags) { - struct mlx5_teardown_stream_context *ctx; + struct mlx5_fpga_tls_command_context *ctx; struct mlx5_fpga_dma_buf *buf; void *cmd; @@ -253,7 +247,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, if (!ctx) return; - buf = &ctx->cmd.buf; + buf = &ctx->buf; cmd = (ctx + 1); MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); MLX5_SET(tls_cmd, cmd, swid, swid); @@ -264,8 +258,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, buf->sg[0].data = cmd; buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - ctx->swid = swid; - mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, + mlx5_fpga_tls_cmd_send(mdev->fpga, ctx, mlx5_fpga_tls_teardown_completion); } @@ -275,13 +268,14 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, struct mlx5_fpga_tls *tls = mdev->fpga->tls; void *flow; - rcu_read_lock(); if (direction_sx) - flow = idr_find(&tls->tx_idr, swid); + flow = mlx5_fpga_tls_release_swid(&tls->tx_idr, + &tls->tx_idr_spinlock, + swid); else - flow = idr_find(&tls->rx_idr, swid); - - rcu_read_unlock(); + 
flow = mlx5_fpga_tls_release_swid(&tls->rx_idr, + &tls->rx_idr_spinlock, + swid); if (!flow) { mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n", @@ -289,6 +283,7 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, return; } + synchronize_rcu(); /* before kfree(flow) */ mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 08a891f9aade..c44ccb67c4a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -308,22 +308,68 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +static int mlx5_set_extended_dest(struct mlx5_core_dev *dev, + struct fs_fte *fte, bool *extended_dest) +{ + int fw_log_max_fdb_encap_uplink = + MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink); + int num_fwd_destinations = 0; + struct mlx5_flow_rule *dst; + int num_encap = 0; + + *extended_dest = false; + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + num_encap++; + num_fwd_destinations++; + } + if (num_fwd_destinations > 1 && num_encap > 0) + *extended_dest = true; + + if (*extended_dest && !fw_log_max_fdb_encap_uplink) { + mlx5_core_warn(dev, "FW does not support extended destination"); + return -EOPNOTSUPP; + } + if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) { + mlx5_core_warn(dev, "FW does not support more than %d encaps", + 1 << fw_log_max_fdb_encap_uplink); + return -EOPNOTSUPP; + } + + return 0; +} static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, int opmod, int modify_mask, struct mlx5_flow_table 
*ft, unsigned group_id, struct fs_fte *fte) { - unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + - fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct); u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0}; + bool extended_dest = false; struct mlx5_flow_rule *dst; void *in_flow_context, *vlan; void *in_match_value; + unsigned int inlen; + int dst_cnt_size; void *in_dests; u32 *in; int err; + if (mlx5_set_extended_dest(dev, fte, &extended_dest)) + return -EOPNOTSUPP; + + if (!extended_dest) + dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct); + else + dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format); + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size; in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -343,9 +389,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, group_id, group_id); MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); - MLX5_SET(flow_context, in_flow_context, action, fte->action.action); - MLX5_SET(flow_context, in_flow_context, packet_reformat_id, - fte->action.reformat_id); + MLX5_SET(flow_context, in_flow_context, extended_destination, + extended_dest); + if (extended_dest) { + u32 action; + + action = fte->action.action & + ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + MLX5_SET(flow_context, in_flow_context, action, action); + } else { + MLX5_SET(flow_context, in_flow_context, action, + fte->action.action); + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.reformat_id); + } MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->action.modify_id); @@ -387,10 +444,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, id = dst->dest_attr.vport.num; MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id_valid, - dst->dest_attr.vport.vhca_id_valid); + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, 
dst->dest_attr.vport.vhca_id); + if (extended_dest) { + MLX5_SET(dest_format_struct, in_dests, + packet_reformat, + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); + MLX5_SET(extended_dest_format, in_dests, + packet_reformat_id, + dst->dest_attr.vport.reformat_id); + } break; default: id = dst->dest_attr.tir_num; @@ -399,7 +466,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(dest_format_struct, in_dests, destination_type, type); MLX5_SET(dest_format_struct, in_dests, destination_id, id); - in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + in_dests += dst_cnt_size; list_size++; } @@ -420,7 +487,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_counter_list, in_dests, flow_counter_id, dst->dest_attr.counter_id); - in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + in_dests += dst_cnt_size; list_size++; } if (list_size > max_list_size) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 08233cf44871..0be3eb86dd84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -32,6 +32,7 @@ #include <linux/mutex.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> #include <linux/mlx5/eswitch.h> #include "mlx5_core.h" @@ -262,10 +263,11 @@ static void nested_down_write_ref_node(struct fs_node *node, } } -static void down_write_ref_node(struct fs_node *node) +static void down_write_ref_node(struct fs_node *node, bool locked) { if (node) { - down_write(&node->lock); + if (!locked) + down_write(&node->lock); refcount_inc(&node->refcount); } } @@ -276,13 +278,14 @@ static void up_read_ref_node(struct fs_node *node) up_read(&node->lock); } -static void up_write_ref_node(struct fs_node *node) +static void up_write_ref_node(struct fs_node *node, bool locked) { refcount_dec(&node->refcount); - up_write(&node->lock); + if (!locked) + up_write(&node->lock); } -static 
void tree_put_node(struct fs_node *node) +static void tree_put_node(struct fs_node *node, bool locked) { struct fs_node *parent_node = node->parent; @@ -293,27 +296,27 @@ static void tree_put_node(struct fs_node *node) /* Only root namespace doesn't have parent and we just * need to free its node. */ - down_write_ref_node(parent_node); + down_write_ref_node(parent_node, locked); list_del_init(&node->list); if (node->del_sw_func) node->del_sw_func(node); - up_write_ref_node(parent_node); + up_write_ref_node(parent_node, locked); } else { kfree(node); } node = NULL; } if (!node && parent_node) - tree_put_node(parent_node); + tree_put_node(parent_node, locked); } -static int tree_remove_node(struct fs_node *node) +static int tree_remove_node(struct fs_node *node, bool locked) { if (refcount_read(&node->refcount) > 1) { refcount_dec(&node->refcount); return -EEXIST; } - tree_put_node(node); + tree_put_node(node, locked); return 0; } @@ -397,6 +400,7 @@ static void del_hw_flow_table(struct fs_node *node) fs_get_obj(ft, node); dev = get_dev(&ft->node); root = find_root(&ft->node); + trace_mlx5_fs_del_ft(ft); if (node->active) { err = root->cmds->destroy_flow_table(dev, ft); @@ -418,22 +422,34 @@ static void del_sw_flow_table(struct fs_node *node) kfree(ft); } -static void del_sw_hw_rule(struct fs_node *node) +static void modify_fte(struct fs_fte *fte) { struct mlx5_flow_root_namespace *root; - struct mlx5_flow_rule *rule; struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; - struct fs_fte *fte; - int modify_mask; - struct mlx5_core_dev *dev = get_dev(node); + struct mlx5_core_dev *dev; int err; - bool update_fte = false; - fs_get_obj(rule, node); - fs_get_obj(fte, rule->node.parent); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); + dev = get_dev(&fte->node); + + root = find_root(&ft->node); + err = root->cmds->update_fte(dev, ft, fg->id, fte->modify_mask, fte); + if (err) + mlx5_core_warn(dev, + "%s can't del rule fg id=%d fte_index=%d\n", + 
__func__, fg->id, fte->index); + fte->modify_mask = 0; +} + +static void del_sw_hw_rule(struct fs_node *node) +{ + struct mlx5_flow_rule *rule; + struct fs_fte *fte; + + fs_get_obj(rule, node); + fs_get_obj(fte, rule->node.parent); trace_mlx5_fs_del_rule(rule); if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { mutex_lock(&rule->dest_attr.ft->lock); @@ -443,27 +459,19 @@ static void del_sw_hw_rule(struct fs_node *node) if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER && --fte->dests_size) { - modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | - BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + fte->modify_mask |= + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; - update_fte = true; goto out; } if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && --fte->dests_size) { - modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); - update_fte = true; + fte->modify_mask |= + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); } out: - root = find_root(&ft->node); - if (update_fte && fte->dests_size) { - err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte); - if (err) - mlx5_core_warn(dev, - "%s can't del rule fg id=%d fte_index=%d\n", - __func__, fg->id, fte->index); - } kfree(rule); } @@ -489,6 +497,7 @@ static void del_hw_fte(struct fs_node *node) mlx5_core_warn(dev, "flow steering can't delete fte in index %d of flow group id %d\n", fte->index, fg->id); + node->active = 0; } } @@ -589,7 +598,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, fte->node.type = FS_TYPE_FLOW_ENTRY; fte->action = *flow_act; - tree_init_node(&fte->node, del_hw_fte, del_sw_fte); + tree_init_node(&fte->node, NULL, del_sw_fte); return fte; } @@ -618,7 +627,8 @@ static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steer if (ret) { kmem_cache_free(steering->fgs_cache, fg); 
return ERR_PTR(ret); -} + } + ida_init(&fg->fte_allocator); fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, @@ -855,7 +865,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, fs_get_obj(fte, rule->node.parent); if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return -EINVAL; - down_write_ref_node(&fte->node); + down_write_ref_node(&fte->node, false); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); @@ -863,7 +873,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, root = find_root(&ft->node); err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id, modify_mask, fte); - up_write_ref_node(&fte->node); + up_write_ref_node(&fte->node, false); return err; } @@ -1013,12 +1023,13 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa if (err) goto destroy_ft; ft->node.active = true; - down_write_ref_node(&fs_prio->node); + down_write_ref_node(&fs_prio->node, false); tree_add_node(&ft->node, &fs_prio->node); list_add_flow_table(ft, fs_prio); fs_prio->num_ft++; - up_write_ref_node(&fs_prio->node); + up_write_ref_node(&fs_prio->node, false); mutex_unlock(&root->chain_lock); + trace_mlx5_fs_add_ft(ft); return ft; destroy_ft: root->cmds->destroy_flow_table(root->dev, ft); @@ -1110,17 +1121,17 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (ft->autogroup.active) return ERR_PTR(-EPERM); - down_write_ref_node(&ft->node); + down_write_ref_node(&ft->node, false); fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, start_index, end_index, ft->node.children.prev); - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); if (IS_ERR(fg)) return fg; err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id); if (err) { - tree_put_node(&fg->node); + tree_put_node(&fg->node, false); return ERR_PTR(err); } trace_mlx5_fs_add_fg(fg); @@ -1373,7 +1384,10 @@ 
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, { if (d1->type == d2->type) { if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && - d1->vport.num == d2->vport.num) || + d1->vport.num == d2->vport.num && + d1->vport.flags == d2->vport.flags && + ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? + (d1->vport.reformat_id == d2->vport.reformat_id) : true)) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && d1->ft == d2->ft) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && @@ -1514,10 +1528,10 @@ static void free_match_list(struct match_list_head *head) struct match_list *iter, *match_tmp; list_del(&head->first.list); - tree_put_node(&head->first.g->node); + tree_put_node(&head->first.g->node, false); list_for_each_entry_safe(iter, match_tmp, &head->list, list) { - tree_put_node(&iter->g->node); + tree_put_node(&iter->g->node, false); list_del(&iter->list); kfree(iter); } @@ -1594,11 +1608,16 @@ lookup_fte_locked(struct mlx5_flow_group *g, fte_tmp = NULL; goto out; } + if (!fte_tmp->node.active) { + tree_put_node(&fte_tmp->node, false); + fte_tmp = NULL; + goto out; + } nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); out: if (take_write) - up_write_ref_node(&g->node); + up_write_ref_node(&g->node, false); else up_read_ref_node(&g->node); return fte_tmp; @@ -1640,8 +1659,8 @@ search_again_locked: continue; rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, fte_tmp); - up_write_ref_node(&fte_tmp->node); - tree_put_node(&fte_tmp->node); + up_write_ref_node(&fte_tmp->node, false); + tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); return rule; } @@ -1677,7 +1696,7 @@ skip_search: err = insert_fte(g, fte); if (err) { - up_write_ref_node(&g->node); + up_write_ref_node(&g->node, false); if (err == -ENOSPC) continue; kmem_cache_free(steering->ftes_cache, fte); @@ -1685,11 +1704,11 @@ skip_search: } nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); - up_write_ref_node(&g->node); + 
up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, fte); - up_write_ref_node(&fte->node); - tree_put_node(&fte->node); + up_write_ref_node(&fte->node, false); + tree_put_node(&fte->node, false); return rule; } rule = ERR_PTR(-ENOENT); @@ -1731,7 +1750,7 @@ search_again_locked: err = build_match_list(&match_head, ft, spec); if (err) { if (take_write) - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); else up_read_ref_node(&ft->node); return ERR_PTR(err); @@ -1746,7 +1765,7 @@ search_again_locked: if (!IS_ERR(rule) || (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { if (take_write) - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); return rule; } @@ -1762,12 +1781,12 @@ search_again_locked: g = alloc_auto_flow_group(ft, spec); if (IS_ERR(g)) { rule = ERR_CAST(g); - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); return rule; } nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); err = create_auto_flow_group(ft, g); if (err) @@ -1786,17 +1805,17 @@ search_again_locked: } nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); - up_write_ref_node(&g->node); + up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, fte); - up_write_ref_node(&fte->node); - tree_put_node(&fte->node); - tree_put_node(&g->node); + up_write_ref_node(&fte->node, false); + tree_put_node(&fte->node, false); + tree_put_node(&g->node, false); return rule; err_release_fg: - up_write_ref_node(&g->node); - tree_put_node(&g->node); + up_write_ref_node(&g->node, false); + tree_put_node(&g->node, false); return ERR_PTR(err); } @@ -1859,10 +1878,33 @@ EXPORT_SYMBOL(mlx5_add_flow_rules); void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) { + struct fs_fte *fte; int i; + /* In order to consolidate the HW changes we lock the FTE for other + * changes, and increase 
its refcount, in order not to perform the + * "del" functions of the FTE. Will handle them here. + * The removal of the rules is done under locked FTE. + * After removing all the handle's rules, if there are remaining + * rules, it means we just need to modify the FTE in FW, and + * unlock/decrease the refcount we increased before. + * Otherwise, it means the FTE should be deleted. First delete the + * FTE in FW. Then, unlock the FTE, and proceed the tree_put_node of + * the FTE, which will handle the last decrease of the refcount, as + * well as required handling of its parent. + */ + fs_get_obj(fte, handle->rule[0]->node.parent); + down_write_ref_node(&fte->node, false); for (i = handle->num_rules - 1; i >= 0; i--) - tree_remove_node(&handle->rule[i]->node); + tree_remove_node(&handle->rule[i]->node, true); + if (fte->modify_mask && fte->dests_size) { + modify_fte(fte); + up_write_ref_node(&fte->node, false); + } else { + del_hw_fte(&fte->node); + up_write(&fte->node.lock); + tree_put_node(&fte->node, false); + } kfree(handle); } EXPORT_SYMBOL(mlx5_del_flow_rules); @@ -1965,7 +2007,7 @@ int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) mutex_unlock(&root->chain_lock); return err; } - if (tree_remove_node(&ft->node)) + if (tree_remove_node(&ft->node, false)) mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", ft->id); mutex_unlock(&root->chain_lock); @@ -1976,7 +2018,7 @@ EXPORT_SYMBOL(mlx5_destroy_flow_table); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) { - if (tree_remove_node(&fg->node)) + if (tree_remove_node(&fg->node, false)) mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n", fg->id); } @@ -2360,8 +2402,8 @@ static void clean_tree(struct fs_node *node) tree_get_node(node); list_for_each_entry_safe(iter, temp, &node->children, list) clean_tree(iter); - tree_put_node(node); - tree_remove_node(node); + tree_put_node(node, false); + tree_remove_node(node, false); } } diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index b51ad217da32..87de0e4d9124 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -145,29 +145,6 @@ struct mlx5_flow_table { struct rhltable fgs_hash; }; -struct mlx5_fc_cache { - u64 packets; - u64 bytes; - u64 lastuse; -}; - -struct mlx5_fc { - struct list_head list; - struct llist_node addlist; - struct llist_node dellist; - - /* last{packets,bytes} members are used when calculating the delta since - * last reading - */ - u64 lastpackets; - u64 lastbytes; - - u32 id; - bool aging; - - struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; -}; - struct mlx5_ft_underlay_qp { struct list_head list; u32 qpn; @@ -195,6 +172,7 @@ struct fs_fte { enum fs_fte_status status; struct mlx5_fc *counter; struct rhash_head hash; + int modify_mask; }; /* Type of children is mlx5_flow_table/namespace */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 32accd6b041b..c6c28f56aa29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -41,6 +41,29 @@ /* Max number of counters to query in bulk read is 32K */ #define MLX5_SW_MAX_COUNTERS_BULK BIT(15) +struct mlx5_fc_cache { + u64 packets; + u64 bytes; + u64 lastuse; +}; + +struct mlx5_fc { + struct list_head list; + struct llist_node addlist; + struct llist_node dellist; + + /* last{packets,bytes} members are used when calculating the delta since + * last reading + */ + u64 lastpackets; + u64 lastbytes; + + u32 id; + bool aging; + + struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; +}; + /* locking scheme: * * It is the responsibility of the user to prevent concurrent calls or bad diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 
43118de8ee99..cb9fa3430c53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -38,6 +38,8 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/mlx5.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, @@ -78,29 +80,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) &dev->iseg->cmdq_addr_l_sz); } -static void trigger_cmd_completions(struct mlx5_core_dev *dev) -{ - unsigned long flags; - u64 vector; - - /* wait for pending handlers to complete */ - synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD)); - spin_lock_irqsave(&dev->cmd.alloc_lock, flags); - vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); - if (!vector) - goto no_trig; - - vector |= MLX5_TRIGGERED_CMD_COMP; - spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); - - mlx5_core_dbg(dev, "vector 0x%llx\n", vector); - mlx5_cmd_comp_handler(dev, vector, true); - return; - -no_trig: - spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); -} - static int in_fatal(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; @@ -124,10 +103,10 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mlx5_core_err(dev, "start\n"); if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; - trigger_cmd_completions(dev); + mlx5_cmd_flush(dev); } - mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); mlx5_core_err(dev, "end\n"); unlock: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 11dabd62e2c7..4eac42555c7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev, 
mlx5_query_port_max_mtu(mdev, &max_mtu, 1); netdev->mtu = max_mtu; - mlx5e_build_nic_params(mdev, &priv->channels.params, + mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); @@ -446,11 +446,11 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) new_channels.params = *params; new_channels.params.sw_mtu = new_mtu; - err = mlx5e_open_channels(priv, &new_channels); + + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (err) goto out; - mlx5e_switch_priv_channels(priv, &new_channels, NULL); netdev->mtu = new_channels.params.sw_mtu; out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 582b2f18010a..959605559858 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -34,39 +34,9 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/vport.h> #include "mlx5_core.h" - -enum { - MLX5_LAG_FLAG_BONDED = 1 << 0, -}; - -struct lag_func { - struct mlx5_core_dev *dev; - struct net_device *netdev; -}; - -/* Used for collection of netdev event info. */ -struct lag_tracker { - enum netdev_lag_tx_type tx_type; - struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; - bool is_bonded; -}; - -/* LAG data of a ConnectX card. - * It serves both its phys functions. - */ -struct mlx5_lag { - u8 flags; - u8 v2p_map[MLX5_MAX_PORTS]; - struct lag_func pf[MLX5_MAX_PORTS]; - struct lag_tracker tracker; - struct delayed_work bond_work; - struct notifier_block nb; - - /* Admin state. Allow lag only if allowed is true - * even if network conditions for lag were met - */ - bool allowed; -}; +#include "eswitch.h" +#include "lag.h" +#include "lag_mp.h" /* General purpose, use for short periods of time. 
* Beware of lock dependencies (preferably, no locks should be acquired @@ -148,13 +118,8 @@ static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } -static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev) -{ - return dev->priv.lag; -} - -static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, - struct net_device *ndev) +int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev) { int i; @@ -165,9 +130,14 @@ static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, return -1; } -static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev) +static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_ROCE); +} + +static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) { - return !!(ldev->flags & MLX5_LAG_FLAG_BONDED); + return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); } static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, @@ -186,36 +156,131 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, *port2 = 1; } -static void mlx5_activate_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker) +void mlx5_modify_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + u8 v2p_port1, v2p_port2; int err; - ldev->flags |= MLX5_LAG_FLAG_BONDED; + mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, + &v2p_port2); + + if (v2p_port1 != ldev->v2p_map[0] || + v2p_port2 != ldev->v2p_map[1]) { + ldev->v2p_map[0] = v2p_port1; + ldev->v2p_map[1] = v2p_port2; + + mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d", + ldev->v2p_map[0], ldev->v2p_map[1]); + + err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); + if (err) + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + } +} + +static int mlx5_create_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; 
mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0], &ldev->v2p_map[1]); + mlx5_core_info(dev0, "lag map port 1:%d port 2:%d", + ldev->v2p_map[0], ldev->v2p_map[1]); + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]); if (err) mlx5_core_err(dev0, "Failed to create LAG (%d)\n", err); + return err; +} + +int mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + u8 flags) +{ + bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; + + err = mlx5_create_lag(ldev, tracker); + if (err) { + if (roce_lag) { + mlx5_core_err(dev0, + "Failed to activate RoCE LAG\n"); + } else { + mlx5_core_err(dev0, + "Failed to activate VF LAG\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + return err; + } + + ldev->flags |= flags; + return 0; } -static void mlx5_deactivate_lag(struct mlx5_lag *ldev) +static int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + bool roce_lag = __mlx5_lag_is_roce(ldev); int err; - ldev->flags &= ~MLX5_LAG_FLAG_BONDED; + ldev->flags &= ~MLX5_LAG_MODE_FLAGS; err = mlx5_cmd_destroy_lag(dev0); - if (err) - mlx5_core_err(dev0, - "Failed to destroy LAG (%d)\n", - err); + if (err) { + if (roce_lag) { + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } else { + mlx5_core_err(dev0, + "Failed to deactivate VF LAG; driver restart required\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + } + + return err; +} + +static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +{ + if (!ldev->pf[0].dev || !ldev->pf[1].dev) + return false; + +#ifdef CONFIG_MLX5_ESWITCH + return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev); +#else + return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) && + !mlx5_sriov_is_enabled(ldev->pf[1].dev)); +#endif +} + +static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev) +{ + int i; + + for 
(i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_add_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); +} + +static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_remove_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); } static void mlx5_do_bond(struct mlx5_lag *ldev) @@ -223,9 +288,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) struct mlx5_core_dev *dev0 = ldev->pf[0].dev; struct mlx5_core_dev *dev1 = ldev->pf[1].dev; struct lag_tracker tracker; - u8 v2p_port1, v2p_port2; - int i, err; - bool do_bond; + bool do_bond, roce_lag; + int err; if (!dev0 || !dev1) return; @@ -234,48 +298,56 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) tracker = ldev->tracker; mutex_unlock(&lag_mutex); - do_bond = tracker.is_bonded && ldev->allowed; + do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); - if (do_bond && !mlx5_lag_is_bonded(ldev)) { - for (i = 0; i < MLX5_MAX_PORTS; i++) - mlx5_remove_dev_by_protocol(ldev->pf[i].dev, - MLX5_INTERFACE_PROTOCOL_IB); + if (do_bond && !__mlx5_lag_is_active(ldev)) { + roce_lag = !mlx5_sriov_is_enabled(dev0) && + !mlx5_sriov_is_enabled(dev1); - mlx5_activate_lag(ldev, &tracker); +#ifdef CONFIG_MLX5_ESWITCH + roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE && + dev1->priv.eswitch->mode == SRIOV_NONE; +#endif - mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_nic_vport_enable_roce(dev1); - } else if (do_bond && mlx5_lag_is_bonded(ldev)) { - mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1, - &v2p_port2); + if (roce_lag) + mlx5_lag_remove_ib_devices(ldev); - if ((v2p_port1 != ldev->v2p_map[0]) || - (v2p_port2 != ldev->v2p_map[1])) { - ldev->v2p_map[0] = v2p_port1; - ldev->v2p_map[1] = v2p_port2; + err = mlx5_activate_lag(ldev, &tracker, + roce_lag ? 
MLX5_LAG_FLAG_ROCE : + MLX5_LAG_FLAG_SRIOV); + if (err) { + if (roce_lag) + mlx5_lag_add_ib_devices(ldev); - err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); - if (err) - mlx5_core_err(dev0, - "Failed to modify LAG (%d)\n", - err); + return; } - } else if (!do_bond && mlx5_lag_is_bonded(ldev)) { - mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_nic_vport_disable_roce(dev1); - mlx5_deactivate_lag(ldev); + if (roce_lag) { + mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_enable_roce(dev1); + } + } else if (do_bond && __mlx5_lag_is_active(ldev)) { + mlx5_modify_lag(ldev, &tracker); + } else if (!do_bond && __mlx5_lag_is_active(ldev)) { + roce_lag = __mlx5_lag_is_roce(ldev); + + if (roce_lag) { + mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_disable_roce(dev1); + } - for (i = 0; i < MLX5_MAX_PORTS; i++) - if (ldev->pf[i].dev) - mlx5_add_dev_by_protocol(ldev->pf[i].dev, - MLX5_INTERFACE_PROTOCOL_IB); + err = mlx5_deactivate_lag(ldev); + if (err) + return; + + if (roce_lag) + mlx5_lag_add_ib_devices(ldev); } } static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) { - schedule_delayed_work(&ldev->bond_work, delay); + queue_delayed_work(ldev->wq, &ldev->bond_work, delay); } static void mlx5_do_bond_work(struct work_struct *work) @@ -419,15 +491,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } -static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) -{ - if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) || - (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev))) - return false; - else - return true; -} - static struct mlx5_lag *mlx5_lag_dev_alloc(void) { struct mlx5_lag *ldev; @@ -436,14 +499,20 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void) if (!ldev) return NULL; + ldev->wq = create_singlethread_workqueue("mlx5_lag"); + if (!ldev->wq) { + kfree(ldev); + return NULL; + } + 
INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); - ldev->allowed = mlx5_lag_check_prereq(ldev); return ldev; } static void mlx5_lag_dev_free(struct mlx5_lag *ldev) { + destroy_workqueue(ldev->wq); kfree(ldev); } @@ -462,7 +531,6 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, ldev->tracker.netdev_state[fn].link_up = 0; ldev->tracker.netdev_state[fn].tx_enabled = 0; - ldev->allowed = mlx5_lag_check_prereq(ldev); dev->priv.lag = ldev; mutex_unlock(&lag_mutex); @@ -484,7 +552,6 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); dev->priv.lag = NULL; - ldev->allowed = mlx5_lag_check_prereq(ldev); mutex_unlock(&lag_mutex); } @@ -493,6 +560,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; + int err; if (!MLX5_CAP_GEN(dev, vport_group_manager) || !MLX5_CAP_GEN(dev, lag_master) || @@ -520,6 +588,11 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); } } + + err = mlx5_lag_mp_init(ldev); + if (err) + mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", + err); } /* Must be called with intf_mutex held */ @@ -532,7 +605,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) if (!ldev) return; - if (mlx5_lag_is_bonded(ldev)) + if (__mlx5_lag_is_active(ldev)) mlx5_deactivate_lag(ldev); mlx5_lag_dev_remove_pf(ldev, dev); @@ -544,61 +617,67 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) if (i == MLX5_MAX_PORTS) { if (ldev->nb.notifier_call) unregister_netdevice_notifier(&ldev->nb); + mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); mlx5_lag_dev_free(ldev); } } -bool mlx5_lag_is_active(struct mlx5_core_dev *dev) +bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; bool res; mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - res = ldev && mlx5_lag_is_bonded(ldev); + res = ldev && 
/* Re-evaluate the bond state of the LAG that @dev belongs to.
 *
 * The lookup and the call to mlx5_do_bond() both happen with the device
 * list lock held.  Nothing is done when @dev has no LAG attached.
 */
void mlx5_lag_update(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *lag;

	mlx5_dev_list_lock();

	lag = mlx5_lag_dev_get(dev);
	if (lag)
		mlx5_do_bond(lag);

	mlx5_dev_list_unlock();
}
mlx5_interface *intf, struct mlx5_priv *priv) return true; ldev = mlx5_lag_dev_get(dev); - if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev) + if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev) return true; /* If bonded, we do not add an IB device for PF1. */ @@ -665,7 +744,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (ldev && mlx5_lag_is_bonded(ldev)) { + if (ldev && __mlx5_lag_is_roce(ldev)) { num_ports = MLX5_MAX_PORTS; mdev[0] = ldev->pf[0].dev; mdev[1] = ldev->pf[1].dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h new file mode 100644 index 000000000000..1dea0b1c9826 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_LAG_H__ +#define __MLX5_LAG_H__ + +#include "mlx5_core.h" +#include "lag_mp.h" + +enum { + MLX5_LAG_FLAG_ROCE = 1 << 0, + MLX5_LAG_FLAG_SRIOV = 1 << 1, + MLX5_LAG_FLAG_MULTIPATH = 1 << 2, +}; + +#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\ + MLX5_LAG_FLAG_MULTIPATH) + +struct lag_func { + struct mlx5_core_dev *dev; + struct net_device *netdev; +}; + +/* Used for collection of netdev event info. */ +struct lag_tracker { + enum netdev_lag_tx_type tx_type; + struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; + unsigned int is_bonded:1; +}; + +/* LAG data of a ConnectX card. + * It serves both its phys functions. 
+ */ +struct mlx5_lag { + u8 flags; + u8 v2p_map[MLX5_MAX_PORTS]; + struct lag_func pf[MLX5_MAX_PORTS]; + struct lag_tracker tracker; + struct workqueue_struct *wq; + struct delayed_work bond_work; + struct notifier_block nb; + struct lag_mp lag_mp; +}; + +static inline struct mlx5_lag * +mlx5_lag_dev_get(struct mlx5_core_dev *dev) +{ + return dev->priv.lag; +} + +static inline bool +__mlx5_lag_is_active(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_MODE_FLAGS); +} + +void mlx5_modify_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker); +int mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + u8 flags); +int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev); + +#endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c new file mode 100644 index 000000000000..5633f8572800 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/netdevice.h> +#include "lag.h" +#include "lag_mp.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "lib/mlx5.h" + +static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) +{ + if (!ldev->pf[0].dev || !ldev->pf[1].dev) + return false; + + return mlx5_esw_multipath_prereq(ldev->pf[0].dev, ldev->pf[1].dev); +} + +static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH); +} + +bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + ldev = mlx5_lag_dev_get(dev); + res = ldev && __mlx5_lag_is_multipath(ldev); + + return res; +} + +/** + * Set lag port affinity + * + * @ldev: lag device + * @port: + * 0 - set normal affinity. + * 1 - set affinity to port 1. + * 2 - set affinity to port 2. 
+ * + **/ +static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port) +{ + struct lag_tracker tracker; + + if (!__mlx5_lag_is_multipath(ldev)) + return; + + switch (port) { + case 0: + tracker.netdev_state[0].tx_enabled = true; + tracker.netdev_state[1].tx_enabled = true; + tracker.netdev_state[0].link_up = true; + tracker.netdev_state[1].link_up = true; + break; + case 1: + tracker.netdev_state[0].tx_enabled = true; + tracker.netdev_state[0].link_up = true; + tracker.netdev_state[1].tx_enabled = false; + tracker.netdev_state[1].link_up = false; + break; + case 2: + tracker.netdev_state[0].tx_enabled = false; + tracker.netdev_state[0].link_up = false; + tracker.netdev_state[1].tx_enabled = true; + tracker.netdev_state[1].link_up = true; + break; + default: + mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d", + port); + return; + } + + if (tracker.netdev_state[0].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events, + MLX5_DEV_EVENT_PORT_AFFINITY, + (void *)0); + + if (tracker.netdev_state[1].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events, + MLX5_DEV_EVENT_PORT_AFFINITY, + (void *)0); + + mlx5_modify_lag(ldev, &tracker); +} + +static void mlx5_lag_fib_event_flush(struct notifier_block *nb) +{ + struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb); + struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp); + + flush_workqueue(ldev->wq); +} + +struct mlx5_fib_event_work { + struct work_struct work; + struct mlx5_lag *ldev; + unsigned long event; + union { + struct fib_entry_notifier_info fen_info; + struct fib_nh_notifier_info fnh_info; + }; +}; + +static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, + unsigned long event, + struct fib_info *fi) +{ + struct lag_mp *mp = &ldev->lag_mp; + + /* Handle delete event */ + if (event == FIB_EVENT_ENTRY_DEL) { + /* stop track */ + if (mp->mfi == fi) + mp->mfi = NULL; + return; + } + + /* Handle add/replace event */ + if (fi->fib_nhs == 1) { + if 
(__mlx5_lag_is_active(ldev)) { + struct net_device *nh_dev = fi->fib_nh[0].nh_dev; + int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); + + mlx5_lag_set_port_affinity(ldev, ++i); + } + return; + } + + if (fi->fib_nhs != 2) + return; + + /* Verify next hops are ports of the same hca */ + if (!(fi->fib_nh[0].nh_dev == ldev->pf[0].netdev && + fi->fib_nh[1].nh_dev == ldev->pf[1].netdev) && + !(fi->fib_nh[0].nh_dev == ldev->pf[1].netdev && + fi->fib_nh[1].nh_dev == ldev->pf[0].netdev)) { + mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n"); + return; + } + + /* First time we see multipath route */ + if (!mp->mfi && !__mlx5_lag_is_active(ldev)) { + struct lag_tracker tracker; + + tracker = ldev->tracker; + mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH); + } + + mlx5_lag_set_port_affinity(ldev, 0); + mp->mfi = fi; +} + +static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, + unsigned long event, + struct fib_nh *fib_nh, + struct fib_info *fi) +{ + struct lag_mp *mp = &ldev->lag_mp; + + /* Check the nh event is related to the route */ + if (!mp->mfi || mp->mfi != fi) + return; + + /* nh added/removed */ + if (event == FIB_EVENT_NH_DEL) { + int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->nh_dev); + + if (i >= 0) { + i = (i + 1) % 2 + 1; /* peer port */ + mlx5_lag_set_port_affinity(ldev, i); + } + } else if (event == FIB_EVENT_NH_ADD && + fi->fib_nhs == 2) { + mlx5_lag_set_port_affinity(ldev, 0); + } +} + +static void mlx5_lag_fib_update(struct work_struct *work) +{ + struct mlx5_fib_event_work *fib_work = + container_of(work, struct mlx5_fib_event_work, work); + struct mlx5_lag *ldev = fib_work->ldev; + struct fib_nh *fib_nh; + + /* Protect internal structures from changes */ + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + 
mlx5_lag_fib_route_event(ldev, fib_work->event, + fib_work->fen_info.fi); + fib_info_put(fib_work->fen_info.fi); + break; + case FIB_EVENT_NH_ADD: /* fall through */ + case FIB_EVENT_NH_DEL: + fib_nh = fib_work->fnh_info.fib_nh; + mlx5_lag_fib_nexthop_event(ldev, + fib_work->event, + fib_work->fnh_info.fib_nh, + fib_nh->nh_parent); + fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); + break; + } + + rtnl_unlock(); + kfree(fib_work); +} + +static struct mlx5_fib_event_work * +mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event) +{ + struct mlx5_fib_event_work *fib_work; + + fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); + if (WARN_ON(!fib_work)) + return NULL; + + INIT_WORK(&fib_work->work, mlx5_lag_fib_update); + fib_work->ldev = ldev; + fib_work->event = event; + + return fib_work; +} + +static int mlx5_lag_fib_event(struct notifier_block *nb, + unsigned long event, + void *ptr) +{ + struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb); + struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp); + struct fib_notifier_info *info = ptr; + struct mlx5_fib_event_work *fib_work; + struct fib_entry_notifier_info *fen_info; + struct fib_nh_notifier_info *fnh_info; + struct fib_info *fi; + + if (info->family != AF_INET) + return NOTIFY_DONE; + + if (!mlx5_lag_multipath_check_prereq(ldev)) + return NOTIFY_DONE; + + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + fen_info = container_of(info, struct fib_entry_notifier_info, + info); + fi = fen_info->fi; + if (fi->fib_dev != ldev->pf[0].netdev && + fi->fib_dev != ldev->pf[1].netdev) { + return NOTIFY_DONE; + } + fib_work = mlx5_lag_init_fib_work(ldev, event); + if (!fib_work) + return NOTIFY_DONE; + fib_work->fen_info = *fen_info; + /* Take reference on fib_info to prevent it from being + * freed while work is queued. Release it afterwards. 
+ */ + fib_info_hold(fib_work->fen_info.fi); + break; + case FIB_EVENT_NH_ADD: /* fall through */ + case FIB_EVENT_NH_DEL: + fnh_info = container_of(info, struct fib_nh_notifier_info, + info); + fib_work = mlx5_lag_init_fib_work(ldev, event); + if (!fib_work) + return NOTIFY_DONE; + fib_work->fnh_info = *fnh_info; + fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + break; + default: + return NOTIFY_DONE; + } + + queue_work(ldev->wq, &fib_work->work); + + return NOTIFY_DONE; +} + +int mlx5_lag_mp_init(struct mlx5_lag *ldev) +{ + struct lag_mp *mp = &ldev->lag_mp; + int err; + + if (mp->fib_nb.notifier_call) + return 0; + + mp->fib_nb.notifier_call = mlx5_lag_fib_event; + err = register_fib_notifier(&mp->fib_nb, + mlx5_lag_fib_event_flush); + if (err) + mp->fib_nb.notifier_call = NULL; + + return err; +} + +void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) +{ + struct lag_mp *mp = &ldev->lag_mp; + + if (!mp->fib_nb.notifier_call) + return; + + unregister_fib_notifier(&mp->fib_nb); + mp->fib_nb.notifier_call = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h new file mode 100644 index 000000000000..6d14b1100be9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_LAG_MP_H__ +#define __MLX5_LAG_MP_H__ + +#include "lag.h" +#include "mlx5_core.h" + +struct lag_mp { + struct notifier_block fib_nb; + struct fib_info *mfi; /* used in tracking fib events */ +}; + +#ifdef CONFIG_MLX5_ESWITCH + +int mlx5_lag_mp_init(struct mlx5_lag *ldev); +void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; } +static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {} + +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_LAG_MP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 0d90b1b4a3d3..ca0ee9916e9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -33,6 +33,7 @@ #include <linux/clocksource.h> #include <linux/highmem.h> #include <rdma/mlx5-abi.h> +#include "lib/eq.h" #include "en.h" #include "clock.h" @@ -71,7 +72,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc) struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); - return mlx5_read_internal_timer(mdev) & cc->mask; + return mlx5_read_internal_timer(mdev, NULL) & cc->mask; } static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) @@ -155,15 +156,19 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, return 0; } -static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) +static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, + struct ptp_system_timestamp *sts) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - u64 ns; + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); unsigned long flags; + u64 cycles, ns; write_seqlock_irqsave(&clock->lock, flags); - ns = timecounter_read(&clock->tc); + cycles = mlx5_read_internal_timer(mdev, sts); + ns = 
timecounter_cyc2time(&clock->tc, cycles); write_sequnlock_irqrestore(&clock->lock, flags); *ts = ns_to_timespec64(ns); @@ -306,7 +311,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, ts.tv_sec = rq->perout.start.sec; ts.tv_nsec = rq->perout.start.nsec; ns = timespec64_to_ns(&ts); - cycles_now = mlx5_read_internal_timer(mdev); + cycles_now = mlx5_read_internal_timer(mdev, NULL); write_seqlock_irqsave(&clock->lock, flags); nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); nsec_delta = ns - nsec_now; @@ -383,7 +388,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = { .pps = 0, .adjfreq = mlx5_ptp_adjfreq, .adjtime = mlx5_ptp_adjtime, - .gettime64 = mlx5_ptp_gettime, + .gettimex64 = mlx5_ptp_gettimex, .settime64 = mlx5_ptp_settime, .enable = NULL, .verify = NULL, @@ -439,16 +444,17 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); } -void mlx5_pps_event(struct mlx5_core_dev *mdev, - struct mlx5_eqe *eqe) +static int mlx5_pps_event(struct notifier_block *nb, + unsigned long type, void *data) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); + struct mlx5_core_dev *mdev = clock->mdev; struct ptp_clock_event ptp_event; - struct timespec64 ts; - u64 nsec_now, nsec_delta; u64 cycles_now, cycles_delta; + u64 nsec_now, nsec_delta, ns; + struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; - s64 ns; + struct timespec64 ts; unsigned long flags; switch (clock->ptp_info.pin_config[pin].func) { @@ -463,11 +469,12 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, } else { ptp_event.type = PTP_CLOCK_EXTTS; } + /* TODOL clock->ptp can be NULL if ptp_clock_register failes */ ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: - mlx5_ptp_gettime(&clock->ptp_info, &ts); - cycles_now = mlx5_read_internal_timer(mdev); + mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL); + cycles_now = 
mlx5_read_internal_timer(mdev, NULL); ts.tv_sec += 1; ts.tv_nsec = 0; ns = timespec64_to_ns(&ts); @@ -481,8 +488,11 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, write_sequnlock_irqrestore(&clock->lock, flags); break; default: - mlx5_core_err(mdev, " Unhandled event\n"); + mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", + clock->ptp_info.pin_config[pin].func); } + + return NOTIFY_OK; } void mlx5_init_clock(struct mlx5_core_dev *mdev) @@ -511,14 +521,14 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) ktime_to_ns(ktime_get_real())); /* Calculate period in seconds to call the overflow watchdog - to make - * sure counter is checked at least once every wrap around. + * sure counter is checked at least twice every wrap around. * The period is calculated as the minimum between max HW cycles count * (The clock source mask) and max amount of cycles that can be * multiplied by clock multiplier where the result doesn't exceed * 64bits. */ overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult); - overflow_cycles = min(overflow_cycles, clock->cycles.mask >> 1); + overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3)); ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, frac, &frac); @@ -567,6 +577,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) PTR_ERR(clock->ptp)); clock->ptp = NULL; } + + MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT); + mlx5_eq_notifier_register(mdev, &clock->pps_nb); } void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) @@ -576,6 +589,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) return; + mlx5_eq_notifier_unregister(mdev, &clock->pps_nb); if (clock->ptp) { ptp_clock_unregister(clock->ptp); clock->ptp = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index 263cb6e2aeee..31600924bdc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -36,7 +36,6 @@ #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) void mlx5_init_clock(struct mlx5_core_dev *mdev); void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); -void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { @@ -60,8 +59,6 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, #else static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} -static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {} - static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { return -1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c new file mode 100644 index 000000000000..bced2efe9bef --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include <linux/mlx5/vport.h> +#include "lib/devcom.h" + +static LIST_HEAD(devcom_list); + +#define devcom_for_each_component(priv, comp, iter) \ + for (iter = 0; \ + comp = &(priv)->components[iter], iter < MLX5_DEVCOM_NUM_COMPONENTS; \ + iter++) + +struct mlx5_devcom_component { + struct { + void *data; + } device[MLX5_MAX_PORTS]; + + mlx5_devcom_event_handler_t handler; + struct rw_semaphore sem; + bool paired; +}; + +struct mlx5_devcom_list { + struct list_head list; + + struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS]; + struct mlx5_core_dev *devs[MLX5_MAX_PORTS]; +}; + +struct mlx5_devcom { + struct mlx5_devcom_list *priv; + int idx; +}; + +static struct mlx5_devcom_list *mlx5_devcom_list_alloc(void) +{ + struct mlx5_devcom_component *comp; + struct mlx5_devcom_list *priv; + int i; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + 
if (!priv) + return NULL; + + devcom_for_each_component(priv, comp, i) + init_rwsem(&comp->sem); + + return priv; +} + +static struct mlx5_devcom *mlx5_devcom_alloc(struct mlx5_devcom_list *priv, + u8 idx) +{ + struct mlx5_devcom *devcom; + + devcom = kzalloc(sizeof(*devcom), GFP_KERNEL); + if (!devcom) + return NULL; + + devcom->priv = priv; + devcom->idx = idx; + return devcom; +} + +/* Must be called with intf_mutex held */ +struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_devcom_list *priv = NULL, *iter; + struct mlx5_devcom *devcom = NULL; + bool new_priv = false; + u64 sguid0, sguid1; + int idx, i; + + if (!mlx5_core_is_pf(dev)) + return NULL; + + sguid0 = mlx5_query_nic_system_image_guid(dev); + list_for_each_entry(iter, &devcom_list, list) { + struct mlx5_core_dev *tmp_dev = NULL; + + idx = -1; + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (iter->devs[i]) + tmp_dev = iter->devs[i]; + else + idx = i; + } + + if (idx == -1) + continue; + + sguid1 = mlx5_query_nic_system_image_guid(tmp_dev); + if (sguid0 != sguid1) + continue; + + priv = iter; + break; + } + + if (!priv) { + priv = mlx5_devcom_list_alloc(); + if (!priv) + return ERR_PTR(-ENOMEM); + + idx = 0; + new_priv = true; + } + + priv->devs[idx] = dev; + devcom = mlx5_devcom_alloc(priv, idx); + if (!devcom) { + kfree(priv); + return ERR_PTR(-ENOMEM); + } + + if (new_priv) + list_add(&priv->list, &devcom_list); + + return devcom; +} + +/* Must be called with intf_mutex held */ +void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom) +{ + struct mlx5_devcom_list *priv; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return; + + priv = devcom->priv; + priv->devs[devcom->idx] = NULL; + + kfree(devcom); + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (priv->devs[i]) + break; + + if (i != MLX5_MAX_PORTS) + return; + + list_del(&priv->list); + kfree(priv); +} + +void mlx5_devcom_register_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + 
mlx5_devcom_event_handler_t handler, + void *data) +{ + struct mlx5_devcom_component *comp; + + if (IS_ERR_OR_NULL(devcom)) + return; + + WARN_ON(!data); + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + comp->handler = handler; + comp->device[devcom->idx].data = data; + up_write(&comp->sem); +} + +void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + + if (IS_ERR_OR_NULL(devcom)) + return; + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + comp->device[devcom->idx].data = NULL; + up_write(&comp->sem); +} + +int mlx5_devcom_send_event(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int event, + void *event_data) +{ + struct mlx5_devcom_component *comp; + int err = -ENODEV, i; + + if (IS_ERR_OR_NULL(devcom)) + return err; + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (i != devcom->idx && comp->device[i].data) { + err = comp->handler(event, comp->device[i].data, + event_data); + break; + } + + up_write(&comp->sem); + return err; +} + +void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool paired) +{ + struct mlx5_devcom_component *comp; + + comp = &devcom->priv->components[id]; + WARN_ON(!rwsem_is_locked(&comp->sem)); + + comp->paired = paired; +} + +bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + if (IS_ERR_OR_NULL(devcom)) + return false; + + return devcom->priv->components[id].paired; +} + +void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return NULL; + + comp = &devcom->priv->components[id]; + down_read(&comp->sem); + if (!comp->paired) { + up_read(&comp->sem); + return NULL; + } + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (i != devcom->idx) 
+ break; + + return comp->device[i].data; +} + +void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp = &devcom->priv->components[id]; + + up_read(&comp->sem); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h new file mode 100644 index 000000000000..939d5bf1581b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies */ + +#ifndef __LIB_MLX5_DEVCOM_H__ +#define __LIB_MLX5_DEVCOM_H__ + +#include <linux/mlx5/driver.h> + +enum mlx5_devcom_components { + MLX5_DEVCOM_ESW_OFFLOADS, + + MLX5_DEVCOM_NUM_COMPONENTS, +}; + +typedef int (*mlx5_devcom_event_handler_t)(int event, + void *my_data, + void *event_data); + +struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev); +void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom); + +void mlx5_devcom_register_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + mlx5_devcom_event_handler_t handler, + void *data); +void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +int mlx5_devcom_send_event(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int event, + void *event_data); + +void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool paired); +bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); +void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +#endif + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h new file mode 100644 index 
000000000000..c0fb6d72b695 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies */ + +#ifndef __LIB_MLX5_EQ_H__ +#define __LIB_MLX5_EQ_H__ +#include <linux/mlx5/driver.h> +#include <linux/mlx5/eq.h> +#include <linux/mlx5/cq.h> + +#define MLX5_MAX_IRQ_NAME (32) +#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) + +struct mlx5_eq_tasklet { + struct list_head list; + struct list_head process_list; + struct tasklet_struct task; + spinlock_t lock; /* lock completion tasklet list */ +}; + +struct mlx5_cq_table { + spinlock_t lock; /* protect radix tree */ + struct radix_tree_root tree; +}; + +struct mlx5_eq { + struct mlx5_core_dev *dev; + struct mlx5_cq_table cq_table; + __be32 __iomem *doorbell; + u32 cons_index; + struct mlx5_frag_buf buf; + int size; + unsigned int vecidx; + unsigned int irqn; + u8 eqn; + int nent; + struct mlx5_rsc_debug *dbg; +}; + +struct mlx5_eq_comp { + struct mlx5_eq core; /* Must be first */ + struct mlx5_eq_tasklet tasklet_ctx; + struct list_head list; +}; + +static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) +{ + return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); +} + +static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); + + return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe; +} + +static inline void eq_update_ci(struct mlx5_eq *eq, int arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 
0 : 2); + u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +int mlx5_eq_table_init(struct mlx5_core_dev *dev); +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_eq_table_create(struct mlx5_core_dev *dev); +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); + +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); +void mlx5_cq_tasklet_cb(unsigned long data); +struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); + +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev); +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); +#endif + +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); +int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 7550b1cc8c6a..397a2847867a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -33,6 +33,8 @@ #ifndef __LIB_MLX5_H__ #define 
__LIB_MLX5_H__ +#include "mlx5_core.h" + void mlx5_init_reserved_gids(struct mlx5_core_dev *dev); void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev); int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); @@ -40,4 +42,38 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +/* TODO move to lib/events.h */ + +#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF +#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF + +enum port_module_event_status_type { + MLX5_MODULE_STATUS_PLUGGED = 0x1, + MLX5_MODULE_STATUS_UNPLUGGED = 0x2, + MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_DISABLED = 0x4, + MLX5_MODULE_STATUS_NUM, +}; + +enum port_module_event_error_type { + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, + MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7, + MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc, + MLX5_MODULE_EVENT_ERROR_NUM, +}; + +struct mlx5_pme_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 98359559c77e..a71d5b9c7ab2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -108,8 +108,7 @@ int 
mlx5_mpfs_init(struct mlx5_core_dev *dev) mutex_init(&mpfs->lock); mpfs->size = l2table_size; - mpfs->bitmap = kcalloc(BITS_TO_LONGS(l2table_size), - sizeof(uintptr_t), GFP_KERNEL); + mpfs->bitmap = bitmap_zalloc(l2table_size, GFP_KERNEL); if (!mpfs->bitmap) { kfree(mpfs); return -ENOMEM; @@ -127,7 +126,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) return; WARN_ON(!hlist_empty(mpfs->hash)); - kfree(mpfs->bitmap); + bitmap_free(mpfs->bitmap); kfree(mpfs); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c new file mode 100644 index 000000000000..40f4a19b1ce1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/port.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" +#include "lib/port_tun.h" + +struct mlx5_port_tun_entropy_flags { + bool force_supported, force_enabled; + bool calc_supported, calc_enabled; + bool gre_calc_supported, gre_calc_enabled; +}; + +static void mlx5_query_port_tun_entropy(struct mlx5_core_dev *mdev, + struct mlx5_port_tun_entropy_flags *entropy_flags) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + /* Default values for FW which do not support MLX5_REG_PCMR */ + entropy_flags->force_supported = false; + entropy_flags->calc_supported = false; + entropy_flags->gre_calc_supported = false; + entropy_flags->force_enabled = false; + entropy_flags->calc_enabled = true; + entropy_flags->gre_calc_enabled = true; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return; + + if (mlx5_query_ports_check(mdev, out, sizeof(out))) + return; + + entropy_flags->force_supported = !!(MLX5_GET(pcmr_reg, out, entropy_force_cap)); + entropy_flags->calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_calc_cap)); + entropy_flags->gre_calc_supported = 
!!(MLX5_GET(pcmr_reg, out, entropy_gre_calc_cap)); + entropy_flags->force_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_force)); + entropy_flags->calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_calc)); + entropy_flags->gre_calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc)); +} + +static int mlx5_set_port_tun_entropy_calc(struct mlx5_core_dev *mdev, u8 enable, + u8 force) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, entropy_force, force); + MLX5_SET(pcmr_reg, in, entropy_calc, enable); + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +static int mlx5_set_port_gre_tun_entropy_calc(struct mlx5_core_dev *mdev, + u8 enable, u8 force) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, entropy_force, force); + MLX5_SET(pcmr_reg, in, entropy_gre_calc, enable); + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy, + struct mlx5_core_dev *mdev) +{ + struct mlx5_port_tun_entropy_flags entropy_flags; + + tun_entropy->mdev = mdev; + mutex_init(&tun_entropy->lock); + mlx5_query_port_tun_entropy(mdev, &entropy_flags); + tun_entropy->num_enabling_entries = 0; + tun_entropy->num_disabling_entries = 0; + tun_entropy->enabled = entropy_flags.calc_enabled; + tun_entropy->enabled = + (entropy_flags.calc_supported) ? 
+ entropy_flags.calc_enabled : true; +} + +static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy, + int reformat_type, bool enable) +{ + struct mlx5_port_tun_entropy_flags entropy_flags; + int err; + + mlx5_query_port_tun_entropy(tun_entropy->mdev, &entropy_flags); + /* Tunnel entropy calculation may be controlled either on port basis + * for all tunneling protocols or specifically for GRE protocol. + * Prioritize GRE protocol control (if capable) over global port + * configuration. + */ + if (entropy_flags.gre_calc_supported && + reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) { + /* Other applications may change the global FW entropy + * calculations settings. Check that the current entropy value + * is the negative of the updated value. + */ + if (entropy_flags.force_enabled && + enable == entropy_flags.gre_calc_enabled) { + mlx5_core_warn(tun_entropy->mdev, + "Unexpected GRE entropy calc setting - expected %d", + !entropy_flags.gre_calc_enabled); + return -EOPNOTSUPP; + } + err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, enable, + entropy_flags.force_supported); + if (err) + return err; + /* if we turn on the entropy we don't need to force it anymore */ + if (entropy_flags.force_supported && enable) { + err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, 1, 0); + if (err) + return err; + } + } else if (entropy_flags.calc_supported) { + /* Other applications may change the global FW entropy + * calculations settings. Check that the current entropy value + * is the negative of the updated value. + */ + if (entropy_flags.force_enabled && + enable == entropy_flags.calc_enabled) { + mlx5_core_warn(tun_entropy->mdev, + "Unexpected entropy calc setting - expected %d", + !entropy_flags.calc_enabled); + return -EOPNOTSUPP; + } + /* GRE requires disabling entropy calculation. if there are + * enabling entries (i.e VXLAN) we cannot turn it off for them, + * thus fail. 
+ */ + if (tun_entropy->num_enabling_entries) + return -EOPNOTSUPP; + err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, enable, + entropy_flags.force_supported); + if (err) + return err; + tun_entropy->enabled = enable; + /* if we turn on the entropy we don't need to force it anymore */ + if (entropy_flags.force_supported && enable) { + err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, 1, 0); + if (err) + return err; + } + } + + return 0; +} + +/* the function manages the refcount for enabling/disabling tunnel types. + * the return value indicates if the inc is successful or not, depending on + * entropy capabilities and configuration. + */ +int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy, + int reformat_type) +{ + /* the default is error for unknown (non VXLAN/GRE tunnel types) */ + int err = -EOPNOTSUPP; + + mutex_lock(&tun_entropy->lock); + if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN && + tun_entropy->enabled) { + /* in case entropy calculation is enabled for all tunneling + * types, it is ok for VXLAN, so approve. + * otherwise keep the error default. + */ + tun_entropy->num_enabling_entries++; + err = 0; + } else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) { + /* turn off the entropy only for the first GRE rule. + * for the next rules the entropy was already disabled + * successfully. 
+ */ + if (tun_entropy->num_disabling_entries == 0) + err = mlx5_set_entropy(tun_entropy, reformat_type, 0); + else + err = 0; + if (!err) + tun_entropy->num_disabling_entries++; + } + mutex_unlock(&tun_entropy->lock); + + return err; +} + +void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy, + int reformat_type) +{ + mutex_lock(&tun_entropy->lock); + if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN) + tun_entropy->num_enabling_entries--; + else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE && + --tun_entropy->num_disabling_entries == 0) + mlx5_set_entropy(tun_entropy, reformat_type, 1); + mutex_unlock(&tun_entropy->lock); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h new file mode 100644 index 000000000000..54c42a88705e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_PORT_TUN_H__ +#define __MLX5_PORT_TUN_H__ + +#include <linux/mlx5/driver.h> + +struct mlx5_tun_entropy { + struct mlx5_core_dev *mdev; + u32 num_enabling_entries; + u32 num_disabling_entries; + u8 enabled; + struct mutex lock; /* lock the entropy fields */ +}; + +void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy, + struct mlx5_core_dev *mdev); +int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy, + int reformat_type); +void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy, + int reformat_type); + +#endif /* __MLX5_PORT_TUN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c deleted file mode 100644 index 3a3b0005fd2b..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/cmd.h> -#include "mlx5_core.h" - -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port) -{ - int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); - int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); - int err = -ENOMEM; - void *data; - void *resp; - u32 *out; - u32 *in; - - in = kzalloc(inlen, GFP_KERNEL); - out = kzalloc(outlen, GFP_KERNEL); - if (!in || !out) - goto out; - - MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); - MLX5_SET(mad_ifc_in, in, op_mod, opmod); - MLX5_SET(mad_ifc_in, in, port, port); - - data = MLX5_ADDR_OF(mad_ifc_in, in, mad); - memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); - if (err) - goto out; - - resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); - memcpy(outb, resp, - MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); - -out: - kfree(out); - kfree(in); - return err; -} -EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 28132c7dc05f..76716419370d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -43,7 +43,6 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cq.h> #include <linux/mlx5/qp.h> -#include <linux/mlx5/srq.h> #include <linux/debugfs.h> #include <linux/kmod.h> #include <linux/mlx5/mlx5_ifc.h> @@ -53,6 +52,7 @@ #endif #include <net/devlink.h> #include "mlx5_core.h" +#include "lib/eq.h" #include "fs_core.h" #include "lib/mpfs.h" #include "eswitch.h" @@ -63,7 +63,9 @@ #include "accel/tls.h" #include "lib/clock.h" #include "lib/vxlan.h" +#include "lib/devcom.h" #include "diag/fw_tracer.h" +#include "ecpf.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core 
driver"); @@ -162,26 +164,6 @@ static struct mlx5_profile profile[] = { .size = 8, .limit = 4 }, - .mr_cache[16] = { - .size = 8, - .limit = 4 - }, - .mr_cache[17] = { - .size = 8, - .limit = 4 - }, - .mr_cache[18] = { - .size = 8, - .limit = 4 - }, - .mr_cache[19] = { - .size = 4, - .limit = 2 - }, - .mr_cache[20] = { - .size = 4, - .limit = 2 - }, }, }; @@ -319,51 +301,6 @@ static void release_bar(struct pci_dev *pdev) pci_release_regions(pdev); } -static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = &priv->eq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? - MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); - int nvec; - int err; - - nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; - nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) - return -ENOMEM; - - priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); - if (!priv->irq_info) - return -ENOMEM; - - nvec = pci_alloc_irq_vectors(dev->pdev, - MLX5_EQ_VEC_COMP_BASE + 1, nvec, - PCI_IRQ_MSIX); - if (nvec < 0) { - err = nvec; - goto err_free_irq_info; - } - - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; - - return 0; - -err_free_irq_info: - kfree(priv->irq_info); - return err; -} - -static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - - pci_free_irq_vectors(dev->pdev); - kfree(priv->irq_info); -} - struct mlx5_reg_host_endianness { u8 he; u8 rsvd[15]; @@ -503,6 +440,58 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) return err; } +static int handle_hca_cap_odp(struct mlx5_core_dev *dev) +{ + void *set_hca_cap; + void *set_ctx; + int set_sz; + bool do_set = false; + int err; + + if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) || + !MLX5_CAP_GEN(dev, pg)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + + set_sz = 
MLX5_ST_SZ_BYTES(set_hca_cap_in); + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP], + MLX5_ST_SZ_BYTES(odp_cap)); + +#define ODP_CAP_SET_MAX(dev, field) \ + do { \ + u32 _res = MLX5_CAP_ODP_MAX(dev, field); \ + if (_res) { \ + do_set = true; \ + MLX5_SET(odp_cap, set_hca_cap, field, _res); \ + } \ + } while (0) + + ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.send); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.write); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.read); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic); + + if (do_set) + err = set_caps(dev, set_ctx, set_sz, + MLX5_SET_HCA_CAP_OP_MOD_ODP); + + kfree(set_ctx); + + return err; +} + static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; @@ -576,6 +565,33 @@ query_ex: return err; } +static int set_hca_cap(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + err = handle_hca_cap(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap failed\n"); + goto out; + } + + err = handle_hca_cap_atomic(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n"); + goto out; + } + + err = handle_hca_cap_odp(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap_odp failed\n"); + goto out; + } + +out: + return err; +} + static int set_hca_ctrl(struct mlx5_core_dev *dev) { struct mlx5_reg_host_endianness he_in; @@ -611,6 +627,8 @@ int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); } @@ 
-621,191 +639,29 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); MLX5_SET(disable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) +u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts) { u32 timer_h, timer_h1, timer_l; timer_h = ioread32be(&dev->iseg->internal_timer_h); + ptp_read_system_prets(sts); timer_l = ioread32be(&dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); timer_h1 = ioread32be(&dev->iseg->internal_timer_h); - if (timer_h != timer_h1) /* wrap around */ + if (timer_h != timer_h1) { + /* wrap around */ + ptp_read_system_prets(sts); timer_l = ioread32be(&dev->iseg->internal_timer_l); - - return (u64)timer_l | (u64)timer_h1 << 32; -} - -static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); - - if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[i].mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[i].mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[i].mask); -} - -static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) -{ - int err; - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { 
- err = mlx5_irq_set_affinity_hint(mdev, i); - if (err) - goto err_out; + ptp_read_system_postts(sts); } - return 0; - -err_out: - for (i--; i >= 0; i--) - mlx5_irq_clear_affinity_hint(mdev, i); - - return err; -} - -static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) - mlx5_irq_clear_affinity_hint(mdev, i); -} - -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq, *n; - int err = -ENOENT; - - spin_lock(&table->lock); - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - if (eq->index == vector) { - *eqn = eq->eqn; - *irqn = eq->irqn; - err = 0; - break; - } - } - spin_unlock(&table->lock); - - return err; -} -EXPORT_SYMBOL(mlx5_vector2eqn); - -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq; - - spin_lock(&table->lock); - list_for_each_entry(eq, &table->comp_eqs_list, list) - if (eq->eqn == eqn) { - spin_unlock(&table->lock); - return eq; - } - - spin_unlock(&table->lock); - - return ERR_PTR(-ENOENT); -} - -static void free_comp_eqs(struct mlx5_core_dev *dev) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq, *n; - -#ifdef CONFIG_RFS_ACCEL - if (dev->rmap) { - free_irq_cpu_rmap(dev->rmap); - dev->rmap = NULL; - } -#endif - spin_lock(&table->lock); - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - list_del(&eq->list); - spin_unlock(&table->lock); - if (mlx5_destroy_unmap_eq(dev, eq)) - mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", - eq->eqn); - kfree(eq); - spin_lock(&table->lock); - } - spin_unlock(&table->lock); -} - -static int alloc_comp_eqs(struct mlx5_core_dev *dev) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; - struct mlx5_eq *eq; - int ncomp_vec; - int 
nent; - int err; - int i; - - INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; - nent = MLX5_COMP_EQ_SIZE; -#ifdef CONFIG_RFS_ACCEL - dev->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!dev->rmap) - return -ENOMEM; -#endif - for (i = 0; i < ncomp_vec; i++) { - eq = kzalloc(sizeof(*eq), GFP_KERNEL); - if (!eq) { - err = -ENOMEM; - goto clean; - } - -#ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev, - MLX5_EQ_VEC_COMP_BASE + i)); -#endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); - err = mlx5_create_map_eq(dev, eq, - i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - name, MLX5_EQ_TYPE_COMP); - if (err) { - kfree(eq); - goto clean; - } - mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); - eq->index = i; - spin_lock(&table->lock); - list_add_tail(&eq->list, &table->comp_eqs_list); - spin_unlock(&table->lock); - } - - return 0; - -clean: - free_comp_eqs(dev); - return err; + return (u64)timer_l | (u64)timer_h1 << 32; } static int mlx5_core_set_issi(struct mlx5_core_dev *dev) @@ -877,11 +733,9 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) priv->numa_node = dev_to_node(&dev->pdev->dev); - priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root); - if (!priv->dbg_root) { - dev_err(&pdev->dev, "Cannot create debugfs dir, aborting\n"); - return -ENOMEM; - } + if (mlx5_debugfs_root) + priv->dbg_root = + debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root); err = mlx5_pci_enable_device(dev); if (err) { @@ -903,6 +757,11 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_clr_master; } + if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) + mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); + dev->iseg_base = pci_resource_start(dev->pdev, 0); dev->iseg = 
ioremap(dev->iseg_base, sizeof(*dev->iseg)); if (!dev->iseg) { @@ -938,28 +797,37 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) struct pci_dev *pdev = dev->pdev; int err; + priv->devcom = mlx5_devcom_register_device(dev); + if (IS_ERR(priv->devcom)) + dev_err(&pdev->dev, "failed to register with devcom (0x%p)\n", + priv->devcom); + err = mlx5_query_board_id(dev); if (err) { dev_err(&pdev->dev, "query board id failed\n"); - goto out; + goto err_devcom; } - err = mlx5_eq_init(dev); + err = mlx5_eq_table_init(dev); if (err) { dev_err(&pdev->dev, "failed to initialize eq\n"); - goto out; + goto err_devcom; + } + + err = mlx5_events_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize events\n"); + goto err_eq_cleanup; } err = mlx5_cq_debugfs_init(dev); if (err) { dev_err(&pdev->dev, "failed to initialize cq debugfs\n"); - goto err_eq_cleanup; + goto err_events_cleanup; } mlx5_init_qp_table(dev); - mlx5_init_srq_table(dev); - mlx5_init_mkey_table(dev); mlx5_init_reserved_gids(dev); @@ -1013,14 +881,15 @@ err_rl_cleanup: err_tables_cleanup: mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); - +err_events_cleanup: + mlx5_events_cleanup(dev); err_eq_cleanup: - mlx5_eq_cleanup(dev); + mlx5_eq_table_cleanup(dev); +err_devcom: + mlx5_devcom_unregister_device(dev->priv.devcom); -out: return err; } @@ -1036,10 +905,11 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); - mlx5_eq_cleanup(dev); + mlx5_events_cleanup(dev); + mlx5_eq_table_cleanup(dev); + mlx5_devcom_unregister_device(dev->priv.devcom); } static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, @@ -1048,6 +918,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv, struct pci_dev *pdev = dev->pdev; int err; + dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", @@ -1113,15 +984,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } - err = handle_hca_cap(dev); + err = set_hca_cap(dev); if (err) { - dev_err(&pdev->dev, "handle_hca_cap failed\n"); - goto reclaim_boot_pages; - } - - err = handle_hca_cap_atomic(dev); - if (err) { - dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n"); + dev_err(&pdev->dev, "set_hca_cap failed\n"); goto reclaim_boot_pages; } @@ -1131,16 +996,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } - err = mlx5_pagealloc_start(dev); - if (err) { - dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n"); - goto reclaim_boot_pages; - } - err = mlx5_cmd_init_hca(dev, sw_owner_id); if (err) { dev_err(&pdev->dev, "init hca failed\n"); - goto err_pagealloc_stop; + goto reclaim_boot_pages; } mlx5_set_driver_version(dev); @@ -1161,23 +1020,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, } } - err = mlx5_alloc_irq_vectors(dev); - if (err) { - dev_err(&pdev->dev, "alloc irq vectors failed\n"); - goto err_cleanup_once; - } - dev->priv.uar = mlx5_get_uars_page(dev); if (IS_ERR(dev->priv.uar)) { dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); err = PTR_ERR(dev->priv.uar); - goto err_disable_msix; + goto err_get_uars; } - err = mlx5_start_eqs(dev); + mlx5_events_start(dev); + mlx5_pagealloc_start(dev); + + err = mlx5_eq_table_create(dev); if (err) { - dev_err(&pdev->dev, "Failed to start pages and async EQs\n"); - goto err_put_uars; + dev_err(&pdev->dev, "Failed to create EQs\n"); + goto err_eq_table; } err = mlx5_fw_tracer_init(dev->tracer); @@ -1186,18 +1042,6 @@ static int 
mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_fw_tracer; } - err = alloc_comp_eqs(dev); - if (err) { - dev_err(&pdev->dev, "Failed to alloc completion EQs\n"); - goto err_comp_eqs; - } - - err = mlx5_irq_set_affinity_hints(dev); - if (err) { - dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - goto err_affinity_hints; - } - err = mlx5_fpga_device_start(dev); if (err) { dev_err(&pdev->dev, "fpga device start failed %d\n", err); @@ -1234,6 +1078,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_sriov; } + err = mlx5_ec_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init embedded CPU\n"); + goto err_ec; + } + if (mlx5_device_registered(dev)) { mlx5_attach_device(dev); } else { @@ -1251,6 +1101,9 @@ out: return 0; err_reg_dev: + mlx5_ec_cleanup(dev); + +err_ec: mlx5_sriov_detach(dev); err_sriov: @@ -1266,24 +1119,17 @@ err_ipsec_start: mlx5_fpga_device_stop(dev); err_fpga_start: - mlx5_irq_clear_affinity_hints(dev); - -err_affinity_hints: - free_comp_eqs(dev); - -err_comp_eqs: mlx5_fw_tracer_cleanup(dev->tracer); err_fw_tracer: - mlx5_stop_eqs(dev); + mlx5_eq_table_destroy(dev); -err_put_uars: +err_eq_table: + mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); mlx5_put_uars_page(dev, priv->uar); -err_disable_msix: - mlx5_free_irq_vectors(dev); - -err_cleanup_once: +err_get_uars: if (boot) mlx5_cleanup_once(dev); @@ -1294,9 +1140,6 @@ err_stop_poll: goto out_err; } -err_pagealloc_stop: - mlx5_pagealloc_stop(dev); - reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); @@ -1335,26 +1178,26 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (mlx5_device_registered(dev)) mlx5_detach_device(dev); + mlx5_ec_cleanup(dev); mlx5_sriov_detach(dev); mlx5_cleanup_fs(dev); mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); - mlx5_irq_clear_affinity_hints(dev); - free_comp_eqs(dev); mlx5_fw_tracer_cleanup(dev->tracer); - 
mlx5_stop_eqs(dev); + mlx5_eq_table_destroy(dev); + mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); mlx5_put_uars_page(dev, priv->uar); - mlx5_free_irq_vectors(dev); if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev, cleanup); + err = mlx5_cmd_teardown_hca(dev); if (err) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); goto out; } - mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); mlx5_cmd_cleanup(dev); @@ -1364,12 +1207,6 @@ out: return err; } -struct mlx5_core_event_handler { - void (*event)(struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - void *data); -}; - static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, @@ -1403,7 +1240,6 @@ static int init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); dev->pdev = pdev; - dev->event = mlx5_core_event; dev->profile = &profile[prof_sel]; INIT_LIST_HEAD(&priv->ctx_list); @@ -1411,17 +1247,6 @@ static int init_one(struct pci_dev *pdev, mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); - INIT_LIST_HEAD(&priv->waiting_events_list); - priv->is_accum_events = false; - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - err = init_srcu_struct(&priv->pfault_srcu); - if (err) { - dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n", - err); - goto clean_dev; - } -#endif mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); INIT_LIST_HEAD(&priv->bfregs.reg_head.list); @@ -1430,7 +1255,7 @@ static int init_one(struct pci_dev *pdev, err = mlx5_pci_init(dev, priv); if (err) { dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); - goto clean_srcu; + goto clean_dev; } err = mlx5_health_init(dev); @@ -1439,12 +1264,14 @@ static int init_one(struct pci_dev *pdev, goto close_pci; } - mlx5_pagealloc_init(dev); + err = mlx5_pagealloc_init(dev); + if (err) + goto err_pagealloc_init; err = 
mlx5_load_one(dev, priv, true); if (err) { dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); - goto clean_health; + goto err_load_one; } request_module_nowait(MLX5_IB_MOD); @@ -1458,16 +1285,13 @@ static int init_one(struct pci_dev *pdev, clean_load: mlx5_unload_one(dev, priv, true); -clean_health: +err_load_one: mlx5_pagealloc_cleanup(dev); +err_pagealloc_init: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); -clean_srcu: -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&priv->pfault_srcu); clean_dev: -#endif devlink_free(devlink); return err; @@ -1491,9 +1315,6 @@ static void remove_one(struct pci_dev *pdev) mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&priv->pfault_srcu); -#endif devlink_free(devlink); } @@ -1637,7 +1458,6 @@ succeed: * kexec. There is no need to cleanup the mlx5_core software * contexts. */ - mlx5_irq_clear_affinity_hints(dev); mlx5_core_eq_free_irqs(dev); return 0; @@ -1669,6 +1489,8 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */ { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */ { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */ + { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */ + { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { 0, } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 0594d0961cb3..7b331674622c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -38,6 
+38,7 @@ #include <linux/sched.h> #include <linux/if_link.h> #include <linux/firmware.h> +#include <linux/ptp_clock_kernel.h> #include <linux/mlx5/cq.h> #include <linux/mlx5/fs.h> @@ -78,6 +79,11 @@ do { \ __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +#define mlx5_core_warn_once(__dev, format, ...) \ + dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + #define mlx5_core_info(__dev, format, ...) \ dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) @@ -97,12 +103,6 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); - -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param); -void mlx5_core_page_fault(struct mlx5_core_dev *dev, - struct mlx5_pagefault *pfault); -void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); @@ -121,31 +121,12 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 modify_bitmask); int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); - -int mlx5_eq_init(struct mlx5_core_dev *dev); -void mlx5_eq_cleanup(struct mlx5_core_dev *dev); -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type); -int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_eq_del_cq(struct mlx5_eq 
*eq, struct mlx5_core_cq *cq); -int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - u32 *out, int outlen); -int mlx5_start_eqs(struct mlx5_core_dev *dev); -void mlx5_stop_eqs(struct mlx5_core_dev *dev); -/* This function should only be called after mlx5_cmd_force_teardown_hca */ -void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); -void mlx5_cq_tasklet_cb(unsigned long data); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); -int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); -void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages); +u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts); + +void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); +void mlx5_cmd_flush(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); @@ -159,6 +140,11 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_events_init(struct mlx5_core_dev *dev); +void mlx5_events_cleanup(struct mlx5_core_dev *dev); +void mlx5_events_start(struct mlx5_core_dev *dev); +void mlx5_events_stop(struct mlx5_core_dev *dev); + void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_attach_device(struct mlx5_core_dev *dev); @@ -202,10 +188,8 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) MLX5_CAP_GEN(dev, lag_master); } -int 
mlx5_lag_allow(struct mlx5_core_dev *dev); -int mlx5_lag_forbid(struct mlx5_core_dev *dev); - void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); +void mlx5_lag_update(struct mlx5_core_dev *dev); enum { MLX5_NIC_IFC_FULL = 0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 0670165afd5f..ea744d8466ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -51,9 +51,10 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context) + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; @@ -71,7 +72,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(mkc, mkc, mkey_7_0, key); if (callback) - return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); @@ -105,7 +106,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen) { - return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, NULL, 0, NULL, NULL); } EXPORT_SYMBOL(mlx5_core_create_mkey); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index e36d3e3675f9..41025387ff2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -37,6 +37,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" +#include 
"lib/eq.h" enum { MLX5_PAGES_CANT_GIVE = 0, @@ -47,6 +48,7 @@ enum { struct mlx5_pages_req { struct mlx5_core_dev *dev; u16 func_id; + u8 ec_function; s32 npages; struct work_struct work; }; @@ -142,6 +144,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, MLX5_SET(query_pages_in, in, op_mod, boot ? MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); + MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev)); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -252,7 +255,8 @@ err_mapping: return err; } -static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) +static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, + bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -261,6 +265,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -269,7 +274,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) } static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, - int notify_fail) + int notify_fail, bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); @@ -304,6 +309,7 @@ retry: MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) { @@ -315,8 +321,11 @@ retry: dev->priv.fw_pages += npages; if (func_id) 
dev->priv.vfs_pages += npages; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages += npages; - mlx5_core_dbg(dev, "err %d\n", err); + mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n", + npages, ec_function, func_id, err); kvfree(in); return 0; @@ -327,7 +336,7 @@ out_4k: out_free: kvfree(in); if (notify_fail) - page_notify_fail(dev, func_id); + page_notify_fail(dev, func_id, ec_function); return err; } @@ -363,7 +372,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, } static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, - int *nclaimed) + int *nclaimed, bool ec_function) { int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -384,6 +393,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); @@ -409,6 +419,8 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, dev->priv.fw_pages -= num_claimed; if (func_id) dev->priv.vfs_pages -= num_claimed; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages -= num_claimed; out_free: kvfree(out); @@ -422,9 +434,10 @@ static void pages_work_handler(struct work_struct *work) int err = 0; if (req->npages < 0) - err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL); + err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL, + req->ec_function); else if (req->npages > 0) - err = give_pages(dev, req->func_id, req->npages, 1); + err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function); if (err) mlx5_core_warn(dev, "%s fail %d\n", @@ -433,22 +446,43 @@ 
static void pages_work_handler(struct work_struct *work) kfree(req); } -void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages) +enum { + EC_FUNCTION_MASK = 0x8000, +}; + +static int req_pages_handler(struct notifier_block *nb, + unsigned long type, void *data) { struct mlx5_pages_req *req; - + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + bool ec_function; + u16 func_id; + s32 npages; + + priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb); + dev = container_of(priv, struct mlx5_core_dev, priv); + eqe = data; + + func_id = be16_to_cpu(eqe->data.req_pages.func_id); + npages = be32_to_cpu(eqe->data.req_pages.num_pages); + ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK; + mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", + func_id, npages); req = kzalloc(sizeof(*req), GFP_ATOMIC); if (!req) { mlx5_core_warn(dev, "failed to allocate pages request\n"); - return; + return NOTIFY_DONE; } req->dev = dev; req->func_id = func_id; req->npages = npages; + req->ec_function = ec_function; INIT_WORK(&req->work, pages_work_handler); queue_work(dev->priv.pg_wq, &req->work); + return NOTIFY_OK; } int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) @@ -464,7 +498,7 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", npages, boot ? 
"boot" : "init", func_id); - return give_pages(dev, func_id, npages, 0); + return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev)); } enum { @@ -498,7 +532,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) fwp = rb_entry(p, struct fw_page, rb_node); err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), - &nclaimed); + &nclaimed, mlx5_core_is_ecpf(dev)); if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", @@ -520,39 +554,45 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.vfs_pages, "VFs FW pages counter is %d after reclaiming all pages\n", dev->priv.vfs_pages); + WARN(dev->priv.peer_pf_pages, + "Peer PF FW pages counter is %d after reclaiming all pages\n", + dev->priv.peer_pf_pages); return 0; } -void mlx5_pagealloc_init(struct mlx5_core_dev *dev) +int mlx5_pagealloc_init(struct mlx5_core_dev *dev) { dev->priv.page_root = RB_ROOT; INIT_LIST_HEAD(&dev->priv.free_list); + dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); + if (!dev->priv.pg_wq) + return -ENOMEM; + + return 0; } void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) { - /* nothing */ + destroy_workqueue(dev->priv.pg_wq); } -int mlx5_pagealloc_start(struct mlx5_core_dev *dev) +void mlx5_pagealloc_start(struct mlx5_core_dev *dev) { - dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); - if (!dev->priv.pg_wq) - return -ENOMEM; - - return 0; + MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST); + mlx5_eq_notifier_register(dev, &dev->priv.pg_nb); } void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) { - destroy_workqueue(dev->priv.pg_wq); + mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb); + flush_workqueue(dev->priv.pg_wq); } -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) { unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - int prev_vfs_pages = dev->priv.vfs_pages; + int 
prev_pages = *pages; /* In case of internal error we will free the pages manually later */ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -560,16 +600,16 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) return 0; } - mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_pages, dev->priv.name); - while (dev->priv.vfs_pages) { + while (*pages) { if (time_after(jiffies, end)) { - mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages); return -ETIMEDOUT; } - if (dev->priv.vfs_pages < prev_vfs_pages) { + if (*pages < prev_pages) { end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - prev_vfs_pages = dev->priv.vfs_pages; + prev_pages = *pages; } msleep(50); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 31a9cbd85689..361468e0435d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -30,10 +30,7 @@ * SOFTWARE. 
*/ -#include <linux/module.h> -#include <linux/mlx5/driver.h> #include <linux/mlx5/port.h> -#include <linux/mlx5/cmd.h> #include "mlx5_core.h" int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, @@ -157,44 +154,6 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) sizeof(out), MLX5_REG_MLCR, 0, 1); } -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, - u32 *proto_cap, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - else - *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap); - -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, - u32 *proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - else - *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); - int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port) { @@ -211,23 +170,6 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); -int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, - u32 *proto_oper, u8 local_port) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, - local_port); - if (err) - return err; - - *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - - return 0; -} -EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper); - int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, u8 local_port) { @@ 
-245,35 +187,6 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper); -int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - u32 in[MLX5_ST_SZ_DW(ptys_reg)]; - u8 an_disable_admin; - u8 an_disable_cap; - u8 an_status; - - mlx5_query_port_autoneg(dev, proto_mask, &an_status, - &an_disable_cap, &an_disable_admin); - if (!an_disable_cap && an_disable) - return -EPERM; - - memset(in, 0, sizeof(in)); - - MLX5_SET(ptys_reg, in, local_port, 1); - MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); - MLX5_SET(ptys_reg, in, proto_mask, proto_mask); - if (proto_mask == MLX5_PTYS_EN) - MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); - else - MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin); - - return mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_PTYS, 0, 1); -} -EXPORT_SYMBOL_GPL(mlx5_set_port_ptys); - /* This function should be used after setting a port register only */ void mlx5_toggle_port_link(struct mlx5_core_dev *dev) { @@ -404,10 +317,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; i2c_addr = MLX5_I2C_ADDR_LOW; - if (offset >= MLX5_EEPROM_PAGE_LENGTH) { - i2c_addr = MLX5_I2C_ADDR_HIGH; - offset -= MLX5_EEPROM_PAGE_LENGTH; - } MLX5_SET(mcia_reg, in, l, 0); MLX5_SET(mcia_reg, in, module, module_num); @@ -606,25 +515,6 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) } EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); -void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, - u8 *an_status, - u8 *an_disable_cap, u8 *an_disable_admin) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - - *an_status = 0; - *an_disable_cap = 0; - *an_disable_admin = 0; - - if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1)) - return; - - *an_status = MLX5_GET(ptys_reg, out, an_status); - *an_disable_cap = 
MLX5_GET(ptys_reg, out, an_disable_cap); - *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); -} -EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg); - int mlx5_max_tc(struct mlx5_core_dev *mdev) { u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8; @@ -870,8 +760,7 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) } EXPORT_SYMBOL_GPL(mlx5_query_port_wol); -static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, - int outlen) +int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen) { u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; @@ -880,7 +769,7 @@ static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, outlen, MLX5_REG_PCMR, 0, 0); } -static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) +int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) { u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; @@ -891,7 +780,11 @@ static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable) { u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; MLX5_SET(pcmr_reg, in, local_port, 1); MLX5_SET(pcmr_reg, in, fcs_chk, enable); return mlx5_set_ports_check(mdev, in, sizeof(in)); @@ -915,63 +808,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); } -static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = { - "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */ - "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */ - "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */ -}; - -static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = { - "Power budget exceeded", - "Long Range for non MLNX cable", - "Bus stuck(I2C or data shorted)", - "No EEPROM/retry timeout", - "Enforce part number list", - "Unknown identifier", - "High Temperature", - "Bad or shorted 
cable/module", - "Unknown status", -}; - -void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) -{ - enum port_module_event_status_type module_status; - enum port_module_event_error_type error_type; - struct mlx5_eqe_port_module *module_event_eqe; - struct mlx5_priv *priv = &dev->priv; - u8 module_num; - - module_event_eqe = &eqe->data.port_module; - module_num = module_event_eqe->module; - module_status = module_event_eqe->module_status & - PORT_MODULE_EVENT_MODULE_STATUS_MASK; - error_type = module_event_eqe->error_type & - PORT_MODULE_EVENT_ERROR_TYPE_MASK; - - if (module_status < MLX5_MODULE_STATUS_ERROR) { - priv->pme_stats.status_counters[module_status - 1]++; - } else if (module_status == MLX5_MODULE_STATUS_ERROR) { - if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN) - /* Unknown error type */ - error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN; - priv->pme_stats.error_counters[error_type]++; - } - - if (!printk_ratelimit()) - return; - - if (module_status < MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, - "Port module event: module %u, %s\n", - module_num, mlx5_pme_status[module_status - 1]); - - else if (module_status == MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, - "Port module event[error]: module %u, %s, %s\n", - module_num, mlx5_pme_status[module_status - 1], - mlx5_pme_error[error_type]); -} - int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) { u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 91b8139a388d..b8ba74de9555 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -38,26 +38,25 @@ #include <linux/mlx5/transobj.h> #include "mlx5_core.h" +#include "lib/eq.h" -static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev, - u32 rsn) +static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct); + 
+static struct mlx5_core_rsc_common * +mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) { - struct mlx5_qp_table *table = &dev->priv.qp_table; struct mlx5_core_rsc_common *common; + unsigned long flags; - spin_lock(&table->lock); + spin_lock_irqsave(&table->lock, flags); common = radix_tree_lookup(&table->tree, rsn); if (common) atomic_inc(&common->refcount); - spin_unlock(&table->lock); + spin_unlock_irqrestore(&table->lock, flags); - if (!common) { - mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", - rsn); - return NULL; - } return common; } @@ -120,19 +119,57 @@ static bool is_event_type_allowed(int rsc_type, int event_type) } } -void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) +static int rsc_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) { - struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn); + struct mlx5_core_rsc_common *common; + struct mlx5_qp_table *table; + struct mlx5_core_dev *dev; struct mlx5_core_dct *dct; + u8 event_type = (u8)type; struct mlx5_core_qp *qp; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + u32 rsn; + + switch (event_type) { + case MLX5_EVENT_TYPE_DCT_DRAINED: + eqe = data; + rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; + rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); + break; + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + eqe = data; + rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); + break; + default: + return NOTIFY_DONE; + } - if (!common) - return; + table = container_of(nb, struct mlx5_qp_table, nb); + priv = container_of(table, struct mlx5_priv, qp_table); + dev = container_of(priv, struct mlx5_core_dev, priv); + + 
mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn); + + common = mlx5_get_rsc(table, rsn); + if (!common) { + mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn); + return NOTIFY_OK; + } if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) { mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n", event_type, rsn); - return; + goto out; } switch (common->res) { @@ -150,8 +187,10 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) default: mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn); } - +out: mlx5_core_put_rsc(common); + + return NOTIFY_OK; } static int create_resource_common(struct mlx5_core_dev *dev, @@ -191,20 +230,49 @@ static void destroy_resource_common(struct mlx5_core_dev *dev, wait_for_completion(&qp->common.free); } +static int _mlx5_core_destroy_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct, bool need_cleanup) +{ + u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + int err; + + err = mlx5_core_drain_dct(dev, dct); + if (err) { + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + goto destroy; + } else { + mlx5_core_warn( + dev, "failed drain DCT 0x%x with error 0x%x\n", + qp->qpn, err); + return err; + } + } + wait_for_completion(&dct->drained); +destroy: + if (need_cleanup) + destroy_resource_common(dev, &dct->mqp); + MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); + MLX5_SET(destroy_dct_in, in, uid, qp->uid); + err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)&out, sizeof(out)); + return err; +} + int mlx5_core_create_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct, - u32 *in, int inlen) + u32 *in, int inlen, + u32 *out, int outlen) { - u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0}; - u32 din[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; - u32 
dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; struct mlx5_core_qp *qp = &dct->mqp; int err; init_completion(&dct->drained); MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); if (err) { mlx5_core_warn(dev, "create DCT failed, ret %d\n", err); return err; @@ -218,11 +286,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev, return 0; err_cmd: - MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); - MLX5_SET(destroy_dct_in, din, dctn, qp->qpn); - MLX5_SET(destroy_dct_in, din, uid, qp->uid); - mlx5_cmd_exec(dev, (void *)&in, sizeof(din), - (void *)&out, sizeof(dout)); + _mlx5_core_destroy_dct(dev, dct, false); return err; } EXPORT_SYMBOL_GPL(mlx5_core_create_dct); @@ -287,29 +351,7 @@ static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, int mlx5_core_destroy_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct) { - u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; - struct mlx5_core_qp *qp = &dct->mqp; - int err; - - err = mlx5_core_drain_dct(dev, dct); - if (err) { - if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - goto destroy; - } else { - mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err); - return err; - } - } - wait_for_completion(&dct->drained); -destroy: - destroy_resource_common(dev, &dct->mqp); - MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); - MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); - MLX5_SET(destroy_dct_in, in, uid, qp->uid); - err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), - (void *)&out, sizeof(out)); - return err; + return _mlx5_core_destroy_dct(dev, dct, true); } EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct); @@ -487,10 +529,16 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev) spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); mlx5_qp_debugfs_init(dev); + + table->nb.notifier_call = 
rsc_event_notifier; + mlx5_notifier_register(dev, &table->nb); } void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev) { + struct mlx5_qp_table *table = &dev->priv.qp_table; + + mlx5_notifier_unregister(dev, &table->nb); mlx5_qp_debugfs_cleanup(dev); } @@ -670,3 +718,20 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); + +struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev, + int res_num, + enum mlx5_res_type res_type) +{ + u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN); + struct mlx5_qp_table *table = &dev->priv.qp_table; + + return mlx5_get_rsc(table, rsn); +} +EXPORT_SYMBOL_GPL(mlx5_core_res_hold); + +void mlx5_core_res_put(struct mlx5_core_rsc_common *res) +{ + mlx5_core_put_rsc(res); +} +EXPORT_SYMBOL_GPL(mlx5_core_res_put); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a0674962f02c..7b23fa8d2d60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -147,7 +147,7 @@ out: if (MLX5_ESWITCH_MANAGER(dev)) mlx5_eswitch_disable_sriov(dev->priv.eswitch); - if (mlx5_wait_for_vf_pages(dev)) + if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } @@ -216,20 +216,10 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!mlx5_core_is_pf(dev)) return -EPERM; - if (num_vfs) { - int ret; - - ret = mlx5_lag_forbid(dev); - if (ret && (ret != -ENODEV)) - return ret; - } - - if (num_vfs) { + if (num_vfs) err = mlx5_sriov_enable(pdev, num_vfs); - } else { + else mlx5_sriov_disable(pdev); - mlx5_lag_allow(dev); - } return err ? 
err : num_vfs; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c deleted file mode 100644 index 6a6fc9be01e6..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ /dev/null @@ -1,716 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/cmd.h> -#include <linux/mlx5/srq.h> -#include <rdma/ib_verbs.h> -#include "mlx5_core.h" -#include <linux/mlx5/transobj.h> - -void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *srq; - - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); - if (srq) - atomic_inc(&srq->refcount); - - spin_unlock(&table->lock); - - if (!srq) { - mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn); - return; - } - - srq->event(srq, event_type); - - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); -} - -static int get_pas_size(struct mlx5_srq_attr *in) -{ - u32 log_page_size = in->log_page_size + 12; - u32 log_srq_size = in->log_size; - u32 log_rq_stride = in->wqe_shift; - u32 page_offset = in->page_offset; - u32 po_quanta = 1 << (log_page_size - 6); - u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); - u32 page_size = 1 << log_page_size; - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size); - - return rq_num_pas * sizeof(u64); -} - -static void set_wq(void *wq, struct mlx5_srq_attr *in) -{ - MLX5_SET(wq, wq, wq_signature, !!(in->flags - & MLX5_SRQ_FLAG_WQ_SIG)); - MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); - MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); - MLX5_SET(wq, wq, log_wq_sz, in->log_size); - MLX5_SET(wq, wq, page_offset, in->page_offset); - MLX5_SET(wq, wq, lwm, in->lwm); - MLX5_SET(wq, wq, pd, in->pd); - MLX5_SET64(wq, wq, dbr_addr, in->db_record); -} - -static void set_srqc(void *srqc, struct mlx5_srq_attr *in) -{ - MLX5_SET(srqc, srqc, wq_signature, !!(in->flags - & MLX5_SRQ_FLAG_WQ_SIG)); - MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); - MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); - MLX5_SET(srqc, srqc, log_srq_size, 
in->log_size); - MLX5_SET(srqc, srqc, page_offset, in->page_offset); - MLX5_SET(srqc, srqc, lwm, in->lwm); - MLX5_SET(srqc, srqc, pd, in->pd); - MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); - MLX5_SET(srqc, srqc, xrcd, in->xrcd); - MLX5_SET(srqc, srqc, cqn, in->cqn); -} - -static void get_wq(void *wq, struct mlx5_srq_attr *in) -{ - if (MLX5_GET(wq, wq, wq_signature)) - in->flags &= MLX5_SRQ_FLAG_WQ_SIG; - in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); - in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; - in->log_size = MLX5_GET(wq, wq, log_wq_sz); - in->page_offset = MLX5_GET(wq, wq, page_offset); - in->lwm = MLX5_GET(wq, wq, lwm); - in->pd = MLX5_GET(wq, wq, pd); - in->db_record = MLX5_GET64(wq, wq, dbr_addr); -} - -static void get_srqc(void *srqc, struct mlx5_srq_attr *in) -{ - if (MLX5_GET(srqc, srqc, wq_signature)) - in->flags &= MLX5_SRQ_FLAG_WQ_SIG; - in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); - in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); - in->log_size = MLX5_GET(srqc, srqc, log_srq_size); - in->page_offset = MLX5_GET(srqc, srqc, page_offset); - in->lwm = MLX5_GET(srqc, srqc, lwm); - in->pd = MLX5_GET(srqc, srqc, pd); - in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); -} - -struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *srq; - - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); - if (srq) - atomic_inc(&srq->refcount); - - spin_unlock(&table->lock); - - return srq; -} -EXPORT_SYMBOL(mlx5_core_get_srq); - -static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; - void *create_in; - void *srqc; - void *pas; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if 
(!create_in) - return -ENOMEM; - - MLX5_SET(create_srq_in, create_in, uid, in->uid); - srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); - pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); - - set_srqc(srqc, in); - memcpy(pas, in->pas, pas_size); - - MLX5_SET(create_srq_in, create_in, opcode, - MLX5_CMD_OP_CREATE_SRQ); - - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - kvfree(create_in); - if (!err) { - srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); - srq->uid = in->uid; - } - - return err; -} - -static int destroy_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; - - MLX5_SET(destroy_srq_in, srq_in, opcode, - MLX5_CMD_OP_DESTROY_SRQ); - MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); -} - -static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq) -{ - u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - - MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); - MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, srq_in, lwm, lwm); - MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); -} - -static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; - u32 *srq_out; - void *srqc; - int err; - - srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); - if (!srq_out) - return -ENOMEM; - - MLX5_SET(query_srq_in, srq_in, opcode, - MLX5_CMD_OP_QUERY_SRQ); - MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); - 
err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, MLX5_ST_SZ_BYTES(query_srq_out)); - if (err) - goto out; - - srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); - get_srqc(srqc, out); - if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; -out: - kvfree(srq_out); - return err; -} - -static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; - void *create_in; - void *xrc_srqc; - void *pas; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); - xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, - xrc_srq_context_entry); - pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); - - set_srqc(xrc_srqc, in); - MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); - memcpy(pas, in->pas, pas_size); - MLX5_SET(create_xrc_srq_in, create_in, opcode, - MLX5_CMD_OP_CREATE_XRC_SRQ); - - memset(create_out, 0, sizeof(create_out)); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - if (err) - goto out; - - srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); - srq->uid = in->uid; -out: - kvfree(create_in); - return err; -} - -static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; - - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); -} - -static int 
arm_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, u16 lwm) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); -} - -static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; - u32 *xrcsrq_out; - void *xrc_srqc; - int err; - - xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); - if (!xrcsrq_out) - return -ENOMEM; - memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); - - MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_QUERY_XRC_SRQ); - MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - - err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, - MLX5_ST_SZ_BYTES(query_xrc_srq_out)); - if (err) - goto out; - - xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, - xrc_srq_context_entry); - get_srqc(xrc_srqc, out); - if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; - -out: - kvfree(xrcsrq_out); - return err; -} - -static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - void *create_in; - void *rmpc; - void *wq; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - 
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - MLX5_SET(create_rmp_in, create_in, uid, in->uid); - set_wq(wq, in); - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); - - err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); - if (!err) - srq->uid = in->uid; - - kvfree(create_in); - return err; -} - -static int destroy_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; - - MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); - MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); - MLX5_SET(destroy_rmp_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int arm_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - u16 lwm) -{ - void *in; - void *rmpc; - void *wq; - void *bitmask; - int err; - - in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); - bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); - MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); - MLX5_SET(modify_rmp_in, in, uid, srq->uid); - MLX5_SET(wq, wq, lwm, lwm); - MLX5_SET(rmp_bitmask, bitmask, lwm, 1); - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - - err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); - - kvfree(in); - return err; -} - -static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 *rmp_out; - void *rmpc; - int err; - - rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL); - if (!rmp_out) - return -ENOMEM; - - err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); - if (err) - goto out; - - rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); - get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); - if (MLX5_GET(rmpc, rmpc, state) 
!= MLX5_RMPC_STATE_RDY) - out->flags |= MLX5_SRQ_FLAG_ERR; - -out: - kvfree(rmp_out); - return err; -} - -static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; - void *create_in; - void *xrqc; - void *wq; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); - wq = MLX5_ADDR_OF(xrqc, xrqc, wq); - - set_wq(wq, in); - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); - - if (in->type == IB_SRQT_TM) { - MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); - if (in->flags & MLX5_SRQ_FLAG_RNDV) - MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); - MLX5_SET(xrqc, xrqc, - tag_matching_topology_context.log_matching_list_sz, - in->tm_log_list_size); - } - MLX5_SET(xrqc, xrqc, user_index, in->user_index); - MLX5_SET(xrqc, xrqc, cqn, in->cqn); - MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); - MLX5_SET(create_xrq_in, create_in, uid, in->uid); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - kvfree(create_in); - if (!err) { - srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); - srq->uid = in->uid; - } - - return err; -} - -static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) -{ - u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; - - MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); - MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); - MLX5_SET(destroy_xrq_in, in, uid, srq->uid); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int arm_xrq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - u16 lwm) -{ - u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - u32 
in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - - MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); - MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, in, lwm, lwm); - MLX5_SET(arm_rq_in, in, uid, srq->uid); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; - u32 *xrq_out; - int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); - void *xrqc; - int err; - - xrq_out = kvzalloc(outlen, GFP_KERNEL); - if (!xrq_out) - return -ENOMEM; - - MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); - MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); - - err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen); - if (err) - goto out; - - xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); - get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); - if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; - out->tm_next_tag = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.append_next_index); - out->tm_hw_phase_cnt = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.hw_phase_cnt); - out->tm_sw_phase_cnt = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.sw_phase_cnt); - -out: - kvfree(xrq_out); - return err; -} - -static int create_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - if (!dev->issi) - return create_srq_cmd(dev, srq, in); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return create_xrc_srq_cmd(dev, srq, in); - case MLX5_RES_XRQ: - return create_xrq_cmd(dev, srq, in); - default: - return create_rmp_cmd(dev, srq, in); - } -} - -static int destroy_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - if (!dev->issi) - return destroy_srq_cmd(dev, srq); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return 
destroy_xrc_srq_cmd(dev, srq); - case MLX5_RES_XRQ: - return destroy_xrq_cmd(dev, srq); - default: - return destroy_rmp_cmd(dev, srq); - } -} - -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - int err; - struct mlx5_srq_table *table = &dev->priv.srq_table; - - switch (in->type) { - case IB_SRQT_XRC: - srq->common.res = MLX5_RES_XSRQ; - break; - case IB_SRQT_TM: - srq->common.res = MLX5_RES_XRQ; - break; - default: - srq->common.res = MLX5_RES_SRQ; - } - - err = create_srq_split(dev, srq, in); - if (err) - return err; - - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); - - spin_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, srq->srqn, srq); - spin_unlock_irq(&table->lock); - if (err) { - mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn); - goto err_destroy_srq_split; - } - - return 0; - -err_destroy_srq_split: - destroy_srq_split(dev, srq); - - return err; -} -EXPORT_SYMBOL(mlx5_core_create_srq); - -int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *tmp; - int err; - - spin_lock_irq(&table->lock); - tmp = radix_tree_delete(&table->tree, srq->srqn); - spin_unlock_irq(&table->lock); - if (!tmp) { - mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn); - return -EINVAL; - } - if (tmp != srq) { - mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn); - return -EINVAL; - } - - err = destroy_srq_split(dev, srq); - if (err) - return err; - - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); - wait_for_completion(&srq->free); - - return 0; -} -EXPORT_SYMBOL(mlx5_core_destroy_srq); - -int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - if (!dev->issi) - return query_srq_cmd(dev, srq, out); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return query_xrc_srq_cmd(dev, 
srq, out); - case MLX5_RES_XRQ: - return query_xrq_cmd(dev, srq, out); - default: - return query_rmp_cmd(dev, srq, out); - } -} -EXPORT_SYMBOL(mlx5_core_query_srq); - -int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq) -{ - if (!dev->issi) - return arm_srq_cmd(dev, srq, lwm, is_srq); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return arm_xrc_srq_cmd(dev, srq, lwm); - case MLX5_RES_XRQ: - return arm_xrq_cmd(dev, srq, lwm); - default: - return arm_rmp_cmd(dev, srq, lwm); - } -} -EXPORT_SYMBOL(mlx5_core_arm_srq); - -void mlx5_init_srq_table(struct mlx5_core_dev *dev) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - - memset(table, 0, sizeof(*table)); - spin_lock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); -} - -void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) -{ - /* nothing */ -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index a1ee9a8a769e..c4d4b76096dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -258,115 +258,6 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) } EXPORT_SYMBOL(mlx5_core_destroy_tis); -int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rmpn) -{ - u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0}; - int err; - - MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - if (!err) - *rmpn = MLX5_GET(create_rmp_out, out, rmpn); - - return err; -} - -int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) -{ - u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; - - MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); -} - -int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn) -{ - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0}; - 
u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0}; - - MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); - MLX5_SET(destroy_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, - sizeof(out)); -} - -int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) -{ - u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; - int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); - - MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); - MLX5_SET(query_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - -int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) -{ - void *in; - void *rmpc; - void *wq; - void *bitmask; - int err; - - in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); - bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); - MLX5_SET(modify_rmp_in, in, rmpn, rmpn); - MLX5_SET(wq, wq, lwm, lwm); - MLX5_SET(rmp_bitmask, bitmask, lwm, 1); - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - - err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); - - kvfree(in); - - return err; -} - -int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *xsrqn) -{ - u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0}; - int err; - - MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - if (!err) - *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn); - - return err; -} - -int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) -{ - u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; - - MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn); - return mlx5_cmd_exec(dev, in, sizeof(in), 
out, sizeof(out)); -} - -int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) -{ - u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - - MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn); - MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, in, op_mod, - MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 8b97066dd1f1..94464723ff77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -90,8 +90,8 @@ static void up_rel_func(struct kref *kref) iounmap(up->map); if (mlx5_cmd_free_uar(up->mdev, up->index)) mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index); - kfree(up->reg_bitmap); - kfree(up->fp_bitmap); + bitmap_free(up->reg_bitmap); + bitmap_free(up->fp_bitmap); kfree(up); } @@ -110,11 +110,11 @@ static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev, return ERR_PTR(err); up->mdev = mdev; - up->reg_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL); + up->reg_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL); if (!up->reg_bitmap) goto error1; - up->fp_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL); + up->fp_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL); if (!up->fp_bitmap) goto error1; @@ -157,8 +157,8 @@ error2: if (mlx5_cmd_free_uar(mdev, up->index)) mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index); error1: - kfree(up->fp_bitmap); - kfree(up->reg_bitmap); + bitmap_free(up->fp_bitmap); + bitmap_free(up->reg_bitmap); kfree(up); return ERR_PTR(err); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c 
b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index cfbea66b4879..ef95feca9961 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -64,7 +64,7 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) } int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, - u16 vport, u8 state) + u16 vport, u8 other_vport, u8 state) { u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0}; u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0}; @@ -73,8 +73,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, MLX5_CMD_OP_MODIFY_VPORT_STATE); MLX5_SET(modify_vport_state_in, in, op_mod, opmod); MLX5_SET(modify_vport_state_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_vport_state_in, in, other_vport, 1); + MLX5_SET(modify_vport_state_in, in, other_vport, other_vport); MLX5_SET(modify_vport_state_in, in, admin_state, state); return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); @@ -255,7 +254,7 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int *list_size) @@ -373,7 +372,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, u16 vlans[], int *size) { @@ -526,7 +525,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, - u32 vport, u64 node_guid) + u16 vport, u64 node_guid) { int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); void *nic_vport_context; @@ -827,7 +826,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, 
EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, - u32 vport, + u16 vport, int *promisc_uc, int *promisc_mc, int *promisc_all) @@ -1057,7 +1056,7 @@ free: EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, - u64 *rx_discard_vport_down, + u8 other_vport, u64 *rx_discard_vport_down, u64 *tx_discard_vport_down) { u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0}; @@ -1068,8 +1067,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, MLX5_CMD_OP_QUERY_VNIC_ENV); MLX5_SET(query_vnic_env_in, in, op_mod, 0); MLX5_SET(query_vnic_env_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vnic_env_in, in, other_vport, 1); + MLX5_SET(query_vnic_env_in, in, other_vport, other_vport); err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (err) @@ -1204,9 +1202,19 @@ EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) { - if (!mdev->sys_image_guid) - mlx5_query_nic_vport_system_image_guid(mdev, &mdev->sys_image_guid); + int port_type_cap = MLX5_CAP_GEN(mdev, port_type); + u64 tmp = 0; + + if (mdev->sys_image_guid) + return mdev->sys_image_guid; + + if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH) + mlx5_query_nic_vport_system_image_guid(mdev, &tmp); + else + mlx5_query_hca_vport_system_image_guid(mdev, &tmp); + + mdev->sys_image_guid = tmp; - return mdev->sys_image_guid; + return tmp; } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 2dcbf1ebfd6a..953cc8efba69 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -155,7 +155,8 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_wq_ctrl *wq_ctrl) { - u8 
log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6; + /* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */ + u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 6 : 7; u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size); int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index b1293d153a58..ea934a48c90a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -177,9 +177,14 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq) return mlx5_cqwq_ctr2ix(wq, wq->cc); } -static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) +static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) { - return mlx5_frag_buf_get_wqe(&wq->fbc, ix); + struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix); + + /* For 128B CQEs the data is in the last 64B */ + cqe += wq->fbc.log_stride == 7; + + return cqe; } static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr) |