diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5')
65 files changed, 5317 insertions, 3659 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d324a3884462..9de9abacf7f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -12,17 +12,17 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5 core basic # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ - health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ + health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ - diag/fs_tracepoint.o diag/fw_tracer.o + fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ + lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o # # Netdev basic # mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ - en_selftest.o en/port.o + en_selftest.o en/port.o en/monitor_stats.o # # Netdev extra @@ -30,7 +30,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o # # Core extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a5a0823e5ada..d3125cdf69db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -40,9 +40,11 @@ #include <linux/random.h> #include <linux/io-mapping.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/eq.h> #include <linux/debugfs.h> #include "mlx5_core.h" +#include "lib/eq.h" enum { CMD_IF_REV = 5, @@ -313,6 +315,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_FPGA_DESTROY_QP: case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: + case MLX5_CMD_OP_PAGE_FAULT_RESUME: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -326,7 +329,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_MKEY: case MLX5_CMD_OP_QUERY_MKEY: case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: - case MLX5_CMD_OP_PAGE_FAULT_RESUME: case MLX5_CMD_OP_CREATE_EQ: case MLX5_CMD_OP_QUERY_EQ: case MLX5_CMD_OP_GEN_EQE: @@ -371,6 +373,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_SET_MONITOR_COUNTER: + case MLX5_CMD_OP_ARM_MONITOR_COUNTER: case MLX5_CMD_OP_SET_PP_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: @@ -520,6 +524,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); + MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER); + MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER); MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); @@ -805,6 +811,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in) return MLX5_GET(mbox_in, in->first.data, opcode); } +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); + static void cb_timeout_handler(struct work_struct *work) { struct delayed_work *dwork = container_of(work, struct delayed_work, @@ -1412,14 +1420,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) up(&cmd->sem); } +static int cmd_comp_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_core_dev *dev; + struct mlx5_cmd *cmd; + struct mlx5_eqe *eqe; + + cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb); + dev = container_of(cmd, struct mlx5_core_dev, cmd); + eqe = data; + + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); + + return NOTIFY_OK; +} void mlx5_cmd_use_events(struct mlx5_core_dev *dev) { + MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD); + mlx5_eq_notifier_register(dev, &dev->cmd.nb); mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); } void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) { mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); + mlx5_eq_notifier_unregister(dev, &dev->cmd.nb); } static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) @@ -1435,7 +1461,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1533,7 +1559,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) } } } -EXPORT_SYMBOL(mlx5_cmd_comp_handler); + +void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) +{ + unsigned long flags; + u64 vector; + + /* wait for pending handlers to complete */ + mlx5_eq_synchronize_cmd_irq(dev); + spin_lock_irqsave(&dev->cmd.alloc_lock, flags); + vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); + if (!vector) + goto no_trig; + + vector |= MLX5_TRIGGERED_CMD_COMP; + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); + + mlx5_core_dbg(dev, "vector 0x%llx\n", vector); + mlx5_cmd_comp_handler(dev, vector, true); + return; + +no_trig: + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); +} static int status_to_err(u8 status) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 4b85abb5c9f7..713a17ee3751 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -38,6 +38,7 @@ #include <rdma/ib_verbs.h> #include <linux/mlx5/cq.h> #include "mlx5_core.h" +#include "lib/eq.h" #define TASKLET_MAX_TIME 2 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) @@ -92,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; int err; - eq = mlx5_eqn2eq(dev, eqn); + eq = mlx5_eqn2comp_eq(dev, eqn); if (IS_ERR(eq)) return PTR_ERR(eq); @@ -119,12 +120,12 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, INIT_LIST_HEAD(&cq->tasklet_ctx.list); /* Add to comp EQ CQ tree to recv comp events */ - err = mlx5_eq_add_cq(eq, cq); + err = mlx5_eq_add_cq(&eq->core, cq); if (err) goto err_cmd; /* Add to async EQ CQ tree to recv async events */ - err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq); + err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq); if (err) goto err_cq_add; @@ -139,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, return 0; err_cq_add: - mlx5_eq_del_cq(eq, cq); + mlx5_eq_del_cq(&eq->core, cq); err_cmd: memset(din, 0, sizeof(din)); memset(dout, 0, sizeof(dout)); @@ -157,11 +158,11 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; int err; - err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq); + err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); if (err) return err; - err = mlx5_eq_del_cq(cq->eq, cq); + err = mlx5_eq_del_cq(&cq->eq->core, cq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 90fabd612b6c..a11e22d0b0cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -36,6 +36,7 @@ #include <linux/mlx5/cq.h> #include <linux/mlx5/driver.h> #include "mlx5_core.h" +#include "lib/eq.h" enum { QP_PID, @@ -349,6 +350,16 @@ out: return param; } +static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {}; + + MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); + MLX5_SET(query_eq_in, in, eq_number, eq->eqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq, int index) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 37ba7c78859d..ebc046fa97d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -45,75 +45,11 @@ struct mlx5_device_context { unsigned long state; }; -struct mlx5_delayed_event { - struct list_head list; - struct mlx5_core_dev *dev; - enum mlx5_dev_event event; - unsigned long param; -}; - enum { MLX5_INTERFACE_ADDED, MLX5_INTERFACE_ATTACHED, }; -static void add_delayed_event(struct mlx5_priv *priv, - struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_delayed_event *delayed_event; - - delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC); - if (!delayed_event) { - mlx5_core_err(dev, "event %d is missed\n", event); - return; - } - - mlx5_core_dbg(dev, "Accumulating event %d\n", event); - delayed_event->dev = dev; - delayed_event->event = event; - delayed_event->param = param; - list_add_tail(&delayed_event->list, &priv->waiting_events_list); -} - -static void delayed_event_release(struct mlx5_device_context *dev_ctx, - struct mlx5_priv *priv) -{ - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - struct mlx5_delayed_event *de; - struct mlx5_delayed_event *n; - struct list_head temp; - - INIT_LIST_HEAD(&temp); - - spin_lock_irq(&priv->ctx_lock); - - priv->is_accum_events = false; - list_splice_init(&priv->waiting_events_list, &temp); - if (!dev_ctx->context) - goto out; - list_for_each_entry_safe(de, n, &temp, list) - dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); - -out: - spin_unlock_irq(&priv->ctx_lock); - - list_for_each_entry_safe(de, n, &temp, list) { - list_del(&de->list); - kfree(de); - } -} - -/* accumulating events that can come after mlx5_ib calls to - * ib_register_device, till adding that interface to the events list. - */ -static void delayed_event_start(struct mlx5_priv *priv) -{ - spin_lock_irq(&priv->ctx_lock); - priv->is_accum_events = true; - spin_unlock_irq(&priv->ctx_lock); -} void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { @@ -129,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) dev_ctx->intf = intf; - delayed_event_start(priv); - dev_ctx->context = intf->add(dev); if (dev_ctx->context) { set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); @@ -139,22 +73,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (dev_ctx->intf->pfault) { - if (priv->pfault) { - mlx5_core_err(dev, "multiple page fault handlers not supported"); - } else { - priv->pfault_ctx = dev_ctx->context; - priv->pfault = dev_ctx->intf->pfault; - } - } -#endif spin_unlock_irq(&priv->ctx_lock); } - delayed_event_release(dev_ctx, priv); - if (!dev_ctx->context) kfree(dev_ctx); } @@ -179,15 +100,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (!dev_ctx) return; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - spin_lock_irq(&priv->ctx_lock); - if (priv->pfault == dev_ctx->intf->pfault) - priv->pfault = NULL; - spin_unlock_irq(&priv->ctx_lock); - - synchronize_srcu(&priv->pfault_srcu); -#endif - spin_lock_irq(&priv->ctx_lock); list_del(&dev_ctx->list); spin_unlock_irq(&priv->ctx_lock); @@ -207,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv if (!dev_ctx) return; - delayed_event_start(priv); if (intf->attach) { if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - goto out; + return; if (intf->attach(dev, dev_ctx->context)) - goto out; - + return; set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); } else { if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - goto out; + return; dev_ctx->context = intf->add(dev); if (!dev_ctx->context) - goto out; - + return; set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); } - -out: - delayed_event_release(dev_ctx, priv); } void mlx5_attach_device(struct mlx5_core_dev *dev) @@ -350,28 +256,6 @@ void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol) mutex_unlock(&mlx5_intf_mutex); } -void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) -{ - struct mlx5_priv *priv = &mdev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - void *result = NULL; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) - if ((dev_ctx->intf->protocol == protocol) && - dev_ctx->intf->get_dev) { - result = dev_ctx->intf->get_dev(dev_ctx->context); - break; - } - - spin_unlock_irqrestore(&priv->ctx_lock, flags); - - return result; -} -EXPORT_SYMBOL(mlx5_get_protocol_dev); - /* Must be called with intf_mutex held */ void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) { @@ -422,44 +306,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) return res; } -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - if (priv->is_accum_events) - add_delayed_event(priv, dev, event, param); - - /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is - * still in priv->ctx_list. In this case, only notify the dev_ctx if its - * ADDED or ATTACHED bit are set. - */ - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event && - (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) || - test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))) - dev_ctx->intf->event(dev, dev_ctx->context, event, param); - - spin_unlock_irqrestore(&priv->ctx_lock, flags); -} - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -void mlx5_core_page_fault(struct mlx5_core_dev *dev, - struct mlx5_pagefault *pfault) -{ - struct mlx5_priv *priv = &dev->priv; - int srcu_idx; - - srcu_idx = srcu_read_lock(&priv->pfault_srcu); - if (priv->pfault) - priv->pfault(dev, priv->pfault_ctx, pfault); - srcu_read_unlock(&priv->pfault_srcu, srcu_idx); -} -#endif void mlx5_dev_list_lock(void) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 0f11fff32a9b..424457ff9759 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -161,10 +161,10 @@ static void print_misc_parameters_hdrs(struct trace_seq *p, PRINT_MASKED_VAL(name, p, format); \ } DECLARE_MASK_VAL(u64, gre_key) = { - .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 | - MLX5_GET(fte_match_set_misc, mask, gre_key_l), - .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 | - MLX5_GET(fte_match_set_misc, value, gre_key_l)}; + .m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo), + .v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)}; PRINT_MASKED_VAL(gre_key, p, "%llu"); PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index d4ec93bde4de..6999f4486e9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -30,6 +30,7 @@ * SOFTWARE. */ #define CREATE_TRACE_POINTS +#include "lib/eq.h" #include "fw_tracer.h" #include "fw_tracer_tracepoint.h" @@ -846,9 +847,9 @@ free_tracer: return ERR_PTR(err); } -/* Create HW resources + start tracer - * must be called before Async EQ is created - */ +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data); + +/* Create HW resources + start tracer */ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) { struct mlx5_core_dev *dev; @@ -874,6 +875,9 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) goto err_dealloc_pd; } + MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER); + mlx5_eq_notifier_register(dev, &tracer->nb); + mlx5_fw_tracer_start(tracer); return 0; @@ -883,9 +887,7 @@ err_dealloc_pd: return err; } -/* Stop tracer + Cleanup HW resources - * must be called after Async EQ is destroyed - */ +/* Stop tracer + Cleanup HW resources */ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) { if (IS_ERR_OR_NULL(tracer)) @@ -893,7 +895,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n", tracer->owner); - + mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb); cancel_work_sync(&tracer->ownership_change_work); cancel_work_sync(&tracer->handle_traces_work); @@ -922,12 +924,11 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) kfree(tracer); } -void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data) { - struct mlx5_fw_tracer *tracer = dev->tracer; - - if (!tracer) - return; + struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb); + struct mlx5_core_dev *dev = tracer->dev; + struct mlx5_eqe *eqe = data; switch (eqe->sub_type) { case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE: @@ -942,6 +943,8 @@ void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n", eqe->sub_type); } + + return NOTIFY_OK; } EXPORT_TRACEPOINT_SYMBOL(mlx5_fw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index 0347f2dd5cee..a8b8747f2b61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -55,6 +55,7 @@ struct mlx5_fw_tracer { struct mlx5_core_dev *dev; + struct mlx5_nb nb; bool owner; u8 trc_ver; struct workqueue_struct *work_queue; @@ -170,6 +171,5 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev); int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); -void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index d7fbd5b6ac95..8fa8fdd30b85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -49,6 +49,7 @@ #include <net/switchdev.h> #include <net/xdp.h> #include <linux/net_dim.h> +#include <linux/bits.h> #include "wq.h" #include "mlx5_core.h" #include "en_stats.h" @@ -147,9 +148,6 @@ struct page_pool; MLX5_UMR_MTT_ALIGNMENT)) #define MLX5E_UMR_WQEBBS \ (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB)) -#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS - -#define MLX5E_NUM_MAIN_GROUPS 9 #define MLX5E_MSG_LEVEL NETIF_MSG_LINK @@ -178,8 +176,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return is_kdump_kernel() ? MLX5E_MIN_NUM_CHANNELS : - min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); + min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS); } /* Use this function to get max num channels after netdev was created */ @@ -214,22 +211,24 @@ struct mlx5e_umr_wqe { extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; enum mlx5e_priv_flag { - MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), - MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1), - MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2), - MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3), - MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4), + MLX5E_PFLAG_RX_CQE_BASED_MODER, + MLX5E_PFLAG_TX_CQE_BASED_MODER, + MLX5E_PFLAG_RX_CQE_COMPRESS, + MLX5E_PFLAG_RX_STRIDING_RQ, + MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, + MLX5E_PFLAG_XDP_TX_MPWQE, + MLX5E_NUM_PFLAGS, /* Keep last */ }; #define MLX5E_SET_PFLAG(params, pflag, enable) \ do { \ if (enable) \ - (params)->pflags |= (pflag); \ + (params)->pflags |= BIT(pflag); \ else \ - (params)->pflags &= ~(pflag); \ + (params)->pflags &= ~(BIT(pflag)); \ } while (0) -#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag))) +#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag)))) #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ @@ -247,9 +246,6 @@ struct mlx5e_params { bool lro_en; u32 lro_wqe_sz; u8 tx_min_inline_mode; - u8 rss_hfunc; - u8 toeplitz_hash_key[40]; - u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; bool vlan_strip_disable; bool scatter_fcs_en; bool rx_dim_enabled; @@ -349,7 +345,6 @@ enum { MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, MLX5E_SQ_STATE_TLS, - MLX5E_SQ_STATE_REDIRECT, }; struct mlx5e_sq_wqe_info { @@ -410,24 +405,51 @@ struct mlx5e_xdp_info { struct mlx5e_dma_info di; }; +struct mlx5e_xdp_info_fifo { + struct mlx5e_xdp_info *xi; + u32 *cc; + u32 *pc; + u32 mask; +}; + +struct mlx5e_xdp_wqe_info { + u8 num_wqebbs; + u8 num_ds; +}; + +struct mlx5e_xdp_mpwqe { + /* Current MPWQE session */ + struct mlx5e_tx_wqe *wqe; + u8 ds_count; + u8 max_ds_count; +}; + +struct mlx5e_xdpsq; +typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*, + struct mlx5e_xdp_info*); struct mlx5e_xdpsq { /* data path */ /* dirtied @completion */ + u32 xdpi_fifo_cc; u16 cc; bool redirect_flush; /* dirtied @xmit */ - u16 pc ____cacheline_aligned_in_smp; - bool doorbell; + u32 xdpi_fifo_pc ____cacheline_aligned_in_smp; + u16 pc; + struct mlx5_wqe_ctrl_seg *doorbell_cseg; + struct mlx5e_xdp_mpwqe mpwqe; struct mlx5e_cq cq; /* read only */ struct mlx5_wq_cyc wq; struct mlx5e_xdpsq_stats *stats; + mlx5e_fp_xmit_xdp_frame xmit_xdp_frame; struct { - struct mlx5e_xdp_info *xdpi; + struct mlx5e_xdp_wqe_info *wqe_info; + struct mlx5e_xdp_info_fifo xdpi_fifo; } db; void __iomem *uar_map; u32 sqn; @@ -569,6 +591,7 @@ struct mlx5e_rq { unsigned long state; int ix; + unsigned int hw_mtu; struct net_dim dim; /* Dynamic Interrupt Moderation */ @@ -632,7 +655,6 @@ struct mlx5e_channel_stats { } ____cacheline_aligned_in_smp; enum { - MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, }; @@ -653,6 +675,13 @@ enum { MLX5E_NIC_PRIO }; +struct mlx5e_rss_params { + u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; + u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; + u8 toeplitz_hash_key[40]; + u8 hfunc; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -673,6 +702,7 @@ struct mlx5e_priv { struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS]; struct mlx5e_flow_steering fs; @@ -682,6 +712,8 @@ struct mlx5e_priv { struct work_struct set_rx_mode_work; struct work_struct tx_timeout_work; struct work_struct update_stats_work; + struct work_struct monitor_counters_work; + struct mlx5_nb monitor_counters_nb; struct mlx5_core_dev *mdev; struct net_device *netdev; @@ -691,6 +723,8 @@ struct mlx5e_priv { struct hwtstamp_config tstamp; u16 q_counter; u16 drop_rq_q_counter; + struct notifier_block events_nb; + #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx dcbx; #endif @@ -768,6 +802,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); void mlx5e_update_stats(struct mlx5e_priv *priv); +void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_init_l2_addr(struct mlx5e_priv *priv); int mlx5e_self_test_num(struct mlx5e_priv *priv); @@ -798,9 +833,11 @@ struct mlx5e_redirect_rqt_param { int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, struct mlx5e_redirect_rqt_param rrp); -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, - enum mlx5e_traffic_types tt, +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, + const struct mlx5e_tirc_config *ttconfig, void *tirc, bool inner); +void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen); +struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -930,14 +967,16 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); int mlx5e_create_tises(struct mlx5e_priv *priv); -void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); +void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); +void mlx5e_update_ndo_stats(struct mlx5e_priv *priv); void mlx5e_queue_update_stats(struct mlx5e_priv *priv); int mlx5e_bits_invert(unsigned long a, int size); typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv); +int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv); int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, change_hw_mtu_cb set_mtu_cb); @@ -961,12 +1000,20 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal); int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal); +int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings); +int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct ethtool_link_ksettings *link_ksettings); u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); +void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam); +int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam); /* mlx5e generic netdev management API */ int mlx5e_netdev_init(struct net_device *netdev, @@ -982,12 +1029,26 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -void mlx5e_build_rss_params(struct mlx5e_params *params); +void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, + u16 num_channels); u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); + +void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti); +void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti); +netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features); +#ifdef CONFIG_MLX5_ESWITCH +int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac); +int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); +int mlx5e_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); +int mlx5e_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats); +#endif #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 1431232c9a09..be5961ff24cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -73,6 +73,22 @@ enum mlx5e_traffic_types { MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; +struct mlx5e_tirc_config { + u8 l3_prot_type; + u8 l4_prot_type; + u32 rx_hash_fields; +}; + +#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP) +#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_L4_SPORT |\ + MLX5_HASH_FIELD_SEL_L4_DPORT) +#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_IPSEC_SPI) + enum mlx5e_tunnel_types { MLX5E_TT_IPV4_GRE, MLX5E_TT_IPV6_GRE, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c new file mode 100644 index 000000000000..2ce420851e77 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include "en.h" +#include "monitor_stats.h" +#include "lib/eq.h" + +/* Driver will set the following watch counters list: + * Ppcnt.802_3: + * a_in_range_length_errors Type: 0x0, Counter: 0x0, group_id = N/A + * a_out_of_range_length_field Type: 0x0, Counter: 0x1, group_id = N/A + * a_frame_too_long_errors Type: 0x0, Counter: 0x2, group_id = N/A + * a_frame_check_sequence_errors Type: 0x0, Counter: 0x3, group_id = N/A + * a_alignment_errors Type: 0x0, Counter: 0x4, group_id = N/A + * if_out_discards Type: 0x0, Counter: 0x5, group_id = N/A + * Q_Counters: + * Q[index].rx_out_of_buffer Type: 0x1, Counter: 0x4, group_id = counter_ix + */ + +#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1 +#define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1 + +int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters)) + return false; + if (MLX5_CAP_PCAM_REG(mdev, ppcnt) && + MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters) < + NUM_REQ_PPCNT_COUNTER_S1) + return false; + if (MLX5_CAP_GEN(mdev, num_q_monitor_counters) < + NUM_REQ_Q_COUNTERS_S1) + return false; + return true; +} + +void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {}; + u32 out[MLX5_ST_SZ_DW(arm_monitor_counter_out)] = {}; + + MLX5_SET(arm_monitor_counter_in, in, opcode, + MLX5_CMD_OP_ARM_MONITOR_COUNTER); + mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)); +} + +static void mlx5e_monitor_counters_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + monitor_counters_work); + + mutex_lock(&priv->state_lock); + mlx5e_update_ndo_stats(priv); + mutex_unlock(&priv->state_lock); + mlx5e_monitor_counter_arm(priv); +} + +static int mlx5e_monitor_event_handler(struct notifier_block *nb, + unsigned long event, void *eqe) +{ + struct mlx5e_priv *priv = mlx5_nb_cof(nb, struct mlx5e_priv, + monitor_counters_nb); + queue_work(priv->wq, &priv->monitor_counters_work); + return NOTIFY_OK; +} + +void mlx5e_monitor_counter_start(struct mlx5e_priv *priv) +{ + MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler, + MONITOR_COUNTER); + mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb); +} + +static void mlx5e_monitor_counter_stop(struct mlx5e_priv *priv) +{ + mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb); + cancel_work_sync(&priv->monitor_counters_work); +} + +static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in) +{ + enum mlx5_monitor_counter_ppcnt ppcnt_cnt; + + for (ppcnt_cnt = 0; + ppcnt_cnt < NUM_REQ_PPCNT_COUNTER_S1; + ppcnt_cnt++, cnt++) { + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].type, + MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter, + ppcnt_cnt); + } + return ppcnt_cnt; +} + +static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in) +{ + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].type, + MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter, + MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter_group_id, + q_counter); + return 1; +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters); + int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters); + int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 : + MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters); + u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {}; + int q_counter = priv->q_counter; + int cnt = 0; + + if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 && + max_num_of_counters >= (NUM_REQ_PPCNT_COUNTER_S1 + cnt)) + cnt += fill_monitor_counter_ppcnt_set1(cnt, in); + + if (num_q_counters >= NUM_REQ_Q_COUNTERS_S1 && + max_num_of_counters >= (NUM_REQ_Q_COUNTERS_S1 + cnt) && + q_counter) + cnt += fill_monitor_counter_q_counter_set1(cnt, q_counter, in); + + MLX5_SET(set_monitor_counter_in, in, num_of_counters, cnt); + MLX5_SET(set_monitor_counter_in, in, opcode, + MLX5_CMD_OP_SET_MONITOR_COUNTER); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +void mlx5e_monitor_counter_init(struct mlx5e_priv *priv) +{ + INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work); + mlx5e_monitor_counter_start(priv); + mlx5e_set_monitor_counter(priv); + mlx5e_monitor_counter_arm(priv); + queue_work(priv->wq, &priv->update_stats_work); +} + +static void mlx5e_monitor_counter_disable(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {}; + + MLX5_SET(set_monitor_counter_in, in, num_of_counters, 0); + MLX5_SET(set_monitor_counter_in, in, opcode, + MLX5_CMD_OP_SET_MONITOR_COUNTER); + + mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)); +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv) +{ + mlx5e_monitor_counter_disable(priv); + mlx5e_monitor_counter_stop(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h new file mode 100644 index 000000000000..e1ac4b3d22fb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_MONITOR_H__ +#define __MLX5_MONITOR_H__ + +int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_init(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv); + +#endif /* __MLX5_MONITOR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 023dc4bccd28..4a37713023be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -88,10 +88,8 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); *speed = mlx5e_port_ptys2speed(eth_proto_oper); - if (!(*speed)) { - mlx5_core_warn(mdev, "cannot get port speed\n"); + if (!(*speed)) err = -EINVAL; - } return err; } @@ -258,7 +256,7 @@ static int mlx5e_fec_admin_field(u32 *pplm, case 40000: if (!write) *fec_policy = MLX5_GET(pplm_reg, pplm, - fec_override_cap_10g_40g); + fec_override_admin_10g_40g); else MLX5_SET(pplm_reg, pplm, fec_override_admin_10g_40g, *fec_policy); @@ -310,7 +308,7 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, case 10000: case 40000: *fec_cap = MLX5_GET(pplm_reg, pplm, - fec_override_admin_10g_40g); + fec_override_cap_10g_40g); break; case 25000: *fec_cap = MLX5_GET(pplm_reg, pplm, @@ -394,12 +392,12 @@ int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) { + u8 fec_policy_nofec = BIT(MLX5E_FEC_NOFEC); bool fec_mode_not_supp_in_speed = false; - u8 no_fec_policy = BIT(MLX5E_FEC_NOFEC); u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(pplm_reg); - u32 current_fec_speed; + u8 fec_policy_auto = 0; u8 fec_caps = 0; int err; int i; @@ -415,23 +413,19 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) if (err) return err; - err = mlx5e_port_linkspeed(dev, ¤t_fec_speed); - if (err) - return err; + MLX5_SET(pplm_reg, out, local_port, 1); - memset(in, 0, sz); - MLX5_SET(pplm_reg, in, local_port, 1); - for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS && !!fec_policy; i++) { + for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS; i++) { mlx5e_get_fec_cap_field(out, &fec_caps, fec_supported_speeds[i]); - /* policy supported for link speed */ - if (!!(fec_caps & fec_policy)) { - mlx5e_fec_admin_field(in, &fec_policy, 1, + /* policy supported for link speed, or policy is auto */ + if (fec_caps & fec_policy || fec_policy == fec_policy_auto) { + mlx5e_fec_admin_field(out, &fec_policy, 1, fec_supported_speeds[i]); } else { - if (fec_supported_speeds[i] == current_fec_speed) - return -EOPNOTSUPP; - mlx5e_fec_admin_field(in, &no_fec_policy, 1, - fec_supported_speeds[i]); + /* turn off FEC if supported. Else, leave it the same */ + if (fec_caps & fec_policy_nofec) + mlx5e_fec_admin_field(out, &fec_policy_nofec, 1, + fec_supported_speeds[i]); fec_mode_not_supp_in_speed = true; } } @@ -441,5 +435,5 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) "FEC policy 0x%x is not supported for some speeds", fec_policy); - return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 1); + return mlx5_core_access_reg(dev, out, sz, out, sz, MLX5_REG_PPLM, 0, 1); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index c047da8752da..eac245a93f91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -130,8 +130,10 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) int err; err = mlx5e_port_linkspeed(priv->mdev, &speed); - if (err) + if (err) { + mlx5_core_warn(priv->mdev, "cannot get port speed\n"); return 0; + } xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c new file mode 100644 index 000000000000..046948ead152 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -0,0 +1,634 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/vxlan.h> +#include <net/gre.h> +#include "lib/vxlan.h" +#include "en/tc_tun.h" + +static int get_route_and_out_devs(struct mlx5e_priv *priv, + struct net_device *dev, + struct net_device **route_dev, + struct net_device **out_dev) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_dev, *uplink_upper; + bool dst_is_lag_dev; + + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + dst_is_lag_dev = (uplink_upper && + netif_is_lag_master(uplink_upper) && + dev == uplink_upper && + mlx5_lag_is_sriov(priv->mdev)); + + /* if the egress device isn't on the same HW e-switch or + * it's a LAG device, use the uplink + */ + if (!switchdev_port_same_parent_id(priv->netdev, dev) || + dst_is_lag_dev) { + *route_dev = uplink_dev; + *out_dev = *route_dev; + } else { + *route_dev = dev; + if (is_vlan_dev(*route_dev)) + *out_dev = uplink_dev; + else if (mlx5e_eswitch_rep(dev)) + *out_dev = *route_dev; + else + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct net_device **route_dev, + struct flowi4 *fl4, + struct neighbour **out_n, + u8 *out_ttl) +{ + struct rtable *rt; + struct neighbour *n = NULL; + +#if IS_ENABLED(CONFIG_INET) + int ret; + + rt = ip_route_output_key(dev_net(mirred_dev), fl4); + ret = PTR_ERR_OR_ZERO(rt); + if (ret) + return ret; +#else + return -EOPNOTSUPP; +#endif + + ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev); + if (ret < 0) + return ret; + + if (!(*out_ttl)) + *out_ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &fl4->daddr); + ip_rt_put(rt); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + +static const char *mlx5e_netdev_kind(struct net_device *dev) +{ + if (dev->rtnl_link_ops) + return dev->rtnl_link_ops->kind; + else + return ""; +} + +static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct net_device **route_dev, + struct flowi6 *fl6, + struct neighbour **out_n, + u8 *out_ttl) +{ + struct neighbour *n = NULL; + struct dst_entry *dst; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + int ret; + + ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, + fl6); + if (ret < 0) + return ret; + + if (!(*out_ttl)) + *out_ttl = ip6_dst_hoplimit(dst); + + ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev); + if (ret < 0) + return ret; +#else + return -EOPNOTSUPP; +#endif + + n = dst_neigh_lookup(dst, &fl6->daddr); + dst_release(dst); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + +static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) +{ + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh = (struct vxlanhdr *) + ((char *)udp + sizeof(struct udphdr)); + + udp->dest = tun_key->tp_dst; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(tun_id); + + return 0; +} + +static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key) +{ + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + int hdr_len; + struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); + + /* the HW does not calculate GRE csum or sequences */ + if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + return -EOPNOTSUPP; + + greh->protocol = htons(ETH_P_TEB); + + /* GRE key */ + hdr_len = gre_calc_hlen(tun_key->tun_flags); + greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); + if (tun_key->tun_flags & TUNNEL_KEY) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + *ptr = tun_id; + } + + return 0; +} + +static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + int err = 0; + struct ip_tunnel_key *key = &e->tun_info.key; + + if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + *ip_proto = IPPROTO_UDP; + err = mlx5e_gen_vxlan_header(buf, key); + } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + *ip_proto = IPPROTO_GRE; + err = mlx5e_gen_gre_header(buf, key); + } else { + pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" + , e->tunnel_type); + err = -EOPNOTSUPP; + } + + return err; +} + +static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, + struct mlx5e_encap_entry *e, + u16 proto) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + char *ip; + + ether_addr_copy(eth->h_dest, e->h_dest); + ether_addr_copy(eth->h_source, dev->dev_addr); + if (is_vlan_dev(dev)) { + struct vlan_hdr *vlan = (struct vlan_hdr *) + ((char *)eth + ETH_HLEN); + ip = (char *)vlan + VLAN_HLEN; + eth->h_proto = vlan_dev_vlan_proto(dev); + vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev)); + vlan->h_vlan_encapsulated_proto = htons(proto); + } else { + eth->h_proto = htons(proto); + ip = (char *)eth + ETH_HLEN; + } + + return ip; +} + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev, *route_dev; + struct neighbour *n = NULL; + struct flowi4 fl4 = {}; + int ipv4_encap_size; + char *encap_header; + u8 nud_state, ttl; + struct iphdr *ip; + int err; + + /* add the IP fields */ + fl4.flowi4_tos = tun_key->tos; + fl4.daddr = tun_key->u.ipv4.dst; + fl4.saddr = tun_key->u.ipv4.src; + ttl = tun_key->ttl; + + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev, + &fl4, &n, &ttl); + if (err) + return err; + + ipv4_encap_size = + (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct iphdr) + + e->tunnel_hlen; + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) + goto free_encap; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); + + /* add ethernet header */ + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, + ETH_P_IP); + + /* add ip header */ + ip->tos = tun_key->tos; + ip->version = 0x4; + ip->ihl = 0x5; + ip->ttl = ttl; + ip->daddr = fl4.daddr; + ip->saddr = fl4.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), + &ip->protocol, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + err = -EAGAIN; + goto out; + } + + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +out: + if (n) + neigh_release(n); + return err; +} + +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev, *route_dev; + struct neighbour *n = NULL; + struct flowi6 fl6 = {}; + struct ipv6hdr *ip6h; + int ipv6_encap_size; + char *encap_header; + u8 nud_state, ttl; + int err; + + ttl = tun_key->ttl; + + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + fl6.daddr = tun_key->u.ipv6.dst; + fl6.saddr = tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev, + &fl6, &n, &ttl); + if (err) + return err; + + ipv6_encap_size = + (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct ipv6hdr) + + e->tunnel_hlen; + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's importent to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) + goto free_encap; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); + + /* add ethernet header */ + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, + ETH_P_IPV6); + + /* add ip header */ + ip6_flow_hdr(ip6h, tun_key->tos, 0); + /* the HW fills up ipv6 payload len */ + ip6h->hop_limit = ttl; + ip6h->daddr = fl6.daddr; + ip6h->saddr = fl6.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), + &ip6h->nexthdr, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + err = -EAGAIN; + goto out; + } + + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +out: + if (n) + neigh_release(n); + return err; +} + +int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return MLX5E_TC_TUNNEL_TYPE_VXLAN; + else if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return MLX5E_TC_TUNNEL_TYPE_GRETAP; + else + return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; +} + +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev) +{ + int tunnel_type = mlx5e_tc_tun_get_type(netdev); + + if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && + MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) + return true; + else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP && + MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) + return true; + else + return false; +} + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev); + + if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + int dst_port = be16_to_cpu(e->tun_info.key.tp_dst); + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + e->tunnel_hlen = VXLAN_HLEN; + } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; + e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags); + } else { + e->reformat_type = -1; + e->tunnel_hlen = -1; + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->mask); + void *misc_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + misc_parameters); + + /* Full udp dst port must be given */ + if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) || + memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "VXLAN decap filter must include enc_dst_port condition"); + netdev_warn(priv->netdev, + "VXLAN decap filter must include enc_dst_port condition\n"); + return -EOPNOTSUPP; + } + + /* udp dst port must be knonwn as a VXLAN port */ + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(key->dst)); + return -EOPNOTSUPP; + } + + /* dst UDP port is valid here */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(key->src)); + + /* match on VNI */ + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->key); + struct flow_dissector_key_keyid *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(key->keyid)); + } + return 0; +} + +static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *outer_headers_c, + void *outer_headers_v) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "GRE HW offloading is not supported"); + netdev_warn(priv->netdev, "GRE HW offloading is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ip_protocol, IPPROTO_GRE); + + /* gre protocol*/ + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); + MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); + + /* gre key */ + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *mask = NULL; + struct flow_dissector_key_keyid *key = NULL; + + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + MLX5_SET(fte_match_set_misc, misc_c, + gre_key.key, be32_to_cpu(mask->keyid)); + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->key); + MLX5_SET(fte_match_set_misc, misc_v, + gre_key.key, be32_to_cpu(key->keyid)); + } + + return 0; +} + +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + int tunnel_type; + int err = 0; + + tunnel_type = mlx5e_tc_tun_get_type(filter_dev); + if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, + headers_c, headers_v); + } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + err = mlx5e_tc_tun_parse_gretap(priv, spec, f, + headers_c, headers_v); + } else { + netdev_warn(priv->netdev, + "decapsulation offload is not supported for %s net device (%d)\n", + mlx5e_netdev_kind(filter_dev), tunnel_type); + return -EOPNOTSUPP; + } + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h new file mode 100644 index 000000000000..706ce7bf15e7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_TUNNEL_H__ +#define __MLX5_EN_TC_TUNNEL_H__ + +#include <linux/netdevice.h> +#include <linux/mlx5/fs.h> +#include <net/pkt_cls.h> +#include <linux/netlink.h> +#include "en.h" +#include "en_rep.h" + +enum { + MLX5E_TC_TUNNEL_TYPE_UNKNOWN, + MLX5E_TC_TUNNEL_TYPE_VXLAN, + MLX5E_TC_TUNNEL_TYPE_GRETAP +}; + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); + +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); + +int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev); +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev); + +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v); + +#endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index ad6d471d00dd..3740177eed09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -47,7 +47,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, xdpi.xdpf->len, PCI_DMA_TODEVICE); xdpi.di = *di; - return mlx5e_xmit_xdp_frame(sq, &xdpi); + return sq->xmit_xdp_frame(sq, &xdpi); } /* returns true if packet was consumed by xdp */ @@ -102,7 +102,98 @@ xdp_abort: } } -bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) +static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5_wq_cyc *wq = &sq->wq; + u8 wqebbs; + u16 pi; + + mlx5e_xdpsq_fetch_wqe(sq, &session->wqe); + + prefetchw(session->wqe->data); + session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + +/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS + * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. + * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a + * full-session WQE be cache-aligned. + */ +#if L1_CACHE_BYTES < 128 +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) +#else +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) +#endif + + wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi), + MLX5E_XDP_MPW_MAX_WQEBBS); + + session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs; +} + +static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl; + u16 ds_count = session->ds_count; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + + cseg->opmod_idx_opcode = + cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); + + wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS); + wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT; + + sq->pc += wi->num_wqebbs; + + sq->doorbell_cseg = cseg; + + session->wqe = NULL; /* Close session */ +} + +static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_info *xdpi) +{ + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_xdpsq_stats *stats = sq->stats; + + dma_addr_t dma_addr = xdpi->dma_addr; + struct xdp_frame *xdpf = xdpi->xdpf; + unsigned int dma_len = xdpf->len; + + if (unlikely(sq->hw_mtu < dma_len)) { + stats->err++; + return false; + } + + if (unlikely(!session->wqe)) { + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + MLX5_SEND_WQE_MAX_WQEBBS))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + stats->full++; + return false; + } + + mlx5e_xdp_mpwqe_session_start(sq); + } + + mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len); + + if (unlikely(session->ds_count == session->max_ds_count)) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + stats->xmit++; + return true; +} + +static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) { struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); @@ -126,11 +217,8 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) } if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { - if (sq->doorbell) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - sq->doorbell = false; - } + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); stats->full++; return false; } @@ -152,23 +240,20 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - /* move page to reference to sq responsibility, - * and mark so it's not put back in page-cache. - */ - sq->db.xdpi[pi] = *xdpi; sq->pc++; - sq->doorbell = true; + sq->doorbell_cseg = cseg; + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); stats->xmit++; return true; } -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) { + struct mlx5e_xdp_info_fifo *xdpi_fifo; struct mlx5e_xdpsq *sq; struct mlx5_cqe64 *cqe; - struct mlx5e_rq *rq; bool is_redirect; u16 sqcc; int i; @@ -182,8 +267,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) if (!cqe) return false; - is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); - rq = container_of(sq, struct mlx5e_rq, xdpsq); + is_redirect = !rq; + xdpi_fifo = &sq->db.xdpi_fifo; /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur @@ -199,20 +284,33 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) wqe_counter = be16_to_cpu(cqe->wqe_counter); + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) + netdev_WARN_ONCE(sq->channel->netdev, + "Bad OP in XDPSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + do { - u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); - struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci]; + struct mlx5e_xdp_wqe_info *wi; + u16 ci, j; last_wqe = (sqcc == wqe_counter); - sqcc++; - - if (is_redirect) { - xdp_return_frame(xdpi->xdpf); - dma_unmap_single(sq->pdev, xdpi->dma_addr, - xdpi->xdpf->len, DMA_TO_DEVICE); - } else { - /* Recycle RX page */ - mlx5e_page_release(rq, &xdpi->di, true); + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + + sqcc += wi->num_wqebbs; + + for (j = 0; j < wi->num_ds; j++) { + struct mlx5e_xdp_info xdpi = + mlx5e_xdpi_fifo_pop(xdpi_fifo); + + if (is_redirect) { + xdp_return_frame(xdpi.xdpf); + dma_unmap_single(sq->pdev, xdpi.dma_addr, + xdpi.xdpf->len, DMA_TO_DEVICE); + } else { + /* Recycle RX page */ + mlx5e_page_release(rq, &xdpi.di, true); + } } } while (!last_wqe); } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); @@ -228,27 +326,32 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) { - struct mlx5e_rq *rq; - bool is_redirect; - - is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); - rq = is_redirect ? NULL : container_of(sq, struct mlx5e_rq, xdpsq); + struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; + bool is_redirect = !rq; while (sq->cc != sq->pc) { - u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); - struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci]; - - sq->cc++; - - if (is_redirect) { - xdp_return_frame(xdpi->xdpf); - dma_unmap_single(sq->pdev, xdpi->dma_addr, - xdpi->xdpf->len, DMA_TO_DEVICE); - } else { - /* Recycle RX page */ - mlx5e_page_release(rq, &xdpi->di, false); + struct mlx5e_xdp_wqe_info *wi; + u16 ci, i; + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); + wi = &sq->db.wqe_info[ci]; + + sq->cc += wi->num_wqebbs; + + for (i = 0; i < wi->num_ds; i++) { + struct mlx5e_xdp_info xdpi = + mlx5e_xdpi_fifo_pop(xdpi_fifo); + + if (is_redirect) { + xdp_return_frame(xdpi.xdpf); + dma_unmap_single(sq->pdev, xdpi.dma_addr, + xdpi.xdpf->len, DMA_TO_DEVICE); + } else { + /* Recycle RX page */ + mlx5e_page_release(rq, &xdpi.di, false); + } } } } @@ -292,7 +395,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, xdpi.xdpf = xdpf; - if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) { + if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) { dma_unmap_single(sq->pdev, xdpi.dma_addr, xdpf->len, DMA_TO_DEVICE); xdp_return_frame_rx_napi(xdpf); @@ -300,8 +403,33 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, } } - if (flags & XDP_XMIT_FLUSH) + if (flags & XDP_XMIT_FLUSH) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); mlx5e_xmit_xdp_doorbell(sq); + } return n - drops; } + +void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) +{ + struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + + if (xdpsq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(xdpsq); + + mlx5e_xmit_xdp_doorbell(xdpsq); + + if (xdpsq->redirect_flush) { + xdp_do_flush_map(); + xdpsq->redirect_flush = false; + } +} + +void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) +{ + sq->xmit_xdp_frame = is_mpw ? + mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 6dfab045925f..3a67cb3cd179 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -37,27 +37,62 @@ #define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \ MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM))) #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) -#define MLX5E_XDP_TX_DS_COUNT \ - ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) +#define MLX5E_XDP_TX_EMPTY_DS_COUNT \ + (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) +#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, void *va, u16 *rx_headroom, u32 *len); -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); - -bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq); +void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw); +void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { + if (sq->doorbell_cseg) { + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg); + sq->doorbell_cseg = NULL; + } +} + +static inline void +mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len) +{ + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_data_seg *dseg = + (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; +} + +static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, + struct mlx5e_tx_wqe **wqe) +{ struct mlx5_wq_cyc *wq = &sq->wq; - struct mlx5e_tx_wqe *wqe; - u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memset(*wqe, 0, sizeof(**wqe)); +} - wqe = mlx5_wq_cyc_get_wqe(wq, pi); +static inline void +mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo, + struct mlx5e_xdp_info *xi) +{ + u32 i = (*fifo->pc)++ & fifo->mask; - mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl); + fifo->xi[i] = *xi; +} + +static inline struct mlx5e_xdp_info +mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo) +{ + return fifo->xi[(*fifo->cc)++ & fifo->mask]; } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 128a82b1dbfc..53608afd39b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -254,11 +254,13 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, struct mlx5e_ipsec_metadata *mdata; struct mlx5e_ipsec_sa_entry *sa_entry; struct xfrm_state *x; + struct sec_path *sp; if (!xo) return skb; - if (unlikely(skb->sp->len != 1)) { + sp = skb_sec_path(skb); + if (unlikely(sp->len != 1)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle); goto drop; } @@ -305,10 +307,11 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, struct mlx5e_priv *priv = netdev_priv(netdev); struct xfrm_offload *xo; struct xfrm_state *xs; + struct sec_path *sp; u32 sa_handle; - skb->sp = secpath_dup(skb->sp); - if (unlikely(!skb->sp)) { + sp = secpath_set(skb); + if (unlikely(!sp)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc); return NULL; } @@ -320,8 +323,9 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, return NULL; } - skb->sp->xvec[skb->sp->len++] = xs; - skb->sp->olen++; + sp = skb_sec_path(skb); + sp->xvec[sp->len++] = xs; + sp->olen++; xo = xfrm_offload(skb); xo->flags = CRYPTO_DONE; @@ -372,10 +376,11 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features) { + struct sec_path *sp = skb_sec_path(skb); struct xfrm_state *x; - if (skb->sp && skb->sp->len) { - x = skb->sp->xvec[0]; + if (sp && sp->len) { + x = sp->xvec[0]; if (x && x->xso.offload_handle) return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3e770abfd802..c9df08133718 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -135,14 +135,15 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); } -static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { - "rx_cqe_moder", - "tx_cqe_moder", - "rx_cqe_compress", - "rx_striding_rq", - "rx_no_csum_complete", +typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); + +struct pflag_desc { + char name[ETH_GSTRING_LEN]; + mlx5e_pflag_handler handler; }; +static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS]; + int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { int i, num_stats = 0; @@ -153,7 +154,7 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) num_stats += mlx5e_stats_grps[i].get_num_stats(priv); return num_stats; case ETH_SS_PRIV_FLAGS: - return ARRAY_SIZE(mlx5e_priv_flags); + return MLX5E_NUM_PFLAGS; case ETH_SS_TEST: return mlx5e_self_test_num(priv); /* fallthrough */ @@ -183,8 +184,9 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_PRIV_FLAGS: - for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++) - strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]); + for (i = 0; i < MLX5E_NUM_PFLAGS; i++) + strcpy(data + i * ETH_GSTRING_LEN, + mlx5e_priv_flags[i].name); break; case ETH_SS_TEST: @@ -353,7 +355,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, new_channels.params = priv->channels.params; new_channels.params.num_channels = count; if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt, + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { @@ -785,10 +787,9 @@ static void get_lp_advertising(u32 eth_proto_lp, ptys2ethtool_adver_link(lp_advertising, eth_proto_lp); } -static int mlx5e_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *link_ksettings) +int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; u32 rx_pause = 0; @@ -804,7 +805,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (err) { - netdev_err(netdev, "%s: query port ptys failed: %d\n", + netdev_err(priv->netdev, "%s: query port ptys failed: %d\n", __func__, err); goto err_query_regs; } @@ -824,7 +825,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, get_supported(eth_proto_cap, link_ksettings); get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings); - get_speed_duplex(netdev, eth_proto_oper, link_ksettings); + get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; @@ -843,9 +844,8 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Autoneg); - err = get_fec_supported_advertised(mdev, link_ksettings); - if (err) - netdev_dbg(netdev, "%s: FEC caps query failed: %d\n", + if (get_fec_supported_advertised(mdev, link_ksettings)) + netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n", __func__, err); if (!an_disable_admin) @@ -856,6 +856,14 @@ err_query_regs: return err; } +static int mlx5e_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); +} + static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) { u32 i, ptys_modes = 0; @@ -870,10 +878,9 @@ static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) return ptys_modes; } -static int mlx5e_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *link_ksettings) +int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct ethtool_link_ksettings *link_ksettings) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; u32 eth_proto_cap, eth_proto_admin; bool an_changes = false; @@ -893,14 +900,14 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev, err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); if (err) { - netdev_err(netdev, "%s: query port eth proto cap failed: %d\n", + netdev_err(priv->netdev, "%s: query port eth proto cap failed: %d\n", __func__, err); goto out; } link_modes = link_modes & eth_proto_cap; if (!link_modes) { - netdev_err(netdev, "%s: Not supported link mode(s) requested", + netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", __func__); err = -EINVAL; goto out; @@ -908,7 +915,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev, err = mlx5_query_port_proto_admin(mdev, ð_proto_admin, MLX5_PTYS_EN); if (err) { - netdev_err(netdev, "%s: query port eth proto admin failed: %d\n", + netdev_err(priv->netdev, "%s: query port eth proto admin failed: %d\n", __func__, err); goto out; } @@ -930,9 +937,17 @@ out: return err; } +static int mlx5e_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); +} + u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv) { - return sizeof(priv->channels.params.toeplitz_hash_key); + return sizeof(priv->rss_params.toeplitz_hash_key); } static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) @@ -958,50 +973,27 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rss_params *rss = &priv->rss_params; if (indir) - memcpy(indir, priv->channels.params.indirection_rqt, - sizeof(priv->channels.params.indirection_rqt)); + memcpy(indir, rss->indirection_rqt, + sizeof(rss->indirection_rqt)); if (key) - memcpy(key, priv->channels.params.toeplitz_hash_key, - sizeof(priv->channels.params.toeplitz_hash_key)); + memcpy(key, rss->toeplitz_hash_key, + sizeof(rss->toeplitz_hash_key)); if (hfunc) - *hfunc = priv->channels.params.rss_hfunc; + *hfunc = rss->hfunc; return 0; } -static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) -{ - void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - struct mlx5_core_dev *mdev = priv->mdev; - int ctxlen = MLX5_ST_SZ_BYTES(tirc); - int tt; - - MLX5_SET(modify_tir_in, in, bitmask.hash, 1); - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); - mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); - } - - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) - return; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); - mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen); - } -} - static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rss_params *rss = &priv->rss_params; int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); bool hash_changed = false; void *in; @@ -1017,15 +1009,14 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(&priv->state_lock); - if (hfunc != ETH_RSS_HASH_NO_CHANGE && - hfunc != priv->channels.params.rss_hfunc) { - priv->channels.params.rss_hfunc = hfunc; + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) { + rss->hfunc = hfunc; hash_changed = true; } if (indir) { - memcpy(priv->channels.params.indirection_rqt, indir, - sizeof(priv->channels.params.indirection_rqt)); + memcpy(rss->indirection_rqt, indir, + sizeof(rss->indirection_rqt)); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { u32 rqtn = priv->indir_rqt.rqtn; @@ -1033,7 +1024,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, .is_rss = true, { .rss = { - .hfunc = priv->channels.params.rss_hfunc, + .hfunc = rss->hfunc, .channels = &priv->channels, }, }, @@ -1044,10 +1035,9 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, } if (key) { - memcpy(priv->channels.params.toeplitz_hash_key, key, - sizeof(priv->channels.params.toeplitz_hash_key)); - hash_changed = hash_changed || - priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP; + memcpy(rss->toeplitz_hash_key, key, + sizeof(rss->toeplitz_hash_key)); + hash_changed = hash_changed || rss->hfunc == ETH_RSS_HASH_TOP; } if (hash_changed) @@ -1151,25 +1141,31 @@ static int mlx5e_set_tunable(struct net_device *dev, return err; } -static void mlx5e_get_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) +void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; int err; err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause, &pauseparam->tx_pause); if (err) { - netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n", + netdev_err(priv->netdev, "%s: mlx5_query_port_pause failed:0x%x\n", __func__, err); } } -static int mlx5e_set_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) +static void mlx5e_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_ethtool_get_pauseparam(priv, pauseparam); +} + +int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; int err; @@ -1180,22 +1176,25 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, pauseparam->rx_pause ? 1 : 0, pauseparam->tx_pause ? 1 : 0); if (err) { - netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n", + netdev_err(priv->netdev, "%s: mlx5_set_port_pause failed:0x%x\n", __func__, err); } return err; } +static int mlx5e_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_pauseparam(priv, pauseparam); +} + int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info) { struct mlx5_core_dev *mdev = priv->mdev; - int ret; - - ret = ethtool_op_get_ts_info(priv->netdev, info); - if (ret) - return ret; info->phc_index = mlx5_clock_get_ptp_index(mdev); @@ -1203,9 +1202,9 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, info->phc_index == -1) return 0; - info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE | - SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RAW_HARDWARE; + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); @@ -1511,8 +1510,6 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, return 0; } -typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); - static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, bool is_rx_cq) { @@ -1675,23 +1672,58 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) return 0; } +static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + int err; + + if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)) + return -EOPNOTSUPP; + + new_channels.params = priv->channels.params; + + MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + return 0; +} + +static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = { + { "rx_cqe_moder", set_pflag_rx_cqe_based_moder }, + { "tx_cqe_moder", set_pflag_tx_cqe_based_moder }, + { "rx_cqe_compress", set_pflag_rx_cqe_compress }, + { "rx_striding_rq", set_pflag_rx_striding_rq }, + { "rx_no_csum_complete", set_pflag_rx_no_csum_complete }, + { "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe }, +}; + static int mlx5e_handle_pflag(struct net_device *netdev, u32 wanted_flags, - enum mlx5e_priv_flag flag, - mlx5e_pflag_handler pflag_handler) + enum mlx5e_priv_flag flag) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool enable = !!(wanted_flags & flag); + bool enable = !!(wanted_flags & BIT(flag)); u32 changes = wanted_flags ^ priv->channels.params.pflags; int err; - if (!(changes & flag)) + if (!(changes & BIT(flag))) return 0; - err = pflag_handler(netdev, enable); + err = mlx5e_priv_flags[flag].handler(netdev, enable); if (err) { - netdev_err(netdev, "%s private flag 0x%x failed err %d\n", - enable ? "Enable" : "Disable", flag, err); + netdev_err(netdev, "%s private flag '%s' failed err %d\n", + enable ? "Enable" : "Disable", mlx5e_priv_flags[flag].name, err); return err; } @@ -1702,38 +1734,17 @@ static int mlx5e_handle_pflag(struct net_device *netdev, static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) { struct mlx5e_priv *priv = netdev_priv(netdev); + enum mlx5e_priv_flag pflag; int err; mutex_lock(&priv->state_lock); - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_CQE_BASED_MODER, - set_pflag_rx_cqe_based_moder); - if (err) - goto out; - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_TX_CQE_BASED_MODER, - set_pflag_tx_cqe_based_moder); - if (err) - goto out; - - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_CQE_COMPRESS, - set_pflag_rx_cqe_compress); - if (err) - goto out; - - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_STRIDING_RQ, - set_pflag_rx_striding_rq); - if (err) - goto out; - - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, - set_pflag_rx_no_csum_complete); + for (pflag = 0; pflag < MLX5E_NUM_PFLAGS; pflag++) { + err = mlx5e_handle_pflag(netdev, pflags, pflag); + if (err) + break; + } -out: mutex_unlock(&priv->state_lock); /* Need to fix some features.. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index c18dcebe1462..4421c10f58ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -771,6 +771,112 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) INIT_LIST_HEAD(&priv->fs.ethtool.rules); } +static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type) +{ + switch (flow_type) { + case TCP_V4_FLOW: + return MLX5E_TT_IPV4_TCP; + case TCP_V6_FLOW: + return MLX5E_TT_IPV6_TCP; + case UDP_V4_FLOW: + return MLX5E_TT_IPV4_UDP; + case UDP_V6_FLOW: + return MLX5E_TT_IPV6_UDP; + case AH_V4_FLOW: + return MLX5E_TT_IPV4_IPSEC_AH; + case AH_V6_FLOW: + return MLX5E_TT_IPV6_IPSEC_AH; + case ESP_V4_FLOW: + return MLX5E_TT_IPV4_IPSEC_ESP; + case ESP_V6_FLOW: + return MLX5E_TT_IPV6_IPSEC_ESP; + case IPV4_FLOW: + return MLX5E_TT_IPV4; + case IPV6_FLOW: + return MLX5E_TT_IPV6; + default: + return MLX5E_NUM_INDIR_TIRS; + } +} + +static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, + struct ethtool_rxnfc *nfc) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + enum mlx5e_traffic_types tt; + u8 rx_hash_field = 0; + void *in; + + tt = flow_type_to_traffic_type(nfc->flow_type); + if (tt == MLX5E_NUM_INDIR_TIRS) + return -EINVAL; + + /* RSS does not support anything other than hashing to queues + * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest + * port. + */ + if (nfc->flow_type != TCP_V4_FLOW && + nfc->flow_type != TCP_V6_FLOW && + nfc->flow_type != UDP_V4_FLOW && + nfc->flow_type != UDP_V6_FLOW) + return -EOPNOTSUPP; + + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EOPNOTSUPP; + + if (nfc->data & RXH_IP_SRC) + rx_hash_field |= MLX5_HASH_FIELD_SEL_SRC_IP; + if (nfc->data & RXH_IP_DST) + rx_hash_field |= MLX5_HASH_FIELD_SEL_DST_IP; + if (nfc->data & RXH_L4_B_0_1) + rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_SPORT; + if (nfc->data & RXH_L4_B_2_3) + rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mutex_lock(&priv->state_lock); + + if (rx_hash_field == priv->rss_params.rx_hash_fields[tt]) + goto out; + + priv->rss_params.rx_hash_fields[tt] = rx_hash_field; + mlx5e_modify_tirs_hash(priv, in, inlen); + +out: + mutex_unlock(&priv->state_lock); + kvfree(in); + return 0; +} + +static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, + struct ethtool_rxnfc *nfc) +{ + enum mlx5e_traffic_types tt; + u32 hash_field = 0; + + tt = flow_type_to_traffic_type(nfc->flow_type); + if (tt == MLX5E_NUM_INDIR_TIRS) + return -EINVAL; + + hash_field = priv->rss_params.rx_hash_fields[tt]; + nfc->data = 0; + + if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP) + nfc->data |= RXH_IP_SRC; + if (hash_field & MLX5_HASH_FIELD_SEL_DST_IP) + nfc->data |= RXH_IP_DST; + if (hash_field & MLX5_HASH_FIELD_SEL_L4_SPORT) + nfc->data |= RXH_L4_B_0_1; + if (hash_field & MLX5_HASH_FIELD_SEL_L4_DPORT) + nfc->data |= RXH_L4_B_2_3; + + return 0; +} + int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { int err = 0; @@ -783,6 +889,9 @@ int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) case ETHTOOL_SRXCLSRLDEL: err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); break; + case ETHTOOL_SRXFH: + err = mlx5e_set_rss_hash_opt(priv, cmd); + break; default: err = -EOPNOTSUPP; break; @@ -810,6 +919,9 @@ int mlx5e_get_rxnfc(struct net_device *dev, case ETHTOOL_GRXCLSRLALL: err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); break; + case ETHTOOL_GRXFH: + err = mlx5e_get_rss_hash_opt(priv, info); + break; default: err = -EOPNOTSUPP; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1243edbedc9e..8cfd2ec7c0a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -49,6 +49,8 @@ #include "lib/clock.h" #include "en/port.h" #include "en/xdp.h" +#include "lib/eq.h" +#include "en/monitor_stats.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -59,6 +61,7 @@ struct mlx5e_rq_param { struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; + bool is_mpw; }; struct mlx5e_cq_param { @@ -128,6 +131,8 @@ static bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, return !params->lro_en && frag_sz <= PAGE_SIZE; } +#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \ + MLX5_MPWQE_LOG_STRIDE_SZ_BASE) static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { @@ -138,6 +143,9 @@ static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, if (!mlx5e_rx_is_linear_skb(mdev, params)) return false; + if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) + return false; + if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) return true; @@ -223,7 +231,7 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) MLX5_WQ_TYPE_CYCLIC; } -static void mlx5e_update_carrier(struct mlx5e_priv *priv) +void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u8 port_state; @@ -262,7 +270,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) mlx5e_stats_grps[i].update_stats(priv); } -static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) +void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) { int i; @@ -293,33 +301,35 @@ void mlx5e_queue_update_stats(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->update_stats_work); } -static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, - enum mlx5_dev_event event, unsigned long param) +static int async_event(struct notifier_block *nb, unsigned long event, void *data) { - struct mlx5e_priv *priv = vpriv; + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + struct mlx5_eqe *eqe = data; - if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) - return; + if (event != MLX5_EVENT_TYPE_PORT_CHANGE) + return NOTIFY_DONE; - switch (event) { - case MLX5_DEV_EVENT_PORT_UP: - case MLX5_DEV_EVENT_PORT_DOWN: + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: queue_work(priv->wq, &priv->update_carrier_work); break; default: - break; + return NOTIFY_DONE; } + + return NOTIFY_OK; } static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { - set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); + priv->events_nb.notifier_call = async_event; + mlx5_notifier_register(priv->mdev, &priv->events_nb); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { - clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); - synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC)); + mlx5_notifier_unregister(priv->mdev, &priv->events_nb); } static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, @@ -502,6 +512,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->channel = c; rq->ix = c->ix; rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->stats = &c->priv->channel_stats[c->ix].rq; rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL; @@ -982,18 +993,42 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq) { - kvfree(sq->db.xdpi); + kvfree(sq->db.xdpi_fifo.xi); + kvfree(sq->db.wqe_info); +} + +static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) +{ + struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + xdpi_fifo->xi = kvzalloc_node(sizeof(*xdpi_fifo->xi) * dsegs_per_wq, + GFP_KERNEL, numa); + if (!xdpi_fifo->xi) + return -ENOMEM; + + xdpi_fifo->pc = &sq->xdpi_fifo_pc; + xdpi_fifo->cc = &sq->xdpi_fifo_cc; + xdpi_fifo->mask = dsegs_per_wq - 1; + + return 0; } static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int err; - sq->db.xdpi = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.xdpi)), - GFP_KERNEL, numa); - if (!sq->db.xdpi) { - mlx5e_free_xdpsq_db(sq); + sq->db.wqe_info = kvzalloc_node(sizeof(*sq->db.wqe_info) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.wqe_info) return -ENOMEM; + + err = mlx5e_alloc_xdpsq_fifo(sq, numa); + if (err) { + mlx5e_free_xdpsq_db(sq); + return err; } return 0; @@ -1395,6 +1430,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) struct mlx5_core_dev *mdev = c->mdev; struct mlx5_rate_limit rl = {0}; + cancel_work_sync(&sq->dim.work); mlx5e_destroy_sq(mdev, sq->sqn); if (sq->rate_limit) { rl.rate = sq->rate_limit; @@ -1551,11 +1587,8 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_xdpsq *sq, bool is_redirect) { - unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT; struct mlx5e_create_sq_param csp = {}; - unsigned int inline_hdr_sz = 0; int err; - int i; err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect); if (err) @@ -1566,30 +1599,40 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; - if (is_redirect) - set_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); if (err) goto err_free_xdpsq; - if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - inline_hdr_sz = MLX5E_XDP_MIN_INLINE; - ds_cnt++; - } + mlx5e_set_xmit_fp(sq, param->is_mpw); + + if (!param->is_mpw) { + unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT; + unsigned int inline_hdr_sz = 0; + int i; + + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + ds_cnt++; + } - /* Pre initialize fixed WQE fields */ - for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - struct mlx5_wqe_data_seg *dseg; + /* Pre initialize fixed WQE fields */ + for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[i]; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); - dseg->lkey = sq->mkey_be; + dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); + dseg->lkey = sq->mkey_be; + + wi->num_wqebbs = 1; + wi->num_ds = 1; + } } return 0; @@ -1601,7 +1644,7 @@ err_free_xdpsq: return err; } -static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) +static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) { struct mlx5e_channel *c = sq->channel; @@ -1609,7 +1652,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) napi_synchronize(&c->napi); mlx5e_destroy_sq(c->mdev, sq->sqn); - mlx5e_free_xdpsq_descs(sq); + mlx5e_free_xdpsq_descs(sq, rq); mlx5e_free_xdpsq(sq); } @@ -1623,13 +1666,15 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, int err; u32 i; + err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); + if (err) + return err; + err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) return err; - mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - mcq->cqe_sz = 64; mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; @@ -1687,6 +1732,10 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) int eqn; int err; + err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + if (err) + return err; + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * cq->wq_ctrl.buf.npages; in = kvzalloc(inlen, GFP_KERNEL); @@ -1700,8 +1749,6 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); - mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); - MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); MLX5_SET(cqc, cqc, c_eqn, eqn); MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); @@ -1758,11 +1805,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) mlx5e_free_cq(cq); } -static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) -{ - return cpumask_first(priv->mdev->priv.irq_info[ix].mask); -} - static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1913,14 +1955,18 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); struct net_dim_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; - int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; unsigned int irq; int err; int eqn; + err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + if (err) + return err; + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) return -ENOMEM; @@ -1937,7 +1983,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; - mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); c->irq_desc = irq_to_desc(irq); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); @@ -1995,7 +2040,7 @@ err_close_rq: err_close_xdp_sq: if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq); + mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); err_close_sqs: mlx5e_close_sqs(c); @@ -2048,10 +2093,10 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) static void mlx5e_close_channel(struct mlx5e_channel *c) { - mlx5e_close_xdpsq(&c->xdpsq); + mlx5e_close_xdpsq(&c->xdpsq, NULL); mlx5e_close_rq(&c->rq); if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq); + mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); napi_disable(&c->napi); @@ -2218,6 +2263,8 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, void *cqc = param->cqc; MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); + if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, @@ -2294,6 +2341,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, mlx5e_build_sq_param_common(priv, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, @@ -2496,7 +2544,7 @@ static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz, if (rrp.rss.hfunc == ETH_RSS_HASH_XOR) ix = mlx5e_bits_invert(i, ilog2(sz)); - ix = priv->channels.params.indirection_rqt[ix]; + ix = priv->rss_params.indirection_rqt[ix]; rqn = rrp.rss.channels->c[ix]->rq.rqn; } else { rqn = rrp.rqn; @@ -2579,7 +2627,7 @@ static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, { .rss = { .channels = chs, - .hfunc = chs->params.rss_hfunc, + .hfunc = priv->rss_params.hfunc, } }, }; @@ -2599,6 +2647,54 @@ static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) mlx5e_redirect_rqts(priv, drop_rrp); } +static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = { + [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, + [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, +}; + +struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt) +{ + return tirc_default_config[tt]; +} + static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) { if (!params->lro_en) @@ -2614,116 +2710,68 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout); } -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, - enum mlx5e_traffic_types tt, +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, + const struct mlx5e_tirc_config *ttconfig, void *tirc, bool inner) { void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) : MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); -#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP) - -#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP |\ - MLX5_HASH_FIELD_SEL_L4_SPORT |\ - MLX5_HASH_FIELD_SEL_L4_DPORT) - -#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP |\ - MLX5_HASH_FIELD_SEL_IPSEC_SPI) - - MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc)); - if (params->rss_hfunc == ETH_RSS_HASH_TOP) { + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc)); + if (rss_params->hfunc == ETH_RSS_HASH_TOP) { void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); - memcpy(rss_key, params->toeplitz_hash_key, len); + memcpy(rss_key, rss_params->toeplitz_hash_key, len); } + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + ttconfig->l3_prot_type); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + ttconfig->l4_prot_type); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + ttconfig->rx_hash_fields); +} - switch (tt) { - case MLX5E_TT_IPV4_TCP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_TCP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV6_TCP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_TCP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV4_UDP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_UDP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV6_UDP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_UDP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV4_IPSEC_AH: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; +static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig, + enum mlx5e_traffic_types tt, + u32 rx_hash_fields) +{ + *ttconfig = tirc_default_config[tt]; + ttconfig->rx_hash_fields = rx_hash_fields; +} - case MLX5E_TT_IPV6_IPSEC_AH: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; +void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) +{ + void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); + struct mlx5e_rss_params *rss = &priv->rss_params; + struct mlx5_core_dev *mdev = priv->mdev; + int ctxlen = MLX5_ST_SZ_BYTES(tirc); + struct mlx5e_tirc_config ttconfig; + int tt; - case MLX5E_TT_IPV4_IPSEC_ESP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; + MLX5_SET(modify_tir_in, in, bitmask.hash, 1); - case MLX5E_TT_IPV6_IPSEC_ESP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_update_rx_hash_fields(&ttconfig, tt, + rss->rx_hash_fields[tt]); + mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false); + mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); + } - case MLX5E_TT_IPV4: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP); - break; + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; - case MLX5E_TT_IPV6: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP); - break; - default: - WARN_ONCE(true, "%s: bad traffic type!\n", __func__); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_update_rx_hash_fields(&ttconfig, tt, + rss->rx_hash_fields[tt]); + mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true); + mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, + inlen); } } @@ -2780,7 +2828,8 @@ static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); + mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, + &tirc_default_config[tt], tirc, true); } static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, @@ -2811,7 +2860,7 @@ static void mlx5e_query_mtu(struct mlx5_core_dev *mdev, *mtu = MLX5E_HW2SW_MTU(params, hw_mtu); } -static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) +int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) { struct mlx5e_params *params = &priv->channels.params; struct net_device *netdev = priv->netdev; @@ -2891,7 +2940,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_activate_channels(&priv->channels); netif_tx_start_all_queues(priv->netdev); - if (MLX5_ESWITCH_MANAGER(priv->mdev)) + if (mlx5e_is_vport_rep(priv)) mlx5e_add_sqs_fwd_rules(priv); mlx5e_wait_channels_min_rx_wqes(&priv->channels); @@ -2902,7 +2951,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { mlx5e_redirect_rqts_to_drop(priv); - if (MLX5_ESWITCH_MANAGER(priv->mdev)) + if (mlx5e_is_vport_rep(priv)) mlx5e_remove_sqs_fwd_rules(priv); /* FIXME: This is a W/A only for tx timeout watch dog false alarm when @@ -3154,7 +3203,7 @@ err_close_tises: return err; } -void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) +static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; @@ -3172,7 +3221,9 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); + + mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, + &tirc_default_config[tt], tirc, false); } static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) @@ -3377,11 +3428,14 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, { switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, cls_flower, flags); + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, cls_flower, flags); + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, cls_flower, flags); + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); default: return -EOPNOTSUPP; } @@ -3394,7 +3448,8 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); + return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | + MLX5E_TC_NIC_OFFLOAD); default: return -EOPNOTSUPP; } @@ -3437,7 +3492,7 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, } } -static void +void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -3445,8 +3500,10 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) struct mlx5e_vport_stats *vstats = &priv->stats.vport; struct mlx5e_pport_stats *pstats = &priv->stats.pport; - /* update HW stats in background for next time */ - mlx5e_queue_update_stats(priv); + if (!mlx5e_monitor_counter_supported(priv)) { + /* update HW stats in background for next time */ + mlx5e_queue_update_stats(priv); + } if (mlx5e_is_uplink_rep(priv)) { stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); @@ -3574,11 +3631,12 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) return 0; } +#ifdef CONFIG_MLX5_ESWITCH static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); - if (!enable && mlx5e_tc_num_filters(priv)) { + if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; @@ -3586,6 +3644,7 @@ static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) return 0; } +#endif static int set_feature_rx_all(struct net_device *netdev, bool enable) { @@ -3684,7 +3743,9 @@ static int mlx5e_set_features(struct net_device *netdev, err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, set_feature_cvlan_filter); +#ifdef CONFIG_MLX5_ESWITCH err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters); +#endif err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); @@ -3755,10 +3816,11 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, } if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params); u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params); u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params); - reset = reset && (ppw_old != ppw_new); + reset = reset && (is_linear || (ppw_old != ppw_new)); } if (!reset) { @@ -3876,7 +3938,7 @@ static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } #ifdef CONFIG_MLX5_ESWITCH -static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3913,8 +3975,8 @@ static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting) return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting); } -static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, - int max_tx_rate) +int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3955,8 +4017,8 @@ static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, mlx5_ifla_link2vport(link_state)); } -static int mlx5e_get_vf_config(struct net_device *dev, - int vf, struct ifla_vf_info *ivi) +int mlx5e_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3969,8 +4031,8 @@ static int mlx5e_get_vf_config(struct net_device *dev, return 0; } -static int mlx5e_get_vf_stats(struct net_device *dev, - int vf, struct ifla_vf_stats *vf_stats) +int mlx5e_get_vf_stats(struct net_device *dev, + int vf, struct ifla_vf_stats *vf_stats) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -4031,8 +4093,7 @@ static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add) queue_work(priv->wq, &vxlan_work->work); } -static void mlx5e_add_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4045,8 +4106,7 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1); } -static void mlx5e_del_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4096,9 +4156,9 @@ out: return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } -static netdev_features_t mlx5e_features_check(struct sk_buff *skb, - struct net_device *netdev, - netdev_features_t features) +netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4121,17 +4181,17 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, struct mlx5e_txqsq *sq) { - struct mlx5_eq *eq = sq->cq.mcq.eq; + struct mlx5_eq_comp *eq = sq->cq.mcq.eq; u32 eqe_count; netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", - eq->eqn, eq->cons_index, eq->irqn); + eq->core.eqn, eq->core.cons_index, eq->core.irqn); eqe_count = mlx5_eq_poll_irq_disabled(eq); if (!eqe_count) return false; - netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn); + netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn); sq->channel->stats->eq_rearm++; return true; } @@ -4358,8 +4418,6 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, - .ndo_has_offload_stats = mlx5e_has_offload_stats, - .ndo_get_offload_stats = mlx5e_get_offload_stats, #endif }; @@ -4505,15 +4563,23 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, mlx5e_init_rq_type_params(mdev, params); } -void mlx5e_build_rss_params(struct mlx5e_params *params) +void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, + u16 num_channels) { - params->rss_hfunc = ETH_RSS_HASH_XOR; - netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(params->indirection_rqt, - MLX5E_INDIR_RQT_SIZE, params->num_channels); + enum mlx5e_traffic_types tt; + + rss_params->hfunc = ETH_RSS_HASH_XOR; + netdev_rss_key_fill(rss_params->toeplitz_hash_key, + sizeof(rss_params->toeplitz_hash_key)); + mlx5e_build_default_indir_rqt(rss_params->indirection_rqt, + MLX5E_INDIR_RQT_SIZE, num_channels); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + rss_params->rx_hash_fields[tt] = + tirc_default_config[tt].rx_hash_fields; } void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu) { @@ -4529,6 +4595,10 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + /* XDP SQ */ + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, + MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)); + /* set CQE compression */ params->rx_cqe_compress_def = false; if (MLX5_CAP_GEN(mdev, cqe_compression) && @@ -4562,7 +4632,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); /* RSS */ - mlx5e_build_rss_params(params); + mlx5e_build_rss_params(rss_params, params->num_channels); } static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) @@ -4577,12 +4647,6 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) } } -#if IS_ENABLED(CONFIG_MLX5_ESWITCH) -static const struct switchdev_ops mlx5e_switchdev_ops = { - .switchdev_port_attr_get = mlx5e_attr_get, -}; -#endif - static void mlx5e_build_nic_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4678,7 +4742,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) FT_CAP(modify_root) && FT_CAP(identified_miss_table_mode) && FT_CAP(flow_table_modify)) { +#ifdef CONFIG_MLX5_ESWITCH netdev->hw_features |= NETIF_F_HW_TC; +#endif #ifdef CONFIG_MLX5_EN_ARFS netdev->hw_features |= NETIF_F_NTUPLE; #endif @@ -4690,12 +4756,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; mlx5e_set_netdev_dev_addr(netdev); - -#if IS_ENABLED(CONFIG_MLX5_ESWITCH) - if (MLX5_ESWITCH_MANAGER(mdev)) - netdev->switchdev_ops = &mlx5e_switchdev_ops; -#endif - mlx5e_ipsec_build_netdev(priv); mlx5e_tls_build_netdev(priv); } @@ -4733,14 +4793,16 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rss_params *rss = &priv->rss_params; int err; err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv); if (err) return err; - mlx5e_build_nic_params(mdev, &priv->channels.params, - mlx5e_get_netdev_max_channels(netdev), netdev->mtu); + mlx5e_build_nic_params(mdev, rss, &priv->channels.params, + mlx5e_get_netdev_max_channels(netdev), + netdev->mtu); mlx5e_timestamp_init(priv); @@ -4870,9 +4932,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_lag_add(mdev, netdev); mlx5e_enable_async_events(priv); - - if (MLX5_ESWITCH_MANAGER(priv->mdev)) - mlx5e_register_vport_reps(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_init(priv); if (netdev->reg_state != NETREG_REGISTERED) return; @@ -4906,8 +4967,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); - if (MLX5_ESWITCH_MANAGER(priv->mdev)) - mlx5e_unregister_vport_reps(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_cleanup(priv); mlx5e_disable_async_events(priv); mlx5_lag_remove(mdev); @@ -4960,7 +5021,7 @@ int mlx5e_netdev_init(struct net_device *netdev, netif_carrier_off(netdev); #ifdef CONFIG_MLX5_EN_ARFS - netdev->rx_cpu_rmap = mdev->rmap; + netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev); #endif return 0; @@ -5004,11 +5065,21 @@ err_free_netdev: int mlx5e_attach_netdev(struct mlx5e_priv *priv) { const struct mlx5e_profile *profile; + int max_nch; int err; profile = priv->profile; clear_bit(MLX5E_STATE_DESTROYING, &priv->state); + /* max number of channels may have changed */ + max_nch = mlx5e_get_max_num_channels(priv->mdev); + if (priv->channels.params.num_channels > max_nch) { + mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); + priv->channels.params.num_channels = max_nch; + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, max_nch); + } + err = profile->init_tx(priv); if (err) goto out; @@ -5094,7 +5165,6 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) static void *mlx5e_add(struct mlx5_core_dev *mdev) { struct net_device *netdev; - void *rpriv = NULL; void *priv; int err; int nch; @@ -5104,20 +5174,18 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) return NULL; #ifdef CONFIG_MLX5_ESWITCH - if (MLX5_ESWITCH_MANAGER(mdev)) { - rpriv = mlx5e_alloc_nic_rep_priv(mdev); - if (!rpriv) { - mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n"); - return NULL; - } + if (MLX5_ESWITCH_MANAGER(mdev) && + mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + mlx5e_rep_register_vport_reps(mdev); + return mdev; } #endif nch = mlx5e_get_max_num_channels(mdev); - netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, rpriv); + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, NULL); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); - goto err_free_rpriv; + return NULL; } priv = netdev_priv(netdev); @@ -5143,30 +5211,26 @@ err_detach: mlx5e_detach(mdev, priv); err_destroy_netdev: mlx5e_destroy_netdev(priv); -err_free_rpriv: - kfree(rpriv); return NULL; } static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) { - struct mlx5e_priv *priv = vpriv; - void *ppriv = priv->ppriv; + struct mlx5e_priv *priv; +#ifdef CONFIG_MLX5_ESWITCH + if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev) { + mlx5e_rep_unregister_vport_reps(mdev); + return; + } +#endif + priv = vpriv; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_delete_app(priv); #endif unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(priv); - kfree(ppriv); -} - -static void *mlx5e_get_netdev(void *vpriv) -{ - struct mlx5e_priv *priv = vpriv; - - return priv->netdev; } static struct mlx5_interface mlx5e_interface = { @@ -5174,9 +5238,7 @@ static struct mlx5_interface mlx5e_interface = { .remove = mlx5e_remove, .attach = mlx5e_attach, .detach = mlx5e_detach, - .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, - .get_dev = mlx5e_get_netdev, }; void mlx5e_init(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index c3c657548824..96cc0c6a4014 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -42,13 +42,24 @@ #include "en.h" #include "en_rep.h" #include "en_tc.h" +#include "en/tc_tun.h" #include "fs_core.h" -#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \ - max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) +#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ + max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) +#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; +struct mlx5e_rep_indr_block_priv { + struct net_device *netdev; + struct mlx5e_rep_priv *rpriv; + + struct list_head list; +}; + +static void mlx5e_rep_indr_unregister_block(struct net_device *netdev); + static void mlx5e_rep_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { @@ -98,7 +109,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev, } } -static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) +static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -121,6 +132,32 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) vport_stats->tx_bytes = vf_stats.rx_bytes; } +static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct rtnl_link_stats64 *vport_stats; + + mlx5e_grp_802_3_update_stats(priv); + + vport_stats = &priv->stats.vf_vport; + + vport_stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); + vport_stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); + vport_stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); + vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); +} + +static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + if (rep->vport == FDB_UPLINK_VPORT) + mlx5e_uplink_rep_update_hw_counters(priv); + else + mlx5e_vf_rep_update_hw_counters(priv); +} + static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -256,6 +293,22 @@ static int mlx5e_rep_set_channels(struct net_device *dev, return 0; } +static int mlx5e_rep_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal); +} + +static int mlx5e_rep_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal); +} + static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -270,7 +323,55 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev) return mlx5e_ethtool_get_rxfh_indir_size(priv); } -static const struct ethtool_ops mlx5e_rep_ethtool_ops = { +static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_ethtool_get_pauseparam(priv, pauseparam); +} + +static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_pauseparam(priv, pauseparam); +} + +static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); +} + +static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); +} + +static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = { + .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_rep_get_strings, + .get_sset_count = mlx5e_rep_get_sset_count, + .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, + .get_ringparam = mlx5e_rep_get_ringparam, + .set_ringparam = mlx5e_rep_set_ringparam, + .get_channels = mlx5e_rep_get_channels, + .set_channels = mlx5e_rep_set_channels, + .get_coalesce = mlx5e_rep_get_coalesce, + .set_coalesce = mlx5e_rep_set_coalesce, + .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, + .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, +}; + +static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { .get_drvinfo = mlx5e_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -280,24 +381,44 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .set_ringparam = mlx5e_rep_set_ringparam, .get_channels = mlx5e_rep_get_channels, .set_channels = mlx5e_rep_set_channels, + .get_coalesce = mlx5e_rep_get_coalesce, + .set_coalesce = mlx5e_rep_set_coalesce, + .get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings, + .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings, .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, + .get_pauseparam = mlx5e_uplink_rep_get_pauseparam, + .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, }; -int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) +static int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_upper = NULL; + struct mlx5e_priv *uplink_priv = NULL; + struct net_device *uplink_dev; if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + if (uplink_dev) { + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + uplink_priv = netdev_priv(uplink_dev); + } + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: attr->u.ppid.id_len = ETH_ALEN; - ether_addr_copy(attr->u.ppid.id, rep->hw_id); + if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { + ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr); + } else { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + ether_addr_copy(attr->u.ppid.id, rep->hw_id); + } break; default: return -EOPNOTSUPP; @@ -466,8 +587,8 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, ASSERT_RTNL(); - if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) || - !ether_addr_equal(e->h_dest, ha)) + if ((e->flags & MLX5_ENCAP_ENTRY_VALID) && + (!neigh_connected || !ether_addr_equal(e->h_dest, ha))) mlx5e_tc_encap_flows_del(priv, e); if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { @@ -518,6 +639,184 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) neigh_release(n); } +static struct mlx5e_rep_indr_block_priv * +mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + struct mlx5e_rep_indr_block_priv *cb_priv; + + /* All callback list access should be protected by RTNL. */ + ASSERT_RTNL(); + + list_for_each_entry(cb_priv, + &rpriv->uplink_priv.tc_indr_block_priv_list, + list) + if (cb_priv->netdev == netdev) + return cb_priv; + + return NULL; +} + +static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_rep_indr_block_priv *cb_priv, *temp; + struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list; + + list_for_each_entry_safe(cb_priv, temp, head, list) { + mlx5e_rep_indr_unregister_block(cb_priv->netdev); + kfree(cb_priv); + } +} + +static int +mlx5e_rep_indr_offload(struct net_device *netdev, + struct tc_cls_flower_offload *flower, + struct mlx5e_rep_indr_block_priv *indr_priv) +{ + struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); + int flags = MLX5E_TC_EGRESS | MLX5E_TC_ESW_OFFLOAD; + int err = 0; + + switch (flower->command) { + case TC_CLSFLOWER_REPLACE: + err = mlx5e_configure_flower(netdev, priv, flower, flags); + break; + case TC_CLSFLOWER_DESTROY: + err = mlx5e_delete_flower(netdev, priv, flower, flags); + break; + case TC_CLSFLOWER_STATS: + err = mlx5e_stats_flower(netdev, priv, flower, flags); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_indr_offload(priv->netdev, type_data, priv); + default: + return -EOPNOTSUPP; + } +} + +static int +mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, + struct mlx5e_rep_priv *rpriv, + struct tc_block_offload *f) +{ + struct mlx5e_rep_indr_block_priv *indr_priv; + int err = 0; + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (indr_priv) + return -EEXIST; + + indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); + if (!indr_priv) + return -ENOMEM; + + indr_priv->netdev = netdev; + indr_priv->rpriv = rpriv; + list_add(&indr_priv->list, + &rpriv->uplink_priv.tc_indr_block_priv_list); + + err = tcf_block_cb_register(f->block, + mlx5e_rep_indr_setup_block_cb, + netdev, indr_priv, f->extack); + if (err) { + list_del(&indr_priv->list); + kfree(indr_priv); + } + + return err; + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, + mlx5e_rep_indr_setup_block_cb, + netdev); + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (indr_priv) { + list_del(&indr_priv->list); + kfree(indr_priv); + } + + return 0; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static +int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv, + type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + int err; + + err = __tc_indr_block_cb_register(netdev, rpriv, + mlx5e_rep_indr_setup_tc_cb, + netdev); + if (err) { + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n", + netdev_name(netdev), err); + } + return err; +} + +static void mlx5e_rep_indr_unregister_block(struct net_device *netdev) +{ + __tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, + netdev); +} + +static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + uplink_priv.netdevice_nb); + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + if (!mlx5e_tc_tun_device_to_offload(priv, netdev)) + return NOTIFY_OK; + + switch (event) { + case NETDEV_REGISTER: + mlx5e_rep_indr_register_block(rpriv, netdev); + break; + case NETDEV_UNREGISTER: + mlx5e_rep_indr_unregister_block(netdev); + break; + } + return NOTIFY_OK; +} + static struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, struct mlx5e_neigh *m_neigh); @@ -779,7 +1078,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, mlx5e_rep_neigh_entry_destroy(priv, nhe); } -static int mlx5e_rep_open(struct net_device *dev) +static int mlx5e_vf_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -801,7 +1100,7 @@ unlock: return err; } -static int mlx5e_rep_close(struct net_device *dev) +static int mlx5e_vf_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -838,24 +1137,14 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, { switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, cls_flower, flags); + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, cls_flower, flags); + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, cls_flower, flags); - default: - return -EOPNOTSUPP; - } -} - -static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data, - void *cb_priv) -{ - struct mlx5e_priv *priv = cb_priv; - - switch (type) { - case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS); + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); default: return -EOPNOTSUPP; } @@ -868,7 +1157,8 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | + MLX5E_TC_ESW_OFFLOAD); default: return -EOPNOTSUPP; } @@ -907,43 +1197,23 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep; if (!MLX5_ESWITCH_MANAGER(priv->mdev)) return false; - rep = rpriv->rep; - if (esw->mode == SRIOV_OFFLOADS && - rep && rep->vport == FDB_UPLINK_VPORT) - return true; - - return false; -} - -static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep; - - if (!MLX5_ESWITCH_MANAGER(priv->mdev)) + if (!rpriv) /* non vport rep mlx5e instances don't use this field */ return false; rep = rpriv->rep; - if (rep && rep->vport != FDB_UPLINK_VPORT) - return true; - - return false; + return (rep->vport == FDB_UPLINK_VPORT); } -bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) +static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) { - struct mlx5e_priv *priv = netdev_priv(dev); - switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: - if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv)) return true; } @@ -969,8 +1239,8 @@ mlx5e_get_sw_stats64(const struct net_device *dev, return 0; } -int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, - void *sp) +static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@ -981,7 +1251,7 @@ int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, } static void -mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -990,37 +1260,93 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); } +static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu) +{ + return mlx5e_change_mtu(netdev, new_mtu, NULL); +} + +static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu) +{ + return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu); +} + +static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr) +{ + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + ether_addr_copy(netdev->dev_addr, saddr->sa_data); + return 0; +} + static const struct switchdev_ops mlx5e_rep_switchdev_ops = { .switchdev_port_attr_get = mlx5e_attr_get, }; -static int mlx5e_change_rep_mtu(struct net_device *netdev, int new_mtu) -{ - return mlx5e_change_mtu(netdev, new_mtu, NULL); -} +static const struct net_device_ops mlx5e_netdev_ops_vf_rep = { + .ndo_open = mlx5e_vf_rep_open, + .ndo_stop = mlx5e_vf_rep_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, + .ndo_setup_tc = mlx5e_rep_setup_tc, + .ndo_get_stats64 = mlx5e_vf_rep_get_stats, + .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, + .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, + .ndo_change_mtu = mlx5e_vf_rep_change_mtu, +}; -static const struct net_device_ops mlx5e_netdev_ops_rep = { - .ndo_open = mlx5e_rep_open, - .ndo_stop = mlx5e_rep_close, +static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, + .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_stats64 = mlx5e_rep_get_stats, - .ndo_has_offload_stats = mlx5e_has_offload_stats, - .ndo_get_offload_stats = mlx5e_get_offload_stats, - .ndo_change_mtu = mlx5e_change_rep_mtu, + .ndo_get_stats64 = mlx5e_get_stats, + .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, + .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, + .ndo_change_mtu = mlx5e_uplink_rep_change_mtu, + .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, + .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, + .ndo_features_check = mlx5e_features_check, + .ndo_set_vf_mac = mlx5e_set_vf_mac, + .ndo_set_vf_rate = mlx5e_set_vf_rate, + .ndo_get_vf_config = mlx5e_get_vf_config, + .ndo_get_vf_stats = mlx5e_get_vf_stats, }; -static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, u16 mtu) +bool mlx5e_eswitch_rep(struct net_device *netdev) +{ + if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep || + netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep) + return true; + + return false; +} + +static void mlx5e_build_rep_params(struct net_device *netdev) { + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params *params; + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + params = &priv->channels.params; params->hard_mtu = MLX5E_ETH_HARD_MTU; - params->sw_mtu = mtu; - params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE; + params->sw_mtu = netdev->mtu; + + /* SQ */ + if (rep->vport == FDB_UPLINK_VPORT) + params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + else + params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; /* RQ */ mlx5e_build_rq_params(mdev, params); @@ -1034,24 +1360,38 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); /* RSS */ - mlx5e_build_rss_params(params); + mlx5e_build_rss_params(&priv->rss_params, params->num_channels); } static void mlx5e_build_rep_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_core_dev *mdev = priv->mdev; - u16 max_mtu; - netdev->netdev_ops = &mlx5e_netdev_ops_rep; + if (rep->vport == FDB_UPLINK_VPORT) { + SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev); + netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; + /* we want a persistent mac for the uplink rep */ + mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr); + netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; +#endif + } else { + netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep; + eth_hw_addr_random(netdev); + netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops; + } netdev->watchdog_timeo = 15 * HZ; - netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; - netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; + netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; netdev->hw_features |= NETIF_F_HW_TC; netdev->hw_features |= NETIF_F_SG; @@ -1062,13 +1402,10 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM; - netdev->features |= netdev->hw_features; - - eth_hw_addr_random(netdev); + if (rep->vport != FDB_UPLINK_VPORT) + netdev->features |= NETIF_F_VLAN_CHALLENGED; - netdev->min_mtu = ETH_MIN_MTU; - mlx5_query_port_max_mtu(mdev, &max_mtu, 1); - netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); + netdev->features |= netdev->hw_features; } static int mlx5e_init_rep(struct mlx5_core_dev *mdev, @@ -1083,11 +1420,9 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev, if (err) return err; + priv->channels.params.num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS; - priv->channels.params.num_channels = - mlx5e_get_netdev_max_channels(netdev); - - mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu); + mlx5e_build_rep_params(netdev); mlx5e_build_rep_netdev(netdev); mlx5e_timestamp_init(priv); @@ -1210,94 +1545,173 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) { - int err; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv; + int tc, err; err = mlx5e_create_tises(priv); if (err) { mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); return err; } - return 0; -} -static const struct mlx5e_profile mlx5e_rep_profile = { - .init = mlx5e_init_rep, - .cleanup = mlx5e_cleanup_rep, - .init_rx = mlx5e_init_rep_rx, - .cleanup_rx = mlx5e_cleanup_rep_rx, - .init_tx = mlx5e_init_rep_tx, - .cleanup_tx = mlx5e_cleanup_nic_tx, - .update_stats = mlx5e_rep_update_hw_counters, - .update_carrier = NULL, - .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, - .max_tc = 1, -}; + if (rpriv->rep->vport == FDB_UPLINK_VPORT) { + uplink_priv = &rpriv->uplink_priv; -/* e-Switch vport representors */ + /* init shared tc flow table */ + err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + if (err) + goto destroy_tises; + + /* init indirect block notifications */ + INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); + uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event; + err = register_netdevice_notifier(&uplink_priv->netdevice_nb); + if (err) { + mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n"); + goto tc_esw_cleanup; + } + } -static int -mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) + return 0; + +tc_esw_cleanup: + mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht); +destroy_tises: + for (tc = 0; tc < priv->profile->max_tc; tc++) + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); + return err; +} + +static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int tc; - int err; + for (tc = 0; tc < priv->profile->max_tc; tc++) + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - err = mlx5e_add_sqs_fwd_rules(priv); - if (err) - return err; + if (rpriv->rep->vport == FDB_UPLINK_VPORT) { + /* clean indirect TC block notifications */ + unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb); + mlx5e_rep_indr_clean_block_privs(rpriv); + + /* delete shared tc flow table */ + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); } +} - err = mlx5e_rep_neigh_init(rpriv); - if (err) - goto err_remove_sqs; +static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 max_mtu; - /* init shared tc flow table */ - err = mlx5e_tc_esw_init(&rpriv->tc_ht); - if (err) - goto err_neigh_cleanup; + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); +} - return 0; +static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + struct mlx5_eqe *eqe = data; -err_neigh_cleanup: - mlx5e_rep_neigh_cleanup(rpriv); -err_remove_sqs: - mlx5e_remove_sqs_fwd_rules(priv); - return err; + if (event != MLX5_EVENT_TYPE_PORT_CHANGE) + return NOTIFY_DONE; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + queue_work(priv->wq, &priv->update_carrier_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; } -static void -mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep) +static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 max_mtu; - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_remove_sqs_fwd_rules(priv); + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); + mlx5e_set_dev_port_mtu(priv); + + mlx5_lag_add(mdev, netdev); + priv->events_nb.notifier_call = uplink_rep_async_event; + mlx5_notifier_register(mdev, &priv->events_nb); +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_initialize(priv); + mlx5e_dcbnl_init_app(priv); +#endif +} - /* clean uplink offloaded TC rules, delete shared tc flow table */ - mlx5e_tc_esw_cleanup(&rpriv->tc_ht); +static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; - mlx5e_rep_neigh_cleanup(rpriv); +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_delete_app(priv); +#endif + mlx5_notifier_unregister(mdev, &priv->events_nb); + mlx5_lag_remove(mdev); } +static const struct mlx5e_profile mlx5e_vf_rep_profile = { + .init = mlx5e_init_rep, + .cleanup = mlx5e_cleanup_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_rep_tx, + .enable = mlx5e_vf_rep_enable, + .update_stats = mlx5e_vf_rep_update_hw_counters, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .max_tc = 1, +}; + +static const struct mlx5e_profile mlx5e_uplink_rep_profile = { + .init = mlx5e_init_rep, + .cleanup = mlx5e_cleanup_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_rep_tx, + .enable = mlx5e_uplink_rep_enable, + .disable = mlx5e_uplink_rep_disable, + .update_stats = mlx5e_uplink_rep_update_hw_counters, + .update_carrier = mlx5e_update_carrier, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .max_tc = MLX5E_MAX_NUM_TC, +}; + +/* e-Switch vport representors */ static int mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { - struct mlx5e_rep_priv *uplink_rpriv; + const struct mlx5e_profile *profile; struct mlx5e_rep_priv *rpriv; struct net_device *netdev; - struct mlx5e_priv *upriv; int nch, err; rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); if (!rpriv) return -ENOMEM; + /* rpriv->rep to be looked up when profile->init() is called */ + rpriv->rep = rep; + nch = mlx5e_get_max_num_channels(dev); - netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, nch, rpriv); + profile = (rep->vport == FDB_UPLINK_VPORT) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", rep->vport); @@ -1306,15 +1720,20 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) } rpriv->netdev = netdev; - rpriv->rep = rep; rep->rep_if[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); + if (rep->vport == FDB_UPLINK_VPORT) { + err = mlx5e_create_mdev_resources(dev); + if (err) + goto err_destroy_netdev; + } + err = mlx5e_attach_netdev(netdev_priv(netdev)); if (err) { pr_warn("Failed to attach representor netdev for vport %d\n", rep->vport); - goto err_destroy_netdev; + goto err_destroy_mdev_resources; } err = mlx5e_rep_neigh_init(rpriv); @@ -1324,32 +1743,25 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } - uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH); - upriv = netdev_priv(uplink_rpriv->netdev); - err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); - if (err) - goto err_neigh_cleanup; - err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_egdev_cleanup; + goto err_neigh_cleanup; } return 0; -err_egdev_cleanup: - tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); - err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); err_detach_netdev: mlx5e_detach_netdev(netdev_priv(netdev)); +err_destroy_mdev_resources: + if (rep->vport == FDB_UPLINK_VPORT) + mlx5e_destroy_mdev_resources(dev); + err_destroy_netdev: mlx5e_destroy_netdev(netdev_priv(netdev)); kfree(rpriv); @@ -1362,18 +1774,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_rep_priv *uplink_rpriv; void *ppriv = priv->ppriv; - struct mlx5e_priv *upriv; unregister_netdev(netdev); - uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, - REP_ETH); - upriv = netdev_priv(uplink_rpriv->netdev); - tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); + if (rep->vport == FDB_UPLINK_VPORT) + mlx5e_destroy_mdev_resources(priv->mdev); mlx5e_destroy_netdev(priv); kfree(ppriv); /* mlx5e_rep_priv */ } @@ -1387,14 +1794,13 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } -static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) +void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch *esw = mdev->priv.eswitch; int total_vfs = MLX5_TOTAL_VPORTS(mdev); int vport; - for (vport = 1; vport < total_vfs; vport++) { + for (vport = 0; vport < total_vfs; vport++) { struct mlx5_eswitch_rep_if rep_if = {}; rep_if.load = mlx5e_vport_rep_load; @@ -1404,55 +1810,12 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) } } -static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv) +void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) { - struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_eswitch *esw = mdev->priv.eswitch; int total_vfs = MLX5_TOTAL_VPORTS(mdev); int vport; - for (vport = 1; vport < total_vfs; vport++) + for (vport = total_vfs - 1; vport >= 0; vport--) mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH); } - -void mlx5e_register_vport_reps(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if; - struct mlx5e_rep_priv *rpriv; - - rpriv = priv->ppriv; - rpriv->netdev = priv->netdev; - - rep_if.load = mlx5e_nic_rep_load; - rep_if.unload = mlx5e_nic_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - rep_if.priv = rpriv; - INIT_LIST_HEAD(&rpriv->vport_sqs_list); - mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/ - - mlx5e_rep_register_vf_vports(priv); /* VFs vports */ -} - -void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - - mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */ - mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/ -} - -void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5e_rep_priv *rpriv; - - rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); - if (!rpriv) - return NULL; - - rpriv->rep = &esw->offloads.vport_reps[0]; - return rpriv; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 844d32d5c29f..edd722824697 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -53,13 +53,33 @@ struct mlx5e_neigh_update_table { unsigned long min_interval; /* jiffies */ }; +struct mlx5_rep_uplink_priv { + /* Filters DB - instantiated by the uplink representor and shared by + * the uplink's VFs + */ + struct rhashtable tc_ht; + + /* indirect block callbacks are invoked on bind/unbind events + * on registered higher level devices (e.g. tunnel devices) + * + * tc_indr_block_cb_priv_list is used to lookup indirect callback + * private data + * + * netdevice_nb is the netdev events notifier - used to register + * tunnel devices for block events + * + */ + struct list_head tc_indr_block_priv_list; + struct notifier_block netdevice_nb; +}; + struct mlx5e_rep_priv { struct mlx5_eswitch_rep *rep; struct mlx5e_neigh_update_table neigh_update; struct net_device *netdev; struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; - struct rhashtable tc_ht; /* valid for uplink rep */ + struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ }; static inline @@ -129,6 +149,8 @@ struct mlx5e_encap_entry { struct net_device *out_dev; int tunnel_type; + int tunnel_hlen; + int reformat_type; u8 flags; char *encap_header; int encap_size; @@ -140,16 +162,12 @@ struct mlx5e_rep_sq { }; void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev); -void mlx5e_register_vport_reps(struct mlx5e_priv *priv); -void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv); +void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev); +void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev); bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); -int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp); -bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); - -int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, @@ -158,12 +176,17 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +bool mlx5e_eswitch_rep(struct net_device *netdev); + #else /* CONFIG_MLX5_ESWITCH */ -static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {} -static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {} static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {} #endif +static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv) +{ + return (MLX5_ESWITCH_MANAGER(priv->mdev) && priv->ppriv); +} #endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 79638dcbae78..1d0bb5ff8c26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -554,9 +554,9 @@ static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, mlx5_cqwq_pop(&cq->wq); - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { netdev_WARN_ONCE(cq->channel->netdev, - "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own); + "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); return; } @@ -724,9 +724,9 @@ static u32 mlx5e_get_fcs(const struct sk_buff *skb) return __get_unaligned_cpu32(fcs_bytes); } -static u8 get_ip_proto(struct sk_buff *skb, __be16 proto) +static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) { - void *ip_p = skb->data + sizeof(struct ethhdr); + void *ip_p = skb->data + network_depth; return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol : ((struct ipv6hdr *)ip_p)->nexthdr; @@ -755,7 +755,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, goto csum_unnecessary; if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { - if (unlikely(get_ip_proto(skb, proto) == IPPROTO_SCTP)) + if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) goto csum_unnecessary; skb->ip_summed = CHECKSUM_COMPLETE; @@ -898,7 +898,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, prefetchw(va); /* xdp_frame data area */ prefetch(data); - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; return NULL; } @@ -930,7 +930,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, u16 byte_cnt = cqe_bcnt - headlen; struct sk_buff *skb; - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; return NULL; } @@ -1104,6 +1104,12 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, u32 frag_size; bool consumed; + /* Check packet size. Note LRO doesn't use linear SKB */ + if (unlikely(cqe_bcnt > rq->hw_mtu)) { + rq->stats->oversize_pkts_sw_drop++; + return NULL; + } + va = page_address(di->page) + head_offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32); @@ -1148,7 +1154,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi->consumed_strides += cstrides; - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; goto mpwrq_cqe_out; } @@ -1184,7 +1190,6 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_xdpsq *xdpsq; struct mlx5_cqe64 *cqe; int work_done = 0; @@ -1195,10 +1200,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (!cqe) + if (!cqe) { + if (unlikely(work_done)) + goto out; return 0; - - xdpsq = &rq->xdpsq; + } do { if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { @@ -1213,15 +1219,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); - if (xdpsq->doorbell) { - mlx5e_xmit_xdp_doorbell(xdpsq); - xdpsq->doorbell = false; - } - - if (xdpsq->redirect_flush) { - xdp_do_flush_map(); - xdpsq->redirect_flush = false; - } +out: + if (rq->xdp_prog) + mlx5e_xdp_rx_poll_complete(rq); mlx5_cqwq_update_db_record(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 35ded91203f5..4382ef85488c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -98,18 +98,17 @@ static int mlx5e_test_link_speed(struct mlx5e_priv *priv) return 1; } -#ifdef CONFIG_INET -/* loopback test */ -#define MLX5E_TEST_PKT_SIZE (MLX5E_RX_MAX_HEAD - NET_IP_ALIGN) -static const char mlx5e_test_text[ETH_GSTRING_LEN] = "MLX5E SELF TEST"; -#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL - struct mlx5ehdr { __be32 version; __be64 magic; - char text[ETH_GSTRING_LEN]; }; +#ifdef CONFIG_INET +/* loopback test */ +#define MLX5E_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) +\ + sizeof(struct udphdr) + sizeof(struct mlx5ehdr)) +#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL + static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) { struct sk_buff *skb = NULL; @@ -117,10 +116,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) struct ethhdr *ethh; struct udphdr *udph; struct iphdr *iph; - int datalen, iplen; - - datalen = MLX5E_TEST_PKT_SIZE - - (sizeof(*ethh) + sizeof(*iph) + sizeof(*udph)); + int iplen; skb = netdev_alloc_skb(priv->netdev, MLX5E_TEST_PKT_SIZE); if (!skb) { @@ -149,7 +145,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) /* Fill UDP header */ udph->source = htons(9); udph->dest = htons(9); /* Discard Protocol */ - udph->len = htons(datalen + sizeof(struct udphdr)); + udph->len = htons(sizeof(struct mlx5ehdr) + sizeof(struct udphdr)); udph->check = 0; /* Fill IP header */ @@ -157,7 +153,8 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) iph->ttl = 32; iph->version = 4; iph->protocol = IPPROTO_UDP; - iplen = sizeof(struct iphdr) + sizeof(struct udphdr) + datalen; + iplen = sizeof(struct iphdr) + sizeof(struct udphdr) + + sizeof(struct mlx5ehdr); iph->tot_len = htons(iplen); iph->frag_off = 0; iph->saddr = 0; @@ -170,9 +167,6 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) mlxh = skb_put(skb, sizeof(*mlxh)); mlxh->version = 0; mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC); - strlcpy(mlxh->text, mlx5e_test_text, sizeof(mlxh->text)); - datalen -= sizeof(*mlxh); - skb_put_zero(skb, datalen); skb->csum = 0; skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1e55b9c27ffc..d3fe48ff9da9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include "lib/mlx5.h" #include "en.h" #include "en_accel/ipsec.h" #include "en_accel/tls.h" @@ -74,7 +75,6 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_udp_seg_rem) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) }, @@ -83,6 +83,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_cqes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_strides) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_oversize_pkts_sw_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, @@ -161,6 +162,7 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler_cqes += rq_stats->mpwqe_filler_cqes; s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides; + s->rx_oversize_pkts_sw_drop += rq_stats->oversize_pkts_sw_drop; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; @@ -196,7 +198,6 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_nop += sq_stats->nop; s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; - s->tx_udp_seg_rem += sq_stats->udp_seg_rem; s->tx_queue_dropped += sq_stats->dropped; s->tx_cqe_err += sq_stats->cqe_err; s->tx_recover += sq_stats->recover; @@ -480,7 +481,10 @@ static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) +#define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \ + (MLX5_CAP_GEN(mdev, pcam_reg) ? MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1) + +void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -488,6 +492,9 @@ static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); void *out; + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + MLX5_SET(ppcnt_reg, in, local_port, 1); out = pstats->IEEE_802_3_counters; MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); @@ -600,6 +607,9 @@ static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv) int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); void *out; + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + MLX5_SET(ppcnt_reg, in, local_port, 1); out = pstats->RFC_2819_counters; MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); @@ -934,7 +944,7 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { }; static const struct counter_desc pport_pfc_stall_stats_desc[] = { - { "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) }, + { "tx_pause_storm_warning_events", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) }, { "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) }, }; @@ -1075,6 +1085,9 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) int prio; void *out; + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + MLX5_SET(ppcnt_reg, in, local_port, 1); MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { @@ -1086,13 +1099,13 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) } static const struct counter_desc mlx5e_pme_status_desc[] = { - { "module_unplug", 8 }, + { "module_unplug", sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED }, }; static const struct counter_desc mlx5e_pme_error_desc[] = { - { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */ - { "module_high_temp", 48 }, /* high temperature */ - { "module_bad_shorted", 56 }, /* bad or shorted cable/module */ + { "module_bus_stuck", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK }, + { "module_high_temp", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE }, + { "module_bad_shorted", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE }, }; #define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc) @@ -1120,15 +1133,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { - struct mlx5_priv *mlx5_priv = &priv->mdev->priv; + struct mlx5_pme_stats pme_stats; int i; + mlx5_get_pme_stats(priv->mdev, &pme_stats); + for (i = 0; i < NUM_PME_STATUS_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters, + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters, mlx5e_pme_status_desc, i); for (i = 0; i < NUM_PME_ERR_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters, mlx5e_pme_error_desc, i); return idx; @@ -1189,6 +1204,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 77f74ce11280..fe91ec06e3c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -87,7 +87,6 @@ struct mlx5e_sw_stats { u64 tx_recover; u64 tx_cqes; u64 tx_queue_wake; - u64 tx_udp_seg_rem; u64 tx_cqe_err; u64 tx_xdp_xmit; u64 tx_xdp_full; @@ -96,6 +95,7 @@ struct mlx5e_sw_stats { u64 rx_wqe_err; u64 rx_mpwqe_filler_cqes; u64 rx_mpwqe_filler_strides; + u64 rx_oversize_pkts_sw_drop; u64 rx_buff_alloc_err; u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; @@ -193,6 +193,7 @@ struct mlx5e_rq_stats { u64 wqe_err; u64 mpwqe_filler_cqes; u64 mpwqe_filler_strides; + u64 oversize_pkts_sw_drop; u64 buff_alloc_err; u64 cqe_compress_blks; u64 cqe_compress_pkts; @@ -219,7 +220,6 @@ struct mlx5e_sq_stats { u64 csum_partial_inner; u64 added_vlan_packets; u64 nop; - u64 udp_seg_rem; #ifdef CONFIG_MLX5_EN_TLS u64 tls_ooo; u64 tls_resync_bytes; @@ -278,5 +278,6 @@ extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; extern const int mlx5e_num_stats_grps; void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv); +void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv); #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 608025ca5c04..cae6c6d48984 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -44,15 +44,15 @@ #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> -#include <net/vxlan.h> #include <net/arp.h> #include "en.h" #include "en_rep.h" #include "en_tc.h" #include "eswitch.h" -#include "lib/vxlan.h" #include "fs_core.h" #include "en/port.h" +#include "en/tc_tun.h" +#include "lib/devcom.h" struct mlx5_nic_flow_attr { u32 action; @@ -69,25 +69,54 @@ struct mlx5_nic_flow_attr { enum { MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS, MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS, - MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE), - MLX5E_TC_FLOW_NIC = BIT(MLX5E_TC_FLOW_BASE + 1), - MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2), - MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3), - MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4), - MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5), + MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD, + MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD, + MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE), + MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1), + MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2), + MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3), + MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4), }; #define MLX5E_TC_MAX_SPLITS 1 +/* Helper struct for accessing a struct containing list_head array. + * Containing struct + * |- Helper array + * [0] Helper item 0 + * |- list_head item 0 + * |- index (0) + * [1] Helper item 1 + * |- list_head item 1 + * |- index (1) + * To access the containing struct from one of the list_head items: + * 1. Get the helper item from the list_head item using + * helper item = + * container_of(list_head item, helper struct type, list_head field) + * 2. Get the contining struct from the helper item and its index in the array: + * containing struct = + * container_of(helper item, containing struct type, helper field[index]) + */ +struct encap_flow_item { + struct list_head list; + int index; +}; + struct mlx5e_tc_flow { struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; u16 flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; - struct list_head encap; /* flows sharing the same encap ID */ + /* Flow can be associated with multiple encap IDs. + * The number of encaps is bounded by the number of supported + * destinations. + */ + struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_tc_flow *peer_flow; struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ struct list_head hairpin; /* flows sharing the same hairpin */ + struct list_head peer; /* flows with peer flow */ union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -95,11 +124,12 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { - struct ip_tunnel_info tun_info; + struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; void *mod_hdr_actions; - int mirred_ifindex; + int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 @@ -316,7 +346,7 @@ static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) for (i = 0; i < sz; i++) { ix = i; - if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR) + if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR) ix = mlx5e_bits_invert(i, ilog2(sz)); ix = indirection_rqt[ix]; rqn = hp->pair->rqn[ix]; @@ -360,13 +390,15 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) void *tirc; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt); + memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in)); tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); MLX5_SET(tirc, tirc, transport_domain, hp->tdn); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); + mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false); err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]); @@ -569,7 +601,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct netlink_ext_ack *extack) { - int peer_ifindex = parse_attr->mirred_ifindex; + int peer_ifindex = parse_attr->mirred_ifindex[0]; struct mlx5_hairpin_params params; struct mlx5_core_dev *peer_mdev; struct mlx5e_hairpin_entry *hpe; @@ -802,7 +834,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter); - if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) { + if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } @@ -815,14 +847,15 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, } static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow); + struct mlx5e_tc_flow *flow, int out_index); static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct ip_tunnel_info *tun_info, struct net_device *mirred_dev, struct net_device **encap_dev, struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack); + struct netlink_ext_ack *extack, + int out_index); static struct mlx5_flow_handle * mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, @@ -836,7 +869,7 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, if (IS_ERR(rule)) return rule; - if (attr->mirror_count) { + if (attr->split_count) { flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); if (IS_ERR(flow->rule[1])) { mlx5_eswitch_del_offloaded_rule(esw, rule, attr); @@ -855,7 +888,7 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, { flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - if (attr->mirror_count) + if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); @@ -870,9 +903,9 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule; memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); - slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - slow_attr->mirror_count = 0, - slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN, + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->split_count = 0; + slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) @@ -887,6 +920,9 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *slow_attr) { memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->split_count = 0; + slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); flow->flags &= ~MLX5E_TC_FLOW_SLOW; } @@ -906,12 +942,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; int err = 0, encap_err = 0; + int out_index; - /* if prios are not supported, keep the old behaviour of using same prio - * for all offloaded rules. - */ - if (!mlx5_eswitch_prios_supported(esw)) - attr->prio = 1; + if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) { + NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW"); + return -EOPNOTSUPP; + } if (attr->chain > max_chain) { NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); @@ -925,20 +961,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, goto err_max_prio_chain; } - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + int mirred_ifindex; + + if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mirred_ifindex = attr->parse_attr->mirred_ifindex[out_index]; out_dev = __dev_get_by_index(dev_net(priv->netdev), - attr->parse_attr->mirred_ifindex); - encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info, - out_dev, &encap_dev, flow, - extack); - if (encap_err && encap_err != -EAGAIN) { - err = encap_err; + mirred_ifindex); + err = mlx5e_attach_encap(priv, + &parse_attr->tun_info[out_index], + out_dev, &encap_dev, flow, + extack, out_index); + if (err && err != -EAGAIN) goto err_attach_encap; - } + if (err == -EAGAIN) + encap_err = err; out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; - attr->out_rep[attr->out_count] = rpriv->rep; - attr->out_mdev[attr->out_count++] = out_priv->mdev; + attr->dests[out_index].rep = rpriv->rep; + attr->dests[out_index].mdev = out_priv->mdev; } err = mlx5_eswitch_add_vlan_action(esw, attr); @@ -953,7 +996,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(esw->dev, true); + counter = mlx5_fc_create(attr->counter_dev, true); if (IS_ERR(counter)) { err = PTR_ERR(counter); goto err_create_counter; @@ -982,15 +1025,16 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return 0; err_add_rule: - mlx5_fc_destroy(esw->dev, counter); + mlx5_fc_destroy(attr->counter_dev, counter); err_create_counter: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); err_mod_hdr: mlx5_eswitch_del_vlan_action(esw, attr); err_add_vlan: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) - mlx5e_detach_encap(priv, flow); + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + mlx5e_detach_encap(priv, flow, out_index); err_attach_encap: err_max_prio_chain: return err; @@ -1002,6 +1046,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5_esw_flow_attr slow_attr; + int out_index; if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { if (flow->flags & MLX5E_TC_FLOW_SLOW) @@ -1012,16 +1057,16 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5_eswitch_del_vlan_action(esw, attr); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { - mlx5e_detach_encap(priv, flow); - kvfree(attr->parse_attr); - } + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + mlx5e_detach_encap(priv, flow, out_index); + kvfree(attr->parse_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - mlx5_fc_destroy(esw->dev, attr->counter); + mlx5_fc_destroy(attr->counter_dev, attr->counter); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -1031,10 +1076,12 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr slow_attr, *esw_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; + struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err; - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, e->encap_size, e->encap_header, MLX5_FLOW_NAMESPACE_FDB, &e->encap_id); @@ -1046,11 +1093,31 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(priv); - list_for_each_entry(flow, &e->flows, encap) { + list_for_each_entry(efi, &e->flows, list) { + bool all_flow_encaps_valid = true; + int i; + + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); esw_attr = flow->esw_attr; - esw_attr->encap_id = e->encap_id; spec = &esw_attr->parse_attr->spec; + esw_attr->dests[efi->index].encap_id = e->encap_id; + esw_attr->dests[efi->index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + /* Flow can be associated with multiple encap entries. + * Before offloading the flow verify that all of them have + * a valid neighbour. + */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) + continue; + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) { + all_flow_encaps_valid = false; + break; + } + } + /* Do not offload flows with unresolved neighbors */ + if (!all_flow_encaps_valid) + continue; /* update from slow path rule to encap rule */ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); if (IS_ERR(rule)) { @@ -1073,14 +1140,18 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr slow_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; + struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err; - list_for_each_entry(flow, &e->flows, encap) { + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); spec = &flow->esw_attr->parse_attr->spec; /* update from encap rule to slow path rule */ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr); + /* mark the flow's encap dest as non-valid */ + flow->esw_attr->dests[efi->index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1094,10 +1165,9 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, flow->rule[0] = rule; } - if (e->flags & MLX5_ENCAP_ENTRY_VALID) { - e->flags &= ~MLX5_ENCAP_ENTRY_VALID; - mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); - } + /* we know that the encap is valid */ + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); } static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) @@ -1129,9 +1199,12 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) return; list_for_each_entry(e, &nhe->encap_list, encap_list) { + struct encap_flow_item *efi; if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) continue; - list_for_each_entry(flow, &e->flows, encap) { + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, + encaps[efi->index]); if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { counter = mlx5e_tc_get_counter(flow); mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); @@ -1161,11 +1234,11 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) } static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, int out_index) { - struct list_head *next = flow->encap.next; + struct list_head *next = flow->encaps[out_index].list.next; - list_del(&flow->encap); + list_del(&flow->encaps[out_index].list); if (list_empty(next)) { struct mlx5e_encap_entry *e; @@ -1181,49 +1254,55 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, } } -static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow) +static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) - mlx5e_tc_del_fdb_flow(priv, flow); - else - mlx5e_tc_del_nic_flow(priv, flow); + struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; + + if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || + !(flow->flags & MLX5E_TC_FLOW_DUP)) + return; + + mutex_lock(&esw->offloads.peer_mutex); + list_del(&flow->peer); + mutex_unlock(&esw->offloads.peer_mutex); + + flow->flags &= ~MLX5E_TC_FLOW_DUP; + + mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); + kvfree(flow->peer_flow); + flow->peer_flow = NULL; } -static void parse_vxlan_attr(struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f) +static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { - void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); - void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); + struct mlx5_core_dev *dev = flow->priv->mdev; + struct mlx5_devcom *devcom = dev->priv.devcom; + struct mlx5_eswitch *peer_esw; - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return; - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->key); - struct flow_dissector_key_keyid *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); - MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, - be32_to_cpu(mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, - be32_to_cpu(key->keyid)); + __mlx5e_tc_del_fdb_peer_flow(flow); + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +} + +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + mlx5e_tc_del_fdb_peer_flow(flow); + mlx5e_tc_del_fdb_flow(priv, flow); + } else { + mlx5e_tc_del_nic_flow(priv, flow); } } + static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, + struct net_device *filter_dev) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1235,48 +1314,14 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL, f->key); + int err = 0; - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->mask); - - /* Full udp dst port must be given */ - if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) - goto vxlan_match_offload_err; - - if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) - parse_vxlan_attr(spec, f); - else { - NL_SET_ERR_MSG_MOD(extack, - "port isn't an offloaded vxlan udp dport"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst)); - return -EOPNOTSUPP; - } - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_dport, ntohs(mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_dport, ntohs(key->dst)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_sport, ntohs(mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_sport, ntohs(key->src)); - } else { /* udp dst port must be given */ -vxlan_match_offload_err: + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, + headers_c, headers_v); + if (err) { NL_SET_ERR_MSG_MOD(extack, - "IP tunnel decap offload supported only for vxlan, must set UDP dport"); - netdev_warn(priv->netdev, - "IP tunnel decap offload supported only for vxlan, must set UDP dport\n"); - return -EOPNOTSUPP; + "failed to parse tunnel attributes"); + return err; } if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { @@ -1380,6 +1425,7 @@ vxlan_match_offload_err: static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, + struct net_device *filter_dev, u8 *match_level) { struct netlink_ext_ack *extack = f->common.extack; @@ -1431,7 +1477,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, switch (key->addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f)) + if (parse_tunnel_attr(priv, spec, f, filter_dev)) return -EOPNOTSUPP; break; default: @@ -1447,31 +1493,21 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, inner_headers); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, + FLOW_DISSECTOR_KEY_BASIC, f->key); - struct flow_dissector_key_eth_addrs *mask = + struct flow_dissector_key_basic *mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, + FLOW_DISSECTOR_KEY_BASIC, f->mask); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(key->n_proto)); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dmac_47_16), - mask->dst); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), - key->dst); - - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - smac_47_16), - mask->src); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - smac_47_16), - key->src); - - if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst)) + if (mask->n_proto) *match_level = MLX5_MATCH_L2; } @@ -1505,9 +1541,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_L2; } - } else { + } else if (*match_level != MLX5_MATCH_NONE) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + *match_level = MLX5_MATCH_L2; } if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) { @@ -1545,21 +1582,31 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_dissector_key_eth_addrs *key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, + FLOW_DISSECTOR_KEY_ETH_ADDRS, f->key); - struct flow_dissector_key_basic *mask = + struct flow_dissector_key_eth_addrs *mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, + FLOW_DISSECTOR_KEY_ETH_ADDRS, f->mask); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, - ntohs(mask->n_proto)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ntohs(key->n_proto)); - if (mask->n_proto) + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dmac_47_16), + mask->dst); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), + key->dst); + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + smac_47_16), + mask->src); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + smac_47_16), + key->src); + + if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst)) *match_level = MLX5_MATCH_L2; } @@ -1586,10 +1633,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, /* the HW doesn't need L3 inline to match on frag=no */ if (!(key->flags & FLOW_DIS_IS_FRAGMENT)) - *match_level = MLX5_INLINE_MODE_L2; + *match_level = MLX5_MATCH_L2; /* *** L2 attributes parsing up to here *** */ else - *match_level = MLX5_INLINE_MODE_IP; + *match_level = MLX5_MATCH_L3; } } @@ -1772,7 +1819,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, + struct net_device *filter_dev) { struct netlink_ext_ack *extack = f->common.extack; struct mlx5_core_dev *dev = priv->mdev; @@ -1782,7 +1830,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, u8 match_level; int err; - err = __parse_cls_flower(priv, spec, f, &match_level); + err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level); if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; @@ -2135,7 +2183,6 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, { const struct tc_action *a; bool modify_ip_header; - LIST_HEAD(actions); u8 htype, ip_proto; void *headers_v; u16 ethertype; @@ -2224,7 +2271,6 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, { struct mlx5_nic_flow_attr *attr = flow->nic_attr; const struct tc_action *a; - LIST_HEAD(actions); u32 action = 0; int err, i; @@ -2267,7 +2313,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (priv->netdev->netdev_ops == peer_dev->netdev_ops && same_hw_devs(priv, netdev_priv(peer_dev))) { - parse_attr->mirred_ifindex = peer_dev->ifindex; + parse_attr->mirred_ifindex[0] = peer_dev->ifindex; flow->flags |= MLX5E_TC_FLOW_HAIRPIN; action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; @@ -2316,45 +2362,6 @@ static inline int hash_encap_info(struct ip_tunnel_key *key) return jhash(key, sizeof(*key), 0); } -static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct net_device **out_dev, - struct flowi4 *fl4, - struct neighbour **out_n, - u8 *out_ttl) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *uplink_rpriv; - struct rtable *rt; - struct neighbour *n = NULL; - -#if IS_ENABLED(CONFIG_INET) - int ret; - - rt = ip_route_output_key(dev_net(mirred_dev), fl4); - ret = PTR_ERR_OR_ZERO(rt); - if (ret) - return ret; -#else - return -EOPNOTSUPP; -#endif - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) - *out_dev = uplink_rpriv->netdev; - else - *out_dev = rt->dst.dev; - - if (!(*out_ttl)) - *out_ttl = ip4_dst_hoplimit(&rt->dst); - n = dst_neigh_lookup(&rt->dst, &fl4->daddr); - ip_rt_put(rt); - if (!n) - return -ENOMEM; - - *out_n = n; - return 0; -} static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, struct net_device *peer_netdev) @@ -2370,377 +2377,24 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS)); } -static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct net_device **out_dev, - struct flowi6 *fl6, - struct neighbour **out_n, - u8 *out_ttl) -{ - struct neighbour *n = NULL; - struct dst_entry *dst; - -#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) - struct mlx5e_rep_priv *uplink_rpriv; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - int ret; - - ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, - fl6); - if (ret < 0) - return ret; - - if (!(*out_ttl)) - *out_ttl = ip6_dst_hoplimit(dst); - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) - *out_dev = uplink_rpriv->netdev; - else - *out_dev = dst->dev; -#else - return -EOPNOTSUPP; -#endif - - n = dst_neigh_lookup(dst, &fl6->daddr); - dst_release(dst); - if (!n) - return -ENOMEM; - - *out_n = n; - return 0; -} - -static void gen_vxlan_header_ipv4(struct net_device *out_dev, - char buf[], int encap_size, - unsigned char h_dest[ETH_ALEN], - u8 tos, u8 ttl, - __be32 daddr, - __be32 saddr, - __be16 udp_dst_port, - __be32 vx_vni) -{ - struct ethhdr *eth = (struct ethhdr *)buf; - struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); - struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr)); - struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); - - memset(buf, 0, encap_size); - - ether_addr_copy(eth->h_dest, h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IP); - - ip->daddr = daddr; - ip->saddr = saddr; - - ip->tos = tos; - ip->ttl = ttl; - ip->protocol = IPPROTO_UDP; - ip->version = 0x4; - ip->ihl = 0x5; - - udp->dest = udp_dst_port; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(vx_vni); -} - -static void gen_vxlan_header_ipv6(struct net_device *out_dev, - char buf[], int encap_size, - unsigned char h_dest[ETH_ALEN], - u8 tos, u8 ttl, - struct in6_addr *daddr, - struct in6_addr *saddr, - __be16 udp_dst_port, - __be32 vx_vni) -{ - struct ethhdr *eth = (struct ethhdr *)buf; - struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); - struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr)); - struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); - - memset(buf, 0, encap_size); - - ether_addr_copy(eth->h_dest, h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IPV6); - - ip6_flow_hdr(ip6h, tos, 0); - /* the HW fills up ipv6 payload len */ - ip6h->nexthdr = IPPROTO_UDP; - ip6h->hop_limit = ttl; - ip6h->daddr = *daddr; - ip6h->saddr = *saddr; - - udp->dest = udp_dst_port; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(vx_vni); -} - -static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) -{ - int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; - struct ip_tunnel_key *tun_key = &e->tun_info.key; - struct net_device *out_dev; - struct neighbour *n = NULL; - struct flowi4 fl4 = {}; - u8 nud_state, tos, ttl; - char *encap_header; - int err; - - if (max_encap_size < ipv4_encap_size) { - mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", - ipv4_encap_size, max_encap_size); - return -EOPNOTSUPP; - } - - encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - fl4.flowi4_proto = IPPROTO_UDP; - fl4.fl4_dport = tun_key->tp_dst; - break; - default: - err = -EOPNOTSUPP; - goto free_encap; - } - - tos = tun_key->tos; - ttl = tun_key->ttl; - fl4.flowi4_tos = tun_key->tos; - fl4.daddr = tun_key->u.ipv4.dst; - fl4.saddr = tun_key->u.ipv4.src; - - err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, - &fl4, &n, &ttl); - if (err) - goto free_encap; - - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - e->out_dev = out_dev; - - /* It's importent to add the neigh to the hash table before checking - * the neigh validity state. So if we'll get a notification, in case the - * neigh changes it's validity state, we would find the relevant neigh - * in the hash. - */ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); - if (err) - goto free_encap; - - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - gen_vxlan_header_ipv4(out_dev, encap_header, - ipv4_encap_size, e->h_dest, tos, ttl, - fl4.daddr, - fl4.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); - break; - default: - err = -EOPNOTSUPP; - goto destroy_neigh_entry; - } - e->encap_size = ipv4_encap_size; - e->encap_header = encap_header; - - if (!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - err = -EAGAIN; - goto out; - } - - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, - ipv4_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) - goto destroy_neigh_entry; - - e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - neigh_release(n); - return err; - -destroy_neigh_entry: - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -free_encap: - kfree(encap_header); -out: - if (n) - neigh_release(n); - return err; -} - -static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) -{ - int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; - struct ip_tunnel_key *tun_key = &e->tun_info.key; - struct net_device *out_dev; - struct neighbour *n = NULL; - struct flowi6 fl6 = {}; - u8 nud_state, tos, ttl; - char *encap_header; - int err; - - if (max_encap_size < ipv6_encap_size) { - mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", - ipv6_encap_size, max_encap_size); - return -EOPNOTSUPP; - } - - encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - fl6.flowi6_proto = IPPROTO_UDP; - fl6.fl6_dport = tun_key->tp_dst; - break; - default: - err = -EOPNOTSUPP; - goto free_encap; - } - - tos = tun_key->tos; - ttl = tun_key->ttl; - - fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); - fl6.daddr = tun_key->u.ipv6.dst; - fl6.saddr = tun_key->u.ipv6.src; - - err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, - &fl6, &n, &ttl); - if (err) - goto free_encap; - - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - e->out_dev = out_dev; - - /* It's importent to add the neigh to the hash table before checking - * the neigh validity state. So if we'll get a notification, in case the - * neigh changes it's validity state, we would find the relevant neigh - * in the hash. - */ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); - if (err) - goto free_encap; - - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - gen_vxlan_header_ipv6(out_dev, encap_header, - ipv6_encap_size, e->h_dest, tos, ttl, - &fl6.daddr, - &fl6.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); - break; - default: - err = -EOPNOTSUPP; - goto destroy_neigh_entry; - } - - e->encap_size = ipv6_encap_size; - e->encap_header = encap_header; - - if (!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - err = -EAGAIN; - goto out; - } - - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, - ipv6_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) - goto destroy_neigh_entry; - - e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - neigh_release(n); - return err; - -destroy_neigh_entry: - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -free_encap: - kfree(encap_header); -out: - if (n) - neigh_release(n); - return err; -} static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct ip_tunnel_info *tun_info, struct net_device *mirred_dev, struct net_device **encap_dev, struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + int out_index) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; unsigned short family = ip_tunnel_info_af(tun_info); struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct ip_tunnel_key *key = &tun_info->key; struct mlx5e_encap_entry *e; - int tunnel_type, err = 0; uintptr_t hash_key; bool found = false; - - /* udp dst port must be set */ - if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) - goto vxlan_encap_offload_err; - - /* setting udp src port isn't supported */ - if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) { -vxlan_encap_offload_err: - NL_SET_ERR_MSG_MOD(extack, - "must set udp dst port and not set udp src port"); - netdev_warn(priv->netdev, - "must set udp dst port and not set udp src port\n"); - return -EOPNOTSUPP; - } - - if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; - } else { - NL_SET_ERR_MSG_MOD(extack, - "port isn't an offloaded vxlan udp dport"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst)); - return -EOPNOTSUPP; - } + int err = 0; hash_key = hash_encap_info(key); @@ -2761,13 +2415,16 @@ vxlan_encap_offload_err: return -ENOMEM; e->tun_info = *tun_info; - e->tunnel_type = tunnel_type; + err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); + if (err) + goto out_err; + INIT_LIST_HEAD(&e->flows); if (family == AF_INET) - err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e); + err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) - err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e); + err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); if (err && err != -EAGAIN) goto out_err; @@ -2775,12 +2432,15 @@ vxlan_encap_offload_err: hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); attach_flow: - list_add(&flow->encap, &e->flows); + list_add(&flow->encaps[out_index].list, &e->flows); + flow->encaps[out_index].index = out_index; *encap_dev = e->out_dev; - if (e->flags & MLX5_ENCAP_ENTRY_VALID) - attr->encap_id = e->encap_id; - else + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + attr->dests[out_index].encap_id = e->encap_id; + attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + } else { err = -EAGAIN; + } return err; @@ -2849,7 +2509,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_rep_priv *rpriv = priv->ppriv; struct ip_tunnel_info *info = NULL; const struct tc_action *a; - LIST_HEAD(actions); bool encap = false; u32 action = 0; int err, i; @@ -2874,7 +2533,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return err; action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - attr->mirror_count = attr->out_count; + attr->split_count = attr->out_count; continue; } @@ -2892,6 +2551,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct net_device *out_dev; out_dev = tcf_mirred_dev(a); + if (!out_dev) { + /* out_dev is NULL when filters with + * non-existing mirred device are replayed to + * the driver. + */ + return -EINVAL; + } if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { NL_SET_ERR_MSG_MOD(extack, @@ -2901,23 +2567,47 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EOPNOTSUPP; } + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; if (switchdev_port_same_parent_id(priv->netdev, out_dev) || is_merged_eswitch_dev(priv, out_dev)) { - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); + + if (uplink_upper && + netif_is_lag_master(uplink_upper) && + uplink_upper == out_dev) + out_dev = uplink_dev; + + if (!mlx5e_eswitch_rep(out_dev)) + return -EOPNOTSUPP; + out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; - attr->out_rep[attr->out_count] = rpriv->rep; - attr->out_mdev[attr->out_count++] = out_priv->mdev; + attr->dests[attr->out_count].rep = rpriv->rep; + attr->dests[attr->out_count].mdev = out_priv->mdev; + attr->out_count++; } else if (encap) { - parse_attr->mirred_ifindex = out_dev->ifindex; - parse_attr->tun_info = *info; + parse_attr->mirred_ifindex[attr->out_count] = + out_dev->ifindex; + parse_attr->tun_info[attr->out_count] = *info; + encap = false; attr->parse_attr = parse_attr; - action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - /* attr->out_rep is resolved when we handle encap */ + attr->dests[attr->out_count].flags |= + MLX5_ESW_DEST_ENCAP; + attr->out_count++; + /* attr->dests[].rep is resolved when we + * handle encap + */ + } else if (parse_attr->filter_dev != priv->netdev) { + /* All mlx5 devices are called to configure + * high level device filters. Therefore, the + * *attempt* to install a filter on invalid + * eswitch should not trigger an explicit error + */ + return -EINVAL; } else { NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); @@ -2934,7 +2624,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, encap = true; else return -EOPNOTSUPP; - attr->mirror_count = attr->out_count; continue; } @@ -2944,7 +2633,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (err) return err; - attr->mirror_count = attr->out_count; + attr->split_count = attr->out_count; continue; } @@ -2965,8 +2654,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range"); return -EOPNOTSUPP; } - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = dest_chain; continue; @@ -2979,7 +2667,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) return -EOPNOTSUPP; - if (attr->out_count > 1 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { + if (attr->dest_chain) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); + return -EOPNOTSUPP; + } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { NL_SET_ERR_MSG_MOD(extack, "current firmware doesn't support split rule for port mirroring"); netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n"); @@ -2998,6 +2694,11 @@ static void get_flags(int flags, u16 *flow_flags) if (flags & MLX5E_TC_EGRESS) __flow_flags |= MLX5E_TC_FLOW_EGRESS; + if (flags & MLX5E_TC_ESW_OFFLOAD) + __flow_flags |= MLX5E_TC_FLOW_ESWITCH; + if (flags & MLX5E_TC_NIC_OFFLOAD) + __flow_flags |= MLX5E_TC_FLOW_NIC; + *flow_flags = __flow_flags; } @@ -3008,18 +2709,32 @@ static const struct rhashtable_params tc_ht_params = { .automatic_shrinking = true, }; -static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *uplink_rpriv; - if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) { + if (flags & MLX5E_TC_ESW_OFFLOAD) { uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - return &uplink_rpriv->tc_ht; - } else + return &uplink_rpriv->uplink_priv.tc_ht; + } else /* NIC offload */ return &priv->fs.tc.ht; } +static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) +{ + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + bool is_rep_ingress = attr->in_rep->vport != FDB_UPLINK_VPORT && + flow->flags & MLX5E_TC_FLOW_INGRESS; + bool act_is_encap = !!(attr->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); + bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS); + + return esw_paired && mlx5_lag_is_sriov(attr->in_mdev) && + (is_rep_ingress || act_is_encap); +} + static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, struct tc_cls_flower_offload *f, u16 flow_flags, @@ -3041,10 +2756,6 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, flow->flags = flow_flags; flow->priv = priv; - err = parse_cls_flower(priv, flow, &parse_attr->spec, f); - if (err) - goto err_free; - *__flow = flow; *__parse_attr = parse_attr; @@ -3057,12 +2768,16 @@ err_free: } static int -mlx5e_add_fdb_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, - u16 flow_flags, - struct mlx5e_tc_flow **__flow) +__mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct net_device *filter_dev, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev, + struct mlx5e_tc_flow **__flow) { struct netlink_ext_ack *extack = f->common.extack; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; int attr_size, err; @@ -3073,6 +2788,12 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, &parse_attr, &flow); if (err) goto out; + parse_attr->filter_dev = filter_dev; + flow->esw_attr->parse_attr = parse_attr; + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); + if (err) + goto err_free; flow->esw_attr->chain = f->common.chain_index; flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; @@ -3080,14 +2801,19 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; + flow->esw_attr->in_rep = in_rep; + flow->esw_attr->in_mdev = in_mdev; + + if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == + MLX5_COUNTER_SOURCE_ESWITCH) + flow->esw_attr->counter_dev = in_mdev; + else + flow->esw_attr->counter_dev = priv->mdev; + err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); if (err) goto err_free; - if (!(flow->esw_attr->action & - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)) - kvfree(parse_attr); - *__flow = flow; return 0; @@ -3099,10 +2825,92 @@ out: return err; } +static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, + struct mlx5e_tc_flow *flow) +{ + struct mlx5e_priv *priv = flow->priv, *peer_priv; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_rep_priv *peer_urpriv; + struct mlx5e_tc_flow *peer_flow; + struct mlx5_core_dev *in_mdev; + int err = 0; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return -ENODEV; + + peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); + peer_priv = netdev_priv(peer_urpriv->netdev); + + /* in_mdev is assigned of which the packet originated from. + * So packets redirected to uplink use the same mdev of the + * original flow and packets redirected from uplink use the + * peer mdev. + */ + if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT) + in_mdev = peer_priv->mdev; + else + in_mdev = priv->mdev; + + parse_attr = flow->esw_attr->parse_attr; + err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags, + parse_attr->filter_dev, + flow->esw_attr->in_rep, in_mdev, &peer_flow); + if (err) + goto out; + + flow->peer_flow = peer_flow; + flow->flags |= MLX5E_TC_FLOW_DUP; + mutex_lock(&esw->offloads.peer_mutex); + list_add_tail(&flow->peer, &esw->offloads.peer_flows); + mutex_unlock(&esw->offloads.peer_mutex); + +out: + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return err; +} + +static int +mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct net_device *filter_dev, + struct mlx5e_tc_flow **__flow) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *in_rep = rpriv->rep; + struct mlx5_core_dev *in_mdev = priv->mdev; + struct mlx5e_tc_flow *flow; + int err; + + err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, + in_mdev, &flow); + if (err) + goto out; + + if (is_peer_flow_needed(flow)) { + err = mlx5e_tc_add_fdb_peer_flow(f, flow); + if (err) { + mlx5e_tc_del_fdb_flow(priv, flow); + goto out; + } + } + + *__flow = flow; + + return 0; + +out: + return err; +} + static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, u16 flow_flags, + struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { struct netlink_ext_ack *extack = f->common.extack; @@ -3121,6 +2929,12 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (err) goto out; + parse_attr->filter_dev = filter_dev; + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); + if (err) + goto err_free; + err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack); if (err) goto err_free; @@ -3146,6 +2960,7 @@ static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags, + struct net_device *filter_dev, struct mlx5e_tc_flow **flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -3158,18 +2973,20 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, return -EOPNOTSUPP; if (esw && esw->mode == SRIOV_OFFLOADS) - err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow); + err = mlx5e_add_fdb_flow(priv, f, flow_flags, + filter_dev, flow); else - err = mlx5e_add_nic_flow(priv, f, flow_flags, flow); + err = mlx5e_add_nic_flow(priv, f, flow_flags, + filter_dev, flow); return err; } -int mlx5e_configure_flower(struct mlx5e_priv *priv, +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { struct netlink_ext_ack *extack = f->common.extack; - struct rhashtable *tc_ht = get_tc_ht(priv); + struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; int err = 0; @@ -3183,7 +3000,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, goto out; } - err = mlx5e_tc_add_flow(priv, f, flags, &flow); + err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); if (err) goto out; @@ -3211,10 +3028,10 @@ static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) return false; } -int mlx5e_delete_flower(struct mlx5e_priv *priv, +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { - struct rhashtable *tc_ht = get_tc_ht(priv); + struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); @@ -3230,10 +3047,12 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, return 0; } -int mlx5e_stats_flower(struct mlx5e_priv *priv, +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { - struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; + struct rhashtable *tc_ht = get_tc_ht(priv, flags); + struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; struct mlx5_fc *counter; u64 bytes; @@ -3253,6 +3072,27 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + goto out; + + if ((flow->flags & MLX5E_TC_FLOW_DUP) && + (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) { + u64 bytes2; + u64 packets2; + u64 lastuse2; + + counter = mlx5e_tc_get_counter(flow->peer_flow); + mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); + + bytes += bytes2; + packets += packets2; + lastuse = max_t(u64, lastuse, lastuse2); + } + + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + +out: tcf_exts_stats_update(f->exts, bytes, packets, lastuse); return 0; @@ -3341,7 +3181,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) if (tc->netdevice_nb.notifier_call) unregister_netdevice_notifier(&tc->netdevice_nb); - rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); + rhashtable_destroy(&tc->ht); if (!IS_ERR_OR_NULL(tc->t)) { mlx5_destroy_flow_table(tc->t); @@ -3359,9 +3199,17 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); } -int mlx5e_tc_num_filters(struct mlx5e_priv *priv) +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) { - struct rhashtable *tc_ht = get_tc_ht(priv); + struct rhashtable *tc_ht = get_tc_ht(priv, flags); return atomic_read(&tc_ht->nelems); } + +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) +{ + struct mlx5e_tc_flow *flow, *tmp; + + list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer) + __mlx5e_tc_del_fdb_peer_flow(flow); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 49436bf3b80a..d2d87f978c06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -42,7 +42,9 @@ enum { MLX5E_TC_INGRESS = BIT(0), MLX5E_TC_EGRESS = BIT(1), - MLX5E_TC_LAST_EXPORTED_BIT = 1, + MLX5E_TC_NIC_OFFLOAD = BIT(2), + MLX5E_TC_ESW_OFFLOAD = BIT(3), + MLX5E_TC_LAST_EXPORTED_BIT = 3, }; int mlx5e_tc_nic_init(struct mlx5e_priv *priv); @@ -51,12 +53,12 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); -int mlx5e_configure_flower(struct mlx5e_priv *priv, +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); -int mlx5e_delete_flower(struct mlx5e_priv *priv, +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); -int mlx5e_stats_flower(struct mlx5e_priv *priv, +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); struct mlx5e_encap_entry; @@ -68,12 +70,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5e_neigh_hash_entry; void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); -int mlx5e_tc_num_filters(struct mlx5e_priv *priv); +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} -static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; } +static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) { return 0; } #endif #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 6dacaeba2fbf..598ad7e4d5c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -127,7 +127,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, else #endif if (skb_vlan_tag_present(skb)) - up = skb->vlan_tci >> VLAN_PRIO_SHIFT; + up = skb_vlan_tag_get_prio(skb); /* channel_ix can be larger than num_channels since * dev->num_real_tx_queues = num_channels * num_tc @@ -459,9 +459,10 @@ static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq, u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq); netdev_err(sq->channel->netdev, - "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", - sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome, - err_cqe->vendor_err_synd); + "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", + sq->cq.mcq.cqn, ci, sq->sqn, + get_cqe_opcode((struct mlx5_cqe64 *)err_cqe), + err_cqe->syndrome, err_cqe->vendor_err_synd); mlx5_dump_err_cqe(sq->cq.mdev, err_cqe); } @@ -507,7 +508,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wqe_counter = be16_to_cpu(cqe->wqe_counter); - if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) { + if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { mlx5e_dump_error_cqe(sq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 85d517360157..b4af5e19f6ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -76,6 +76,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_rq *rq = &c->rq; bool busy = false; int work_done = 0; int i; @@ -85,17 +86,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); - busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL); if (c->xdp) - busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq); + busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq); if (likely(budget)) { /* budget=0 means: don't poll rx rings */ - work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); + work_done = mlx5e_poll_rx_cq(&rq->cq, budget); busy |= work_done == budget; } - busy |= c->rq.post_wqes(&c->rq); + busy |= c->rq.post_wqes(rq); if (busy) { if (likely(mlx5e_channel_no_affinity_change(c))) @@ -115,9 +116,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_cq_arm(&c->sq[i].cq); } - mlx5e_handle_rx_dim(&c->rq); + mlx5e_handle_rx_dim(rq); - mlx5e_cq_arm(&c->rq.cq); + mlx5e_cq_arm(&rq->cq); mlx5e_cq_arm(&c->icosq.cq); mlx5e_cq_arm(&c->xdpsq.cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index c1e1a16a9b07..ee04aab65a9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -31,20 +31,22 @@ */ #include <linux/interrupt.h> +#include <linux/notifier.h> #include <linux/module.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/eq.h> #include <linux/mlx5/cmd.h> #ifdef CONFIG_RFS_ACCEL #include <linux/cpu_rmap.h> #endif #include "mlx5_core.h" +#include "lib/eq.h" #include "fpga/core.h" #include "eswitch.h" #include "lib/clock.h" #include "diag/fw_tracer.h" enum { - MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), MLX5_EQE_OWNER_INIT_VAL = 0x1, }; @@ -55,14 +57,32 @@ enum { }; enum { - MLX5_NUM_SPARE_EQE = 0x80, - MLX5_NUM_ASYNC_EQE = 0x1000, - MLX5_NUM_CMD_EQE = 32, - MLX5_NUM_PF_DRAIN = 64, + MLX5_EQ_DOORBEL_OFFSET = 0x40, }; -enum { - MLX5_EQ_DOORBEL_OFFSET = 0x40, +struct mlx5_irq_info { + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; + void *context; /* dev_id provided to request_irq */ +}; + +struct mlx5_eq_table { + struct list_head comp_eqs_list; + struct mlx5_eq pages_eq; + struct mlx5_eq cmd_eq; + struct mlx5_eq async_eq; + + struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; + + /* Since CQ DB is stored in async_eq */ + struct mlx5_nb cq_err_nb; + + struct mutex lock; /* sync async eqs creations */ + int num_comp_vectors; + struct mlx5_irq_info *irq_info; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -78,17 +98,6 @@ enum { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) -struct map_eq_in { - u64 mask; - u32 reserved; - u32 unmap_eqn; -}; - -struct cre_des_eq { - u8 reserved[15]; - u8 eqn; -}; - static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -99,213 +108,56 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) -{ - return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); -} - -static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) -{ - struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); - - return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe; -} - -static const char *eqe_type_str(u8 type) -{ - switch (type) { - case MLX5_EVENT_TYPE_COMP: - return "MLX5_EVENT_TYPE_COMP"; - case MLX5_EVENT_TYPE_PATH_MIG: - return "MLX5_EVENT_TYPE_PATH_MIG"; - case MLX5_EVENT_TYPE_COMM_EST: - return "MLX5_EVENT_TYPE_COMM_EST"; - case MLX5_EVENT_TYPE_SQ_DRAINED: - return "MLX5_EVENT_TYPE_SQ_DRAINED"; - case MLX5_EVENT_TYPE_SRQ_LAST_WQE: - return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; - case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: - return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; - case MLX5_EVENT_TYPE_CQ_ERROR: - return "MLX5_EVENT_TYPE_CQ_ERROR"; - case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: - return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; - case MLX5_EVENT_TYPE_PATH_MIG_FAILED: - return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; - case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; - case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; - case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: - return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; - case MLX5_EVENT_TYPE_INTERNAL_ERROR: - return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; - case MLX5_EVENT_TYPE_PORT_CHANGE: - return "MLX5_EVENT_TYPE_PORT_CHANGE"; - case MLX5_EVENT_TYPE_GPIO_EVENT: - return "MLX5_EVENT_TYPE_GPIO_EVENT"; - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: - return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: - return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; - case MLX5_EVENT_TYPE_REMOTE_CONFIG: - return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; - case MLX5_EVENT_TYPE_DB_BF_CONGESTION: - return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; - case MLX5_EVENT_TYPE_STALL_EVENT: - return "MLX5_EVENT_TYPE_STALL_EVENT"; - case MLX5_EVENT_TYPE_CMD: - return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_PAGE_REQUEST: - return "MLX5_EVENT_TYPE_PAGE_REQUEST"; - case MLX5_EVENT_TYPE_PAGE_FAULT: - return "MLX5_EVENT_TYPE_PAGE_FAULT"; - case MLX5_EVENT_TYPE_PPS_EVENT: - return "MLX5_EVENT_TYPE_PPS_EVENT"; - case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: - return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; - case MLX5_EVENT_TYPE_FPGA_ERROR: - return "MLX5_EVENT_TYPE_FPGA_ERROR"; - case MLX5_EVENT_TYPE_FPGA_QP_ERROR: - return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; - case MLX5_EVENT_TYPE_GENERAL_EVENT: - return "MLX5_EVENT_TYPE_GENERAL_EVENT"; - case MLX5_EVENT_TYPE_DEVICE_TRACER: - return "MLX5_EVENT_TYPE_DEVICE_TRACER"; - default: - return "Unrecognized event"; - } -} - -static enum mlx5_dev_event port_subtype_event(u8 subtype) -{ - switch (subtype) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - return MLX5_DEV_EVENT_PORT_DOWN; - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - return MLX5_DEV_EVENT_PORT_UP; - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - return MLX5_DEV_EVENT_PORT_INITIALIZED; - case MLX5_PORT_CHANGE_SUBTYPE_LID: - return MLX5_DEV_EVENT_LID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - return MLX5_DEV_EVENT_PKEY_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - return MLX5_DEV_EVENT_GUID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - return MLX5_DEV_EVENT_CLIENT_REREG; - } - return -1; -} - -static void eq_update_ci(struct mlx5_eq *eq, int arm) +/* caller must eventually call mlx5_cq_put on the returned cq */ +static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) { - __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); - u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); - - __raw_writel((__force u32)cpu_to_be32(val), addr); - /* We still want ordering, just not swabbing, so add a barrier */ - mb(); -} + struct mlx5_cq_table *table = &eq->cq_table; + struct mlx5_core_cq *cq = NULL; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -static void eqe_pf_action(struct work_struct *work) -{ - struct mlx5_pagefault *pfault = container_of(work, - struct mlx5_pagefault, - work); - struct mlx5_eq *eq = pfault->eq; + spin_lock(&table->lock); + cq = radix_tree_lookup(&table->tree, cqn); + if (likely(cq)) + mlx5_cq_hold(cq); + spin_unlock(&table->lock); - mlx5_core_page_fault(eq->dev, pfault); - mempool_free(pfault, eq->pf_ctx.pool); + return cq; } -static void eq_pf_process(struct mlx5_eq *eq) +static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) { - struct mlx5_core_dev *dev = eq->dev; - struct mlx5_eqe_page_fault *pf_eqe; - struct mlx5_pagefault *pfault; + struct mlx5_eq_comp *eq_comp = eq_ptr; + struct mlx5_eq *eq = eq_ptr; struct mlx5_eqe *eqe; int set_ci = 0; + u32 cqn = -1; while ((eqe = next_eqe_sw(eq))) { - pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC); - if (!pfault) { - schedule_work(&eq->pf_ctx.work); - break; - } - + struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ dma_rmb(); - pf_eqe = &eqe->data.page_fault; - pfault->event_subtype = eqe->sub_type; - pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); - - mlx5_core_dbg(dev, - "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", - eqe->sub_type, pfault->bytes_committed); - - switch (eqe->sub_type) { - case MLX5_PFAULT_SUBTYPE_RDMA: - /* RDMA based event */ - pfault->type = - be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; - pfault->token = - be32_to_cpu(pf_eqe->rdma.pftype_token) & - MLX5_24BIT_MASK; - pfault->rdma.r_key = - be32_to_cpu(pf_eqe->rdma.r_key); - pfault->rdma.packet_size = - be16_to_cpu(pf_eqe->rdma.packet_length); - pfault->rdma.rdma_op_len = - be32_to_cpu(pf_eqe->rdma.rdma_op_len); - pfault->rdma.rdma_va = - be64_to_cpu(pf_eqe->rdma.rdma_va); - mlx5_core_dbg(dev, - "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", - pfault->type, pfault->token, - pfault->rdma.r_key); - mlx5_core_dbg(dev, - "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", - pfault->rdma.rdma_op_len, - pfault->rdma.rdma_va); - break; - - case MLX5_PFAULT_SUBTYPE_WQE: - /* WQE based event */ - pfault->type = - (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; - pfault->token = - be32_to_cpu(pf_eqe->wqe.token); - pfault->wqe.wq_num = - be32_to_cpu(pf_eqe->wqe.pftype_wq) & - MLX5_24BIT_MASK; - pfault->wqe.wqe_index = - be16_to_cpu(pf_eqe->wqe.wqe_index); - pfault->wqe.packet_size = - be16_to_cpu(pf_eqe->wqe.packet_length); - mlx5_core_dbg(dev, - "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", - pfault->type, pfault->token, - pfault->wqe.wq_num, - pfault->wqe.wqe_index); - break; - - default: - mlx5_core_warn(dev, - "Unsupported page fault event sub-type: 0x%02hhx\n", - eqe->sub_type); - /* Unsupported page faults should still be - * resolved by the page fault handler - */ + /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */ + cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; + + cq = mlx5_eq_cq_get(eq, cqn); + if (likely(cq)) { + ++cq->arm_sn; + cq->comp(cq); + mlx5_cq_put(cq); + } else { + mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); } - pfault->eq = eq; - INIT_WORK(&pfault->work, eqe_pf_action); - queue_work(eq->pf_ctx.wq, &pfault->work); - ++eq->cons_index; ++set_ci; + /* The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MLX5_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { eq_update_ci(eq, 0); set_ci = 0; @@ -313,165 +165,41 @@ static void eq_pf_process(struct mlx5_eq *eq) } eq_update_ci(eq, 1); -} -static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) -{ - struct mlx5_eq *eq = eq_ptr; - unsigned long flags; - - if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) { - eq_pf_process(eq); - spin_unlock_irqrestore(&eq->pf_ctx.lock, flags); - } else { - schedule_work(&eq->pf_ctx.work); - } + if (cqn != -1) + tasklet_schedule(&eq_comp->tasklet_ctx.task); return IRQ_HANDLED; } -/* mempool_refill() was proposed but unfortunately wasn't accepted - * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html - * Chip workaround. +/* Some architectures don't latch interrupts when they are disabled, so using + * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to + * avoid losing them. It is not recommended to use it, unless this is the last + * resort. */ -static void mempool_refill(mempool_t *pool) -{ - while (pool->curr_nr < pool->min_nr) - mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); -} - -static void eq_pf_action(struct work_struct *work) -{ - struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work); - - mempool_refill(eq->pf_ctx.pool); - - spin_lock_irq(&eq->pf_ctx.lock); - eq_pf_process(eq); - spin_unlock_irq(&eq->pf_ctx.lock); -} - -static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name) -{ - spin_lock_init(&pf_ctx->lock); - INIT_WORK(&pf_ctx->work, eq_pf_action); - - pf_ctx->wq = alloc_ordered_workqueue(name, - WQ_MEM_RECLAIM); - if (!pf_ctx->wq) - return -ENOMEM; - - pf_ctx->pool = mempool_create_kmalloc_pool - (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault)); - if (!pf_ctx->pool) - goto err_wq; - - return 0; -err_wq: - destroy_workqueue(pf_ctx->wq); - return -ENOMEM; -} - -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, - u32 wq_num, u8 type, int error) -{ - u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; - - MLX5_SET(page_fault_resume_in, in, opcode, - MLX5_CMD_OP_PAGE_FAULT_RESUME); - MLX5_SET(page_fault_resume_in, in, error, !!error); - MLX5_SET(page_fault_resume_in, in, page_fault_type, type); - MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); - MLX5_SET(page_fault_resume_in, in, token, token); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} -EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); -#endif - -static void general_event_handler(struct mlx5_core_dev *dev, - struct mlx5_eqe *eqe) -{ - switch (eqe->sub_type) { - case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: - if (dev->event) - dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0); - break; - default: - mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", - eqe->sub_type); - } -} - -static void mlx5_temp_warning_event(struct mlx5_core_dev *dev, - struct mlx5_eqe *eqe) -{ - u64 value_lsb; - u64 value_msb; - - value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); - value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); - - mlx5_core_warn(dev, - "High temperature on sensors with bit set %llx %llx", - value_msb, value_lsb); -} - -/* caller must eventually call mlx5_cq_put on the returned cq */ -static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) -{ - struct mlx5_cq_table *table = &eq->cq_table; - struct mlx5_core_cq *cq = NULL; - - spin_lock(&table->lock); - cq = radix_tree_lookup(&table->tree, cqn); - if (likely(cq)) - mlx5_cq_hold(cq); - spin_unlock(&table->lock); - - return cq; -} - -static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn) -{ - struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); - - if (unlikely(!cq)) { - mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); - return; - } - - ++cq->arm_sn; - - cq->comp(cq); - - mlx5_cq_put(cq); -} - -static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type) +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) { - struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); - - if (unlikely(!cq)) { - mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); - return; - } + u32 count_eqe; - cq->event(cq, event_type); + disable_irq(eq->core.irqn); + count_eqe = eq->core.cons_index; + mlx5_eq_comp_int(eq->core.irqn, eq); + count_eqe = eq->core.cons_index - count_eqe; + enable_irq(eq->core.irqn); - mlx5_cq_put(cq); + return count_eqe; } -static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) +static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) { struct mlx5_eq *eq = eq_ptr; - struct mlx5_core_dev *dev = eq->dev; + struct mlx5_eq_table *eqt; + struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; int set_ci = 0; - u32 cqn = -1; - u32 rsn; - u8 port; + + dev = eq->dev; + eqt = dev->priv.eq_table; while ((eqe = next_eqe_sw(eq))) { /* @@ -480,116 +208,12 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) */ dma_rmb(); - mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", - eq->eqn, eqe_type_str(eqe->type)); - switch (eqe->type) { - case MLX5_EVENT_TYPE_COMP: - cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; - mlx5_eq_cq_completion(eq, cqn); - break; - case MLX5_EVENT_TYPE_DCT_DRAINED: - rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; - rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); - mlx5_rsc_event(dev, rsn, eqe->type); - break; - case MLX5_EVENT_TYPE_PATH_MIG: - case MLX5_EVENT_TYPE_COMM_EST: - case MLX5_EVENT_TYPE_SQ_DRAINED: - case MLX5_EVENT_TYPE_SRQ_LAST_WQE: - case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: - case MLX5_EVENT_TYPE_PATH_MIG_FAILED: - case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); - mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n", - eqe_type_str(eqe->type), eqe->type, rsn); - mlx5_rsc_event(dev, rsn, eqe->type); - break; - - case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: - case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: - rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n", - eqe_type_str(eqe->type), eqe->type, rsn); - mlx5_srq_event(dev, rsn, eqe->type); - break; - - case MLX5_EVENT_TYPE_CMD: - mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); - break; + if (likely(eqe->type < MLX5_EVENT_TYPE_MAX)) + atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); + else + mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type); - case MLX5_EVENT_TYPE_PORT_CHANGE: - port = (eqe->data.port.port >> 4) & 0xf; - switch (eqe->sub_type) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - case MLX5_PORT_CHANGE_SUBTYPE_LID: - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - if (dev->event) - dev->event(dev, port_subtype_event(eqe->sub_type), - (unsigned long)port); - break; - default: - mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", - port, eqe->sub_type); - } - break; - case MLX5_EVENT_TYPE_CQ_ERROR: - cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; - mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", - cqn, eqe->data.cq_err.syndrome); - mlx5_eq_cq_event(eq, cqn, eqe->type); - break; - - case MLX5_EVENT_TYPE_PAGE_REQUEST: - { - u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id); - s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages); - - mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", - func_id, npages); - mlx5_core_req_pages_handler(dev, func_id, npages); - } - break; - - case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: - mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); - break; - - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: - mlx5_port_module_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_PPS_EVENT: - mlx5_pps_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_FPGA_ERROR: - case MLX5_EVENT_TYPE_FPGA_QP_ERROR: - mlx5_fpga_event(dev, eqe->type, &eqe->data.raw); - break; - - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: - mlx5_temp_warning_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_GENERAL_EVENT: - general_event_handler(dev, eqe); - break; - - case MLX5_EVENT_TYPE_DEVICE_TRACER: - mlx5_fw_tracer_event(dev, eqe); - break; - - default: - mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", - eqe->type, eq->eqn); - break; - } + atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; ++set_ci; @@ -608,30 +232,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) eq_update_ci(eq, 1); - if (cqn != -1) - tasklet_schedule(&eq->tasklet_ctx.task); - return IRQ_HANDLED; } -/* Some architectures don't latch interrupts when they are disabled, so using - * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to - * avoid losing them. It is not recommended to use it, unless this is the last - * resort. - */ -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq) -{ - u32 count_eqe; - - disable_irq(eq->irqn); - count_eqe = eq->cons_index; - mlx5_eq_int(eq->irqn, eq); - count_eqe = eq->cons_index - count_eqe; - enable_irq(eq->irqn); - - return count_eqe; -} - static void init_eq_buf(struct mlx5_eq *eq) { struct mlx5_eqe *eqe; @@ -643,39 +246,35 @@ static void init_eq_buf(struct mlx5_eq *eq) } } -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type) +static int +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, + struct mlx5_eq_param *param) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - irq_handler_t handler; + u8 vecidx = param->index; __be64 *pas; void *eqc; int inlen; u32 *in; int err; + if (eq_table->irq_info[vecidx].context) + return -EEXIST; + /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); spin_lock_init(&cq_table->lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); - eq->type = type; - eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); + eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) return err; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (type == MLX5_EQ_TYPE_PF) - handler = mlx5_eq_pf_int; - else -#endif - handler = mlx5_eq_int; - init_eq_buf(eq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + @@ -691,7 +290,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, mlx5_fill_page_array(&eq->buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); - MLX5_SET64(create_eq_in, in, event_bitmask, mask); + MLX5_SET64(create_eq_in, in, event_bitmask, param->mask); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); @@ -704,15 +303,17 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_in; - snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", + snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); + eq_table->irq_info[vecidx].context = param->context; + eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, handler, 0, - priv->irq_info[vecidx].name, eq); + err = request_irq(eq->irqn, param->handler, 0, + eq_table->irq_info[vecidx].name, param->context); if (err) goto err_eq; @@ -720,21 +321,6 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_irq; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (type == MLX5_EQ_TYPE_PF) { - err = init_pf_ctx(&eq->pf_ctx, name); - if (err) - goto err_irq; - } else -#endif - { - INIT_LIST_HEAD(&eq->tasklet_ctx.list); - INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); - spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, - (unsigned long)&eq->tasklet_ctx); - } - /* EQs are created in ARMED state */ eq_update_ci(eq, 1); @@ -756,27 +342,25 @@ err_buf: return err; } -int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + struct mlx5_irq_info *irq_info; int err; + irq_info = &eq_table->irq_info[eq->vecidx]; + mlx5_debug_eq_remove(dev, eq); - free_irq(eq->irqn, eq); + + free_irq(eq->irqn, irq_info->context); + irq_info->context = NULL; + err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); synchronize_irq(eq->irqn); - if (eq->type == MLX5_EQ_TYPE_COMP) { - tasklet_disable(&eq->tasklet_ctx.task); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - } else if (eq->type == MLX5_EQ_TYPE_PF) { - cancel_work_sync(&eq->pf_ctx.work); - destroy_workqueue(eq->pf_ctx.wq); - mempool_destroy(eq->pf_ctx.pool); -#endif - } mlx5_buf_free(dev, &eq->buf); return err; @@ -816,28 +400,106 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) return 0; } -int mlx5_eq_init(struct mlx5_core_dev *dev) +int mlx5_eq_table_init(struct mlx5_core_dev *dev) { - int err; + struct mlx5_eq_table *eq_table; + int i, err; - spin_lock_init(&dev->priv.eq_table.lock); + eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL); + if (!eq_table) + return -ENOMEM; + + dev->priv.eq_table = eq_table; err = mlx5_eq_debugfs_init(dev); + if (err) + goto kvfree_eq_table; + + mutex_init(&eq_table->lock); + for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) + ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + return 0; + +kvfree_eq_table: + kvfree(eq_table); + dev->priv.eq_table = NULL; return err; } -void mlx5_eq_cleanup(struct mlx5_core_dev *dev) +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) { mlx5_eq_debugfs_cleanup(dev); + kvfree(dev->priv.eq_table); } -int mlx5_start_eqs(struct mlx5_core_dev *dev) +/* Async EQs */ + +static int create_async_eq(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq *eq, struct mlx5_eq_param *param) { - struct mlx5_eq_table *table = &dev->priv.eq_table; - u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + mutex_lock(&eq_table->lock); + if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { + err = -ENOSPC; + goto unlock; + } + + err = create_map_eq(dev, eq, name, param); +unlock: + mutex_unlock(&eq_table->lock); + return err; +} + +static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; + mutex_lock(&eq_table->lock); + err = destroy_unmap_eq(dev, eq); + mutex_unlock(&eq_table->lock); + return err; +} + +static int cq_err_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eq_table *eqt; + struct mlx5_core_cq *cq; + struct mlx5_eqe *eqe; + struct mlx5_eq *eq; + u32 cqn; + + /* type == MLX5_EVENT_TYPE_CQ_ERROR */ + + eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); + eq = &eqt->async_eq; + eqe = data; + + cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; + mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", + cqn, eqe->data.cq_err.syndrome); + + cq = mlx5_eq_cq_get(eq, cqn); + if (unlikely(!cq)) { + mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); + return NOTIFY_OK; + } + + cq->event(cq, type); + + mlx5_cq_put(cq); + + return NOTIFY_OK; +} + +static u64 gather_async_events_mask(struct mlx5_core_dev *dev) +{ + u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; + if (MLX5_VPORT_MANAGER(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); @@ -865,127 +527,521 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER); - err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, - MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, - "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC); + if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); + + return async_event_mask; +} + +static int create_async_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_param param = {}; + int err; + + MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); + mlx5_eq_notifier_register(dev, &table->cq_err_nb); + + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_CMD_IDX, + .mask = 1ull << MLX5_EVENT_TYPE_CMD, + .nent = MLX5_NUM_CMD_EQE, + .context = &table->cmd_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); - return err; + goto err0; } mlx5_cmd_use_events(dev); - err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC, - MLX5_NUM_ASYNC_EQE, async_event_mask, - "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC); + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_ASYNC_IDX, + .mask = gather_async_events_mask(dev), + .nent = MLX5_NUM_ASYNC_EQE, + .context = &table->async_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; } - err = mlx5_create_map_eq(dev, &table->pages_eq, - MLX5_EQ_VEC_PAGES, - /* TODO: sriov max_vf + */ 1, - 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", - MLX5_EQ_TYPE_ASYNC); + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PAGEREQ_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, + .nent = /* TODO: sriov max_vf + */ 1, + .context = &table->pages_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, ¶m); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_create_map_eq(dev, &table->pfault_eq, - MLX5_EQ_VEC_PFAULT, - MLX5_NUM_ASYNC_EQE, - 1 << MLX5_EVENT_TYPE_PAGE_FAULT, - "mlx5_page_fault_eq", - MLX5_EQ_TYPE_PF); - if (err) { - mlx5_core_warn(dev, "failed to create page fault EQ %d\n", - err); - goto err3; - } - } - - return err; -err3: - mlx5_destroy_unmap_eq(dev, &table->pages_eq); -#else return err; -#endif err2: - mlx5_destroy_unmap_eq(dev, &table->async_eq); + destroy_async_eq(dev, &table->async_eq); err1: mlx5_cmd_use_polling(dev); - mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + destroy_async_eq(dev, &table->cmd_eq); +err0: + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; } -void mlx5_stop_eqs(struct mlx5_core_dev *dev) +static void destroy_async_eqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; int err; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); - if (err) - mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n", - err); - } -#endif - - err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); + err = destroy_async_eq(dev, &table->pages_eq); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = mlx5_destroy_unmap_eq(dev, &table->async_eq); + err = destroy_async_eq(dev, &table->async_eq); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); + mlx5_cmd_use_polling(dev); - err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + err = destroy_async_eq(dev, &table->cmd_eq); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); + + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); } -int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - u32 *out, int outlen) +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) { - u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0}; + return &dev->priv.eq_table->async_eq; +} - MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); - MLX5_SET(query_eq_in, in, eq_number, eq->eqn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->async_eq.irqn); +} + +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); +} + +/* Generic EQ API for mlx5_core consumers + * Needed For RDMA ODP EQ for now + */ +struct mlx5_eq * +mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq_param *param) +{ + struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); + int err; + + if (!eq) + return ERR_PTR(-ENOMEM); + + err = create_async_eq(dev, name, eq, param); + if (err) { + kvfree(eq); + eq = ERR_PTR(err); + } + + return eq; +} +EXPORT_SYMBOL(mlx5_eq_create_generic); + +int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (IS_ERR(eq)) + return -EINVAL; + + err = destroy_async_eq(dev, eq); + if (err) + goto out; + + kvfree(eq); +out: + return err; +} +EXPORT_SYMBOL(mlx5_eq_destroy_generic); + +struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc) +{ + u32 ci = eq->cons_index + cc; + struct mlx5_eqe *eqe; + + eqe = get_eqe(eq, ci & (eq->nent - 1)); + eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + if (eqe) + dma_rmb(); + + return eqe; +} +EXPORT_SYMBOL(mlx5_eq_get_eqe); + +void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val; + + eq->cons_index += cc; + val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} +EXPORT_SYMBOL(mlx5_eq_update_ci); + +/* Completion EQs */ + +static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int irq = pci_irq_vector(mdev->pdev, vecidx); + struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + + if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), + irq_info->mask); + + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irq, irq_info->mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); + + return 0; +} + +static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + struct mlx5_priv *priv = &mdev->priv; + int irq = pci_irq_vector(mdev->pdev, vecidx); + struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(irq_info->mask); +} + +static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) +{ + int err; + int i; + + for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { + err = set_comp_irq_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + clear_comp_irq_affinity_hint(mdev, i); + + return err; +} + +static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) +{ + int i; + + for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) + clear_comp_irq_affinity_hint(mdev, i); +} + +static void destroy_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq, *n; + + clear_comp_irqs_affinity_hints(dev); + +#ifdef CONFIG_RFS_ACCEL + if (table->rmap) { + free_irq_cpu_rmap(table->rmap); + table->rmap = NULL; + } +#endif + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + list_del(&eq->list); + if (destroy_unmap_eq(dev, &eq->core)) + mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", + eq->core.eqn); + tasklet_disable(&eq->tasklet_ctx.task); + kfree(eq); + } +} + +static int create_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + char name[MLX5_MAX_IRQ_NAME]; + struct mlx5_eq_comp *eq; + int ncomp_vec; + int nent; + int err; + int i; + + INIT_LIST_HEAD(&table->comp_eqs_list); + ncomp_vec = table->num_comp_vectors; + nent = MLX5_COMP_EQ_SIZE; +#ifdef CONFIG_RFS_ACCEL + table->rmap = alloc_irq_cpu_rmap(ncomp_vec); + if (!table->rmap) + return -ENOMEM; +#endif + for (i = 0; i < ncomp_vec; i++) { + int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + struct mlx5_eq_param param = {}; + + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) { + err = -ENOMEM; + goto clean; + } + + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, + (unsigned long)&eq->tasklet_ctx); + +#ifdef CONFIG_RFS_ACCEL + irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); +#endif + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + param = (struct mlx5_eq_param) { + .index = vecidx, + .mask = 0, + .nent = nent, + .context = &eq->core, + .handler = mlx5_eq_comp_int + }; + err = create_map_eq(dev, &eq->core, name, ¶m); + if (err) { + kfree(eq); + goto clean; + } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); + /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ + list_add_tail(&eq->list, &table->comp_eqs_list); + } + + err = set_comp_irq_affinity_hints(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); + goto clean; + } + + return 0; + +clean: + destroy_comp_eqs(dev); + return err; +} + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq, *n; + int err = -ENOENT; + int i = 0; + + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + if (i++ == vector) { + *eqn = eq->core.eqn; + *irqn = eq->core.irqn; + err = 0; + break; + } + } + + return err; +} +EXPORT_SYMBOL(mlx5_vector2eqn); + +unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) +{ + return dev->priv.eq_table->num_comp_vectors; +} +EXPORT_SYMBOL(mlx5_comp_vectors_count); + +struct cpumask * +mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) +{ + /* TODO: consider irq_get_affinity_mask(irq) */ + return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; +} +EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); + +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + return dev->priv.eq_table->rmap; +#else + return NULL; +#endif +} + +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + + list_for_each_entry(eq, &table->comp_eqs_list, list) { + if (eq->core.eqn == eqn) + return eq; + } + + return ERR_PTR(-ENOENT); } /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq; + struct mlx5_eq_table *table = dev->priv.eq_table; + int i, max_eqs; + + clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (dev->rmap) { - free_irq_cpu_rmap(dev->rmap); - dev->rmap = NULL; + if (table->rmap) { + free_irq_cpu_rmap(table->rmap); + table->rmap = NULL; } #endif - list_for_each_entry(eq, &table->comp_eqs_list, list) - free_irq(eq->irqn, eq); - - free_irq(table->pages_eq.irqn, &table->pages_eq); - free_irq(table->async_eq.irqn, &table->async_eq); - free_irq(table->cmd_eq.irqn, &table->cmd_eq); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) - free_irq(table->pfault_eq.irqn, &table->pfault_eq); -#endif + + mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ + max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; + for (i = max_eqs - 1; i >= 0; i--) { + if (!table->irq_info[i].context) + continue; + free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); + table->irq_info[i].context = NULL; + } + mutex_unlock(&table->lock); + pci_free_irq_vectors(dev->pdev); +} + +static int alloc_irq_vectors(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *table = priv->eq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_EQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_EQ_VEC_COMP_BASE) + return -ENOMEM; + + table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); + if (!table->irq_info) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, + nvec, PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq_info; + } + + table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + + return 0; + +err_free_irq_info: + kfree(table->irq_info); + return err; +} + +static void free_irq_vectors(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + pci_free_irq_vectors(dev->pdev); + kfree(priv->eq_table->irq_info); +} + +int mlx5_eq_table_create(struct mlx5_core_dev *dev) +{ + int err; + + err = alloc_irq_vectors(dev); + if (err) { + mlx5_core_err(dev, "alloc irq vectors failed\n"); + return err; + } + + err = create_async_eqs(dev); + if (err) { + mlx5_core_err(dev, "Failed to create async EQs\n"); + goto err_async_eqs; + } + + err = create_comp_eqs(dev); + if (err) { + mlx5_core_err(dev, "Failed to create completion EQs\n"); + goto err_comp_eqs; + } + + return 0; +err_comp_eqs: + destroy_async_eqs(dev); +err_async_eqs: + free_irq_vectors(dev); + return err; +} + +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) +{ + destroy_comp_eqs(dev); + destroy_async_eqs(dev); + free_irq_vectors(dev); +} + +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + if (nb->event_type >= MLX5_EVENT_TYPE_MAX) + return -EINVAL; + + return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); +} + +int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + if (nb->event_type >= MLX5_EVENT_TYPE_MAX) + return -EINVAL; + + return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d004957328f9..a44ea7b85614 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -36,6 +36,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "mlx5_core.h" +#include "lib/eq.h" #include "eswitch.h" #include "fs_core.h" @@ -1567,7 +1568,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) /* Mark this vport as disabled to discard new events */ vport->enabled = false; - synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC)); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ @@ -1593,10 +1593,25 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) mutex_unlock(&esw->state_lock); } +static int eswitch_vport_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb); + struct mlx5_eqe *eqe = data; + struct mlx5_vport *vport; + u16 vport_num; + + vport_num = be16_to_cpu(eqe->data.vport_change.vport_num); + vport = &esw->vports[vport_num]; + if (vport->enabled) + queue_work(esw->work_queue, &vport->vport_change_handler); + + return NOTIFY_OK; +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) - int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; @@ -1615,13 +1630,16 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); + esw->mode = mode; + mlx5_lag_update(esw->dev); + if (mode == SRIOV_LEGACY) { err = esw_create_legacy_fdb_table(esw); } else { + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, nvfs + 1); } @@ -1640,6 +1658,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) for (i = 0; i <= nvfs; i++) esw_enable_vport(esw, i, enabled_events); + if (mode == SRIOV_LEGACY) { + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + } + esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", esw->enabled_vports); return 0; @@ -1647,8 +1670,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) abort: esw->mode = SRIOV_NONE; - if (mode == SRIOV_OFFLOADS) + if (mode == SRIOV_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); + } return err; } @@ -1669,6 +1694,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) mc_promisc = &esw->mc_promisc; nvports = esw->enabled_vports; + if (esw->mode == SRIOV_LEGACY) + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + for (i = 0; i < esw->total_vports; i++) esw_disable_vport(esw, i); @@ -1685,8 +1713,12 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) old_mode = esw->mode; esw->mode = SRIOV_NONE; - if (old_mode == SRIOV_OFFLOADS) + mlx5_lag_update(esw->dev); + + if (old_mode == SRIOV_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); + } } int mlx5_eswitch_init(struct mlx5_core_dev *dev) @@ -1777,23 +1809,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) kfree(esw); } -void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) -{ - struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; - u16 vport_num = be16_to_cpu(vc_eqe->vport_num); - struct mlx5_vport *vport; - - if (!esw) { - pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", - vport_num); - return; - } - - vport = &esw->vports[vport_num]; - if (vport->enabled) - queue_work(esw->work_queue, &vport->vport_change_handler); -} - /* Vport Administration */ #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) @@ -2219,3 +2234,14 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); + +bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) +{ + if ((dev0->priv.eswitch->mode == SRIOV_NONE && + dev1->priv.eswitch->mode == SRIOV_NONE) || + (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && + dev1->priv.eswitch->mode == SRIOV_OFFLOADS)) + return true; + + return false; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index aaafc9f17115..9c89eea9b2c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -143,6 +143,8 @@ struct mlx5_eswitch_fdb { struct offloads_fdb { struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *peer_miss_grp; + struct mlx5_flow_handle **peer_miss_rules; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle *miss_rule_uni; struct mlx5_flow_handle *miss_rule_multi; @@ -165,6 +167,8 @@ struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; + struct list_head peer_flows; + struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); u8 inline_mode; @@ -181,6 +185,7 @@ struct esw_mc_addr { /* SRIOV only */ struct mlx5_eswitch { struct mlx5_core_dev *dev; + struct mlx5_nb nb; struct mlx5_eswitch_fdb fdb_table; struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; @@ -211,7 +216,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, @@ -281,13 +285,17 @@ enum mlx5_flow_match_level { /* current maximum for flow based vport multicasting */ #define MLX5_MAX_FLOW_FWD_VPORTS 2 +enum { + MLX5_ESW_DEST_ENCAP = BIT(0), + MLX5_ESW_DEST_ENCAP_VALID = BIT(1), +}; + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; - struct mlx5_eswitch_rep *out_rep[MLX5_MAX_FLOW_FWD_VPORTS]; - struct mlx5_core_dev *out_mdev[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5_core_dev *in_mdev; + struct mlx5_core_dev *counter_dev; - int mirror_count; + int split_count; int out_count; int action; @@ -296,7 +304,12 @@ struct mlx5_esw_flow_attr { u8 vlan_prio[MLX5_FS_VLAN_DEPTH]; u8 total_vlan; bool vlan_handled; - u32 encap_id; + struct { + u32 flags; + struct mlx5_eswitch_rep *rep; + struct mlx5_core_dev *mdev; + u32 encap_id; + } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; u8 match_level; struct mlx5_fc *counter; @@ -338,6 +351,9 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2); } +bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ @@ -352,9 +368,9 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9eac137790f5..53065b6ae593 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -39,6 +39,7 @@ #include "eswitch.h" #include "en.h" #include "fs_core.h" +#include "lib/devcom.h" enum { FDB_FAST_PATH = 0, @@ -81,7 +82,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; - bool mirror = !!(attr->mirror_count); + bool split = !!(attr->split_count); struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int j, i = 0; @@ -120,13 +121,21 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, dest[i].ft = ft; i++; } else { - for (j = attr->mirror_count; j < attr->out_count; j++) { + for (j = attr->split_count; j < attr->out_count; j++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->out_rep[j]->vport; + dest[i].vport.num = attr->dests[j].rep->vport; dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); - dest[i].vport.vhca_id_valid = - !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + dest[i].vport.flags |= + MLX5_FLOW_DEST_VPORT_VHCA_ID; + if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act.reformat_id = attr->dests[j].encap_id; + dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + dest[i].vport.reformat_id = + attr->dests[j].encap_id; + } i++; } } @@ -163,10 +172,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) - flow_act.reformat_id = attr->encap_id; - - fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror); + fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split); if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; @@ -181,7 +187,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; err_add_rule: - esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); + esw_put_prio_table(esw, attr->chain, attr->prio, !!split); err_esw_get: if (attr->dest_chain) esw_put_prio_table(esw, attr->dest_chain, 1, 0); @@ -215,12 +221,17 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - for (i = 0; i < attr->mirror_count; i++) { + for (i = 0; i < attr->split_count; i++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->out_rep[i]->vport; + dest[i].vport.num = attr->dests[i].rep->vport; dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->out_mdev[i], vhca_id); - dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { + dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + dest[i].vport.reformat_id = attr->dests[i].encap_id; + } } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = fwd_fdb, @@ -268,7 +279,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr, bool fwd_rule) { - bool mirror = (attr->mirror_count > 0); + bool split = (attr->split_count > 0); mlx5_del_flow_rules(rule); esw->offloads.num_flows--; @@ -277,7 +288,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, esw_put_prio_table(esw, attr->chain, attr->prio, 1); esw_put_prio_table(esw, attr->chain, attr->prio, 0); } else { - esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); + esw_put_prio_table(esw, attr->chain, attr->prio, !!split); if (attr->dest_chain) esw_put_prio_table(esw, attr->dest_chain, 1, 0); } @@ -325,7 +336,7 @@ esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop) struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL; in_rep = attr->in_rep; - out_rep = attr->out_rep[0]; + out_rep = attr->dests[0].rep; if (push) vport = in_rep; @@ -346,7 +357,7 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, goto out_notsupp; in_rep = attr->in_rep; - out_rep = attr->out_rep[0]; + out_rep = attr->dests[0].rep; if (push && in_rep->vport == FDB_UPLINK_VPORT) goto out_notsupp; @@ -398,7 +409,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) { + if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) { vport->vlan_refcount++; attr->vlan_handled = true; } @@ -458,7 +469,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) + if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) vport->vlan_refcount--; return 0; @@ -531,6 +542,98 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } +static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, + struct mlx5_flow_spec *spec, + struct mlx5_flow_destination *dest) +{ + void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest->vport.num = 0; + dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); + dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; +} + +static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle **flows; + struct mlx5_flow_handle *flow; + struct mlx5_flow_spec *spec; + /* total vports is the same for both e-switches */ + int nvports = esw->total_vports; + void *misc; + int err, i; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + peer_miss_rules_setup(peer_dev, spec, &dest); + + flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL); + if (!flows) { + err = -ENOMEM; + goto alloc_flows_err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + for (i = 1; i < nvports; i++) { + MLX5_SET(fte_match_set_misc, misc, source_port, i); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); + goto add_flow_err; + } + flows[i] = flow; + } + + esw->fdb_table.offloads.peer_miss_rules = flows; + + kvfree(spec); + return 0; + +add_flow_err: + for (i--; i > 0; i--) + mlx5_del_flow_rules(flows[i]); + kvfree(flows); +alloc_flows_err: + kvfree(spec); + return err; +} + +static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_handle **flows; + int i; + + flows = esw->fdb_table.offloads.peer_miss_rules; + + for (i = 1; i < esw->total_vports; i++) + mlx5_del_flow_rules(flows[i]); + + kvfree(flows); +} + static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { struct mlx5_flow_act flow_act = {0}; @@ -801,7 +904,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) esw->fdb_table.offloads.fdb_left[i] = ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; - table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2; + table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2 + + esw->total_vports; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. @@ -856,6 +960,34 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.send_to_vport_grp = g; + /* create peer esw miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + esw->total_vports - 1); + ix += esw->total_vports; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err); + goto peer_miss_err; + } + esw->fdb_table.offloads.peer_miss_grp = g; + /* create miss group */ memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -888,6 +1020,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) miss_rule_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); miss_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); +peer_miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: esw_destroy_offloads_fast_fdb_tables(esw); @@ -907,6 +1041,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); @@ -1163,6 +1298,105 @@ err_reps: return err; } +#define ESW_OFFLOADS_DEVCOM_PAIR (0) +#define ESW_OFFLOADS_DEVCOM_UNPAIR (1) + +static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) +{ + int err; + + err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); + if (err) + return err; + + return 0; +} + +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); + +static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) +{ + mlx5e_tc_clean_fdb_peer_flows(esw); + esw_del_fdb_peer_miss_rules(esw); +} + +static int mlx5_esw_offloads_devcom_event(int event, + void *my_data, + void *event_data) +{ + struct mlx5_eswitch *esw = my_data; + struct mlx5_eswitch *peer_esw = event_data; + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + int err; + + switch (event) { + case ESW_OFFLOADS_DEVCOM_PAIR: + err = mlx5_esw_offloads_pair(esw, peer_esw); + if (err) + goto err_out; + + err = mlx5_esw_offloads_pair(peer_esw, esw); + if (err) + goto err_pair; + + mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); + break; + + case ESW_OFFLOADS_DEVCOM_UNPAIR: + if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + break; + + mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); + mlx5_esw_offloads_unpair(peer_esw); + mlx5_esw_offloads_unpair(esw); + break; + } + + return 0; + +err_pair: + mlx5_esw_offloads_unpair(esw); + +err_out: + mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d", + event, err); + return err; +} + +static void esw_offloads_devcom_init(struct mlx5_eswitch *esw) +{ + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + + INIT_LIST_HEAD(&esw->offloads.peer_flows); + mutex_init(&esw->offloads.peer_mutex); + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + + mlx5_devcom_register_component(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + mlx5_esw_offloads_devcom_event, + esw); + + mlx5_devcom_send_event(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_PAIR, esw); +} + +static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + + mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_UNPAIR, esw); + + mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +} + int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) { int err; @@ -1185,6 +1419,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) goto err_reps; + esw_offloads_devcom_init(esw); return 0; err_reps: @@ -1215,14 +1450,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, } } - /* enable back PF RoCE */ - mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - return err; } void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) { + esw_offloads_devcom_cleanup(esw); esw_offloads_unload_reps(esw, nvports); esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c new file mode 100644 index 000000000000..fbc42b7252a9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/mlx5.h" + +struct mlx5_event_nb { + struct mlx5_nb nb; + void *ctx; +}; + +/* General events handlers for the low level mlx5_core driver + * + * Other Major feature specific events such as + * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with + * separate notifiers callbacks, specifically by those mlx5 components. + */ +static int any_notifier(struct notifier_block *, unsigned long, void *); +static int temp_warn(struct notifier_block *, unsigned long, void *); +static int port_module(struct notifier_block *, unsigned long, void *); + +/* handler which forwards the event to events->nh, driver notifiers */ +static int forward_event(struct notifier_block *, unsigned long, void *); + +static struct mlx5_nb events_nbs_ref[] = { + /* Events to be proccessed by mlx5_core */ + {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, + {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, + {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, + + /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, + /* QP/WQ resource events to forward */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, + /* SRQ events */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT }, +}; + +struct mlx5_events { + struct mlx5_core_dev *dev; + struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; + /* driver notifier chain */ + struct atomic_notifier_head nh; + /* port module events stats */ + struct mlx5_pme_stats pme_stats; +}; + +static const char *eqe_type_str(u8 type) +{ + switch (type) { + case MLX5_EVENT_TYPE_COMP: + return "MLX5_EVENT_TYPE_COMP"; + case MLX5_EVENT_TYPE_PATH_MIG: + return "MLX5_EVENT_TYPE_PATH_MIG"; + case MLX5_EVENT_TYPE_COMM_EST: + return "MLX5_EVENT_TYPE_COMM_EST"; + case MLX5_EVENT_TYPE_SQ_DRAINED: + return "MLX5_EVENT_TYPE_SQ_DRAINED"; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; + case MLX5_EVENT_TYPE_CQ_ERROR: + return "MLX5_EVENT_TYPE_CQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_INTERNAL_ERROR: + return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; + case MLX5_EVENT_TYPE_PORT_CHANGE: + return "MLX5_EVENT_TYPE_PORT_CHANGE"; + case MLX5_EVENT_TYPE_GPIO_EVENT: + return "MLX5_EVENT_TYPE_GPIO_EVENT"; + case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: + return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; + case MLX5_EVENT_TYPE_REMOTE_CONFIG: + return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; + case MLX5_EVENT_TYPE_DB_BF_CONGESTION: + return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; + case MLX5_EVENT_TYPE_STALL_EVENT: + return "MLX5_EVENT_TYPE_STALL_EVENT"; + case MLX5_EVENT_TYPE_CMD: + return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_PAGE_REQUEST: + return "MLX5_EVENT_TYPE_PAGE_REQUEST"; + case MLX5_EVENT_TYPE_PAGE_FAULT: + return "MLX5_EVENT_TYPE_PAGE_FAULT"; + case MLX5_EVENT_TYPE_PPS_EVENT: + return "MLX5_EVENT_TYPE_PPS_EVENT"; + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; + case MLX5_EVENT_TYPE_FPGA_ERROR: + return "MLX5_EVENT_TYPE_FPGA_ERROR"; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + return "MLX5_EVENT_TYPE_GENERAL_EVENT"; + case MLX5_EVENT_TYPE_MONITOR_COUNTER: + return "MLX5_EVENT_TYPE_MONITOR_COUNTER"; + case MLX5_EVENT_TYPE_DEVICE_TRACER: + return "MLX5_EVENT_TYPE_DEVICE_TRACER"; + default: + return "Unrecognized event"; + } +} + +/* handles all FW events, type == eqe->type */ +static int any_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n", + eqe_type_str(eqe->type), eqe->sub_type); + return NOTIFY_OK; +} + +/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ +static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + u64 value_lsb; + u64 value_msb; + + value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); + value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); + + mlx5_core_warn(events->dev, + "High temperature on sensors with bit set %llx %llx", + value_msb, value_lsb); + + return NOTIFY_OK; +} + +/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ +static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status) +{ + switch (status) { + case MLX5_MODULE_STATUS_PLUGGED: + return "Cable plugged"; + case MLX5_MODULE_STATUS_UNPLUGGED: + return "Cable unplugged"; + case MLX5_MODULE_STATUS_ERROR: + return "Cable error"; + case MLX5_MODULE_STATUS_DISABLED: + return "Cable disabled"; + default: + return "Unknown status"; + } +} + +static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error) +{ + switch (error) { + case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: + return "Power budget exceeded"; + case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX: + return "Long Range for non MLNX cable"; + case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: + return "Bus stuck (I2C or data shorted)"; + case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: + return "No EEPROM/retry timeout"; + case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: + return "Enforce part number list"; + case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER: + return "Unknown identifier"; + case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: + return "High Temperature"; + case MLX5_MODULE_EVENT_ERROR_BAD_CABLE: + return "Bad or shorted cable/module"; + case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED: + return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot"; + default: + return "Unknown error"; + } +} + +/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ +static int port_module(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + enum port_module_event_status_type module_status; + enum port_module_event_error_type error_type; + struct mlx5_eqe_port_module *module_event_eqe; + const char *status_str, *error_str; + u8 module_num; + + module_event_eqe = &eqe->data.port_module; + module_num = module_event_eqe->module; + module_status = module_event_eqe->module_status & + PORT_MODULE_EVENT_MODULE_STATUS_MASK; + error_type = module_event_eqe->error_type & + PORT_MODULE_EVENT_ERROR_TYPE_MASK; + + if (module_status < MLX5_MODULE_STATUS_NUM) + events->pme_stats.status_counters[module_status]++; + status_str = mlx5_pme_status_to_string(module_status); + + if (module_status == MLX5_MODULE_STATUS_ERROR) { + if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) + events->pme_stats.error_counters[error_type]++; + error_str = mlx5_pme_error_to_string(error_type); + } + + if (!printk_ratelimit()) + return NOTIFY_OK; + + if (module_status == MLX5_MODULE_STATUS_ERROR) + mlx5_core_err(events->dev, + "Port module event[error]: module %u, %s, %s\n", + module_num, status_str, error_str); + else + mlx5_core_info(events->dev, + "Port module event: module %u, %s\n", + module_num, status_str); + + return NOTIFY_OK; +} + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) +{ + *stats = dev->priv.events->pme_stats; +} + +/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */ +static int forward_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n", + eqe_type_str(eqe->type), eqe->sub_type); + atomic_notifier_call_chain(&events->nh, event, data); + return NOTIFY_OK; +} + +int mlx5_events_init(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); + + if (!events) + return -ENOMEM; + + ATOMIC_INIT_NOTIFIER_HEAD(&events->nh); + events->dev = dev; + dev->priv.events = events; + return 0; +} + +void mlx5_events_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.events); +} + +void mlx5_events_start(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + int i; + + for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) { + events->notifiers[i].nb = events_nbs_ref[i]; + events->notifiers[i].ctx = events; + mlx5_eq_notifier_register(dev, &events->notifiers[i].nb); + } +} + +void mlx5_events_stop(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + int i; + + for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) + mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); +} + +int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_register(&events->nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_register); + +int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_unregister(&events->nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_unregister); + +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) +{ + return atomic_notifier_call_chain(&events->nh, event, data); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 8ca1d1949d93..873541ef4c1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -334,7 +334,7 @@ static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn, { u8 opcode, status = 0; - opcode = cqe->op_own >> 4; + opcode = get_cqe_opcode(cqe); switch (opcode) { case MLX5_CQE_REQ_ERR: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index 436a8136f26f..27c5f6c7d36a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -36,6 +36,7 @@ #include "mlx5_core.h" #include "lib/mlx5.h" +#include "lib/eq.h" #include "fpga/core.h" #include "fpga/conn.h" @@ -145,6 +146,22 @@ static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev) return 0; } +static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *); + +static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + +static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) { struct mlx5_fpga_device *fdev = mdev->fpga; @@ -185,6 +202,11 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) if (err) goto out; + MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR); + MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb); + err = mlx5_fpga_conn_device_init(fdev); if (err) goto err_rsvd_gid; @@ -201,6 +223,8 @@ err_conn_init: mlx5_fpga_conn_device_cleanup(fdev); err_rsvd_gid: + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); mlx5_core_unreserve_gids(mdev, max_num_qps); out: spin_lock_irqsave(&fdev->state_lock, flags); @@ -256,6 +280,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) } mlx5_fpga_conn_device_cleanup(fdev); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); mlx5_core_unreserve_gids(mdev, max_num_qps); } @@ -283,9 +310,10 @@ static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome) return "Unknown"; } -void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) +static int mlx5_fpga_event(struct mlx5_fpga_device *fdev, + unsigned long event, void *eqe) { - struct mlx5_fpga_device *fdev = mdev->fpga; + void *data = ((struct mlx5_eqe *)eqe)->data.raw; const char *event_name; bool teardown = false; unsigned long flags; @@ -303,9 +331,7 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn); break; default: - mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n", - event); - return; + return NOTIFY_DONE; } spin_lock_irqsave(&fdev->state_lock, flags); @@ -326,4 +352,6 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) */ if (teardown) mlx5_trigger_health_work(fdev->mdev); + + return NOTIFY_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index 3e2355c8df3f..7e2e871dbf83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -35,11 +35,16 @@ #ifdef CONFIG_MLX5_FPGA +#include <linux/mlx5/eq.h> + +#include "lib/eq.h" #include "fpga/cmd.h" /* Represents an Innova device */ struct mlx5_fpga_device { struct mlx5_core_dev *mdev; + struct mlx5_nb fpga_err_nb; + struct mlx5_nb fpga_qp_err_nb; spinlock_t state_lock; /* Protects state transitions */ enum mlx5_fpga_status state; enum mlx5_fpga_image last_admin_image; @@ -82,7 +87,6 @@ int mlx5_fpga_init(struct mlx5_core_dev *mdev); void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev); int mlx5_fpga_device_start(struct mlx5_core_dev *mdev); void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev); -void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data); #else @@ -104,11 +108,6 @@ static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) { } -static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, - void *data) -{ -} - #endif #endif /* __MLX5_FPGA_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 515e3d6de051..5a22c5874f3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -83,8 +83,14 @@ struct mlx5_fpga_ipsec_rule { }; static const struct rhashtable_params rhash_sa = { - .key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa), - .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa), + /* Keep out "cmd" field from the key as it's + * value is not constant during the lifetime + * of the key object. + */ + .key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) - + FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), + .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) + + FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash), .automatic_shrinking = true, .min_size = 1, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 08a891f9aade..c44ccb67c4a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -308,22 +308,68 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +static int mlx5_set_extended_dest(struct mlx5_core_dev *dev, + struct fs_fte *fte, bool *extended_dest) +{ + int fw_log_max_fdb_encap_uplink = + MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink); + int num_fwd_destinations = 0; + struct mlx5_flow_rule *dst; + int num_encap = 0; + + *extended_dest = false; + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + num_encap++; + num_fwd_destinations++; + } + if (num_fwd_destinations > 1 && num_encap > 0) + *extended_dest = true; + + if (*extended_dest && !fw_log_max_fdb_encap_uplink) { + mlx5_core_warn(dev, "FW does not support extended destination"); + return -EOPNOTSUPP; + } + if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) { + mlx5_core_warn(dev, "FW does not support more than %d encaps", + 1 << fw_log_max_fdb_encap_uplink); + return -EOPNOTSUPP; + } + + return 0; +} static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, int opmod, int modify_mask, struct mlx5_flow_table *ft, unsigned group_id, struct fs_fte *fte) { - unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + - fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct); u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0}; + bool extended_dest = false; struct mlx5_flow_rule *dst; void *in_flow_context, *vlan; void *in_match_value; + unsigned int inlen; + int dst_cnt_size; void *in_dests; u32 *in; int err; + if (mlx5_set_extended_dest(dev, fte, &extended_dest)) + return -EOPNOTSUPP; + + if (!extended_dest) + dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct); + else + dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format); + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size; in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -343,9 +389,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, group_id, group_id); MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); - MLX5_SET(flow_context, in_flow_context, action, fte->action.action); - MLX5_SET(flow_context, in_flow_context, packet_reformat_id, - fte->action.reformat_id); + MLX5_SET(flow_context, in_flow_context, extended_destination, + extended_dest); + if (extended_dest) { + u32 action; + + action = fte->action.action & + ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + MLX5_SET(flow_context, in_flow_context, action, action); + } else { + MLX5_SET(flow_context, in_flow_context, action, + fte->action.action); + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.reformat_id); + } MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->action.modify_id); @@ -387,10 +444,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, id = dst->dest_attr.vport.num; MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id_valid, - dst->dest_attr.vport.vhca_id_valid); + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, dst->dest_attr.vport.vhca_id); + if (extended_dest) { + MLX5_SET(dest_format_struct, in_dests, + packet_reformat, + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); + MLX5_SET(extended_dest_format, in_dests, + packet_reformat_id, + dst->dest_attr.vport.reformat_id); + } break; default: id = dst->dest_attr.tir_num; @@ -399,7 +466,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(dest_format_struct, in_dests, destination_type, type); MLX5_SET(dest_format_struct, in_dests, destination_id, id); - in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + in_dests += dst_cnt_size; list_size++; } @@ -420,7 +487,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_counter_list, in_dests, flow_counter_id, dst->dest_attr.counter_id); - in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + in_dests += dst_cnt_size; list_size++; } if (list_size > max_list_size) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9d73eb955f75..79f122b45def 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -452,7 +452,7 @@ static void del_sw_hw_rule(struct fs_node *node) if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && --fte->dests_size) { - modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST), + modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); update_fte = true; } out: @@ -1373,7 +1373,10 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, { if (d1->type == d2->type) { if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && - d1->vport.num == d2->vport.num) || + d1->vport.num == d2->vport.num && + d1->vport.flags == d2->vport.flags && + ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? + (d1->vport.reformat_id == d2->vport.reformat_id) : true)) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && d1->ft == d2->ft) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index b51ad217da32..2dc86347af58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -145,29 +145,6 @@ struct mlx5_flow_table { struct rhltable fgs_hash; }; -struct mlx5_fc_cache { - u64 packets; - u64 bytes; - u64 lastuse; -}; - -struct mlx5_fc { - struct list_head list; - struct llist_node addlist; - struct llist_node dellist; - - /* last{packets,bytes} members are used when calculating the delta since - * last reading - */ - u64 lastpackets; - u64 lastbytes; - - u32 id; - bool aging; - - struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; -}; - struct mlx5_ft_underlay_qp { struct list_head list; u32 qpn; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 32accd6b041b..c6c28f56aa29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -41,6 +41,29 @@ /* Max number of counters to query in bulk read is 32K */ #define MLX5_SW_MAX_COUNTERS_BULK BIT(15) +struct mlx5_fc_cache { + u64 packets; + u64 bytes; + u64 lastuse; +}; + +struct mlx5_fc { + struct list_head list; + struct llist_node addlist; + struct llist_node dellist; + + /* last{packets,bytes} members are used when calculating the delta since + * last reading + */ + u64 lastpackets; + u64 lastbytes; + + u32 id; + bool aging; + + struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; +}; + /* locking scheme: * * It is the responsibility of the user to prevent concurrent calls or bad diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 43118de8ee99..196c07383082 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -38,6 +38,8 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/mlx5.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, @@ -78,29 +80,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) &dev->iseg->cmdq_addr_l_sz); } -static void trigger_cmd_completions(struct mlx5_core_dev *dev) -{ - unsigned long flags; - u64 vector; - - /* wait for pending handlers to complete */ - synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD)); - spin_lock_irqsave(&dev->cmd.alloc_lock, flags); - vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); - if (!vector) - goto no_trig; - - vector |= MLX5_TRIGGERED_CMD_COMP; - spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); - - mlx5_core_dbg(dev, "vector 0x%llx\n", vector); - mlx5_cmd_comp_handler(dev, vector, true); - return; - -no_trig: - spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); -} - static int in_fatal(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; @@ -124,10 +103,10 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mlx5_core_err(dev, "start\n"); if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; - trigger_cmd_completions(dev); + mlx5_cmd_trigger_completions(dev); } - mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); mlx5_core_err(dev, "end\n"); unlock: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index b59953daf8b4..bfc0f6581729 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev, mlx5_query_port_max_mtu(mdev, &max_mtu, 1); netdev->mtu = max_mtu; - mlx5e_build_nic_params(mdev, &priv->channels.params, + mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); @@ -560,9 +560,9 @@ static int mlx5i_close(struct net_device *netdev) netif_carrier_off(epriv->netdev); mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); - mlx5i_uninit_underlay_qp(epriv); mlx5e_deactivate_priv_channels(epriv); mlx5e_close_channels(&epriv->channels); + mlx5i_uninit_underlay_qp(epriv); unlock: mutex_unlock(&epriv->state_lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 582b2f18010a..3a6baed722d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -34,11 +34,15 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/vport.h> #include "mlx5_core.h" +#include "eswitch.h" enum { - MLX5_LAG_FLAG_BONDED = 1 << 0, + MLX5_LAG_FLAG_ROCE = 1 << 0, + MLX5_LAG_FLAG_SRIOV = 1 << 1, }; +#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV) + struct lag_func { struct mlx5_core_dev *dev; struct net_device *netdev; @@ -61,11 +65,6 @@ struct mlx5_lag { struct lag_tracker tracker; struct delayed_work bond_work; struct notifier_block nb; - - /* Admin state. Allow lag only if allowed is true - * even if network conditions for lag were met - */ - bool allowed; }; /* General purpose, use for short periods of time. @@ -165,9 +164,19 @@ static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, return -1; } -static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev) +static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_ROCE); +} + +static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); +} + +static bool __mlx5_lag_is_active(struct mlx5_lag *ldev) { - return !!(ldev->flags & MLX5_LAG_FLAG_BONDED); + return !!(ldev->flags & MLX5_LAG_MODE_FLAGS); } static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, @@ -186,36 +195,131 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, *port2 = 1; } -static void mlx5_activate_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker) +static void mlx5_modify_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + u8 v2p_port1, v2p_port2; int err; - ldev->flags |= MLX5_LAG_FLAG_BONDED; + mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, + &v2p_port2); + + if (v2p_port1 != ldev->v2p_map[0] || + v2p_port2 != ldev->v2p_map[1]) { + ldev->v2p_map[0] = v2p_port1; + ldev->v2p_map[1] = v2p_port2; + + mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d", + ldev->v2p_map[0], ldev->v2p_map[1]); + + err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); + if (err) + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + } +} + +static int mlx5_create_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0], &ldev->v2p_map[1]); + mlx5_core_info(dev0, "lag map port 1:%d port 2:%d", + ldev->v2p_map[0], ldev->v2p_map[1]); + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]); if (err) mlx5_core_err(dev0, "Failed to create LAG (%d)\n", err); + return err; +} + +static int mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + u8 flags) +{ + bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; + + err = mlx5_create_lag(ldev, tracker); + if (err) { + if (roce_lag) { + mlx5_core_err(dev0, + "Failed to activate RoCE LAG\n"); + } else { + mlx5_core_err(dev0, + "Failed to activate VF LAG\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + return err; + } + + ldev->flags |= flags; + return 0; } -static void mlx5_deactivate_lag(struct mlx5_lag *ldev) +static int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + bool roce_lag = __mlx5_lag_is_roce(ldev); int err; - ldev->flags &= ~MLX5_LAG_FLAG_BONDED; + ldev->flags &= ~MLX5_LAG_MODE_FLAGS; err = mlx5_cmd_destroy_lag(dev0); - if (err) - mlx5_core_err(dev0, - "Failed to destroy LAG (%d)\n", - err); + if (err) { + if (roce_lag) { + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } else { + mlx5_core_err(dev0, + "Failed to deactivate VF LAG; driver restart required\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + } + + return err; +} + +static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +{ + if (!ldev->pf[0].dev || !ldev->pf[1].dev) + return false; + +#ifdef CONFIG_MLX5_ESWITCH + return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev); +#else + return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) && + !mlx5_sriov_is_enabled(ldev->pf[1].dev)); +#endif +} + +static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_add_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); +} + +static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_remove_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); } static void mlx5_do_bond(struct mlx5_lag *ldev) @@ -223,9 +327,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) struct mlx5_core_dev *dev0 = ldev->pf[0].dev; struct mlx5_core_dev *dev1 = ldev->pf[1].dev; struct lag_tracker tracker; - u8 v2p_port1, v2p_port2; - int i, err; - bool do_bond; + bool do_bond, roce_lag; + int err; if (!dev0 || !dev1) return; @@ -234,42 +337,45 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) tracker = ldev->tracker; mutex_unlock(&lag_mutex); - do_bond = tracker.is_bonded && ldev->allowed; + do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); - if (do_bond && !mlx5_lag_is_bonded(ldev)) { - for (i = 0; i < MLX5_MAX_PORTS; i++) - mlx5_remove_dev_by_protocol(ldev->pf[i].dev, - MLX5_INTERFACE_PROTOCOL_IB); + if (do_bond && !__mlx5_lag_is_active(ldev)) { + roce_lag = !mlx5_sriov_is_enabled(dev0) && + !mlx5_sriov_is_enabled(dev1); - mlx5_activate_lag(ldev, &tracker); + if (roce_lag) + mlx5_lag_remove_ib_devices(ldev); - mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_nic_vport_enable_roce(dev1); - } else if (do_bond && mlx5_lag_is_bonded(ldev)) { - mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1, - &v2p_port2); + err = mlx5_activate_lag(ldev, &tracker, + roce_lag ? MLX5_LAG_FLAG_ROCE : + MLX5_LAG_FLAG_SRIOV); + if (err) { + if (roce_lag) + mlx5_lag_add_ib_devices(ldev); - if ((v2p_port1 != ldev->v2p_map[0]) || - (v2p_port2 != ldev->v2p_map[1])) { - ldev->v2p_map[0] = v2p_port1; - ldev->v2p_map[1] = v2p_port2; + return; + } - err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); - if (err) - mlx5_core_err(dev0, - "Failed to modify LAG (%d)\n", - err); + if (roce_lag) { + mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_enable_roce(dev1); + } + } else if (do_bond && __mlx5_lag_is_active(ldev)) { + mlx5_modify_lag(ldev, &tracker); + } else if (!do_bond && __mlx5_lag_is_active(ldev)) { + roce_lag = __mlx5_lag_is_roce(ldev); + + if (roce_lag) { + mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_disable_roce(dev1); } - } else if (!do_bond && mlx5_lag_is_bonded(ldev)) { - mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_nic_vport_disable_roce(dev1); - mlx5_deactivate_lag(ldev); + err = mlx5_deactivate_lag(ldev); + if (err) + return; - for (i = 0; i < MLX5_MAX_PORTS; i++) - if (ldev->pf[i].dev) - mlx5_add_dev_by_protocol(ldev->pf[i].dev, - MLX5_INTERFACE_PROTOCOL_IB); + if (roce_lag) + mlx5_lag_add_ib_devices(ldev); } } @@ -419,15 +525,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } -static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) -{ - if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) || - (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev))) - return false; - else - return true; -} - static struct mlx5_lag *mlx5_lag_dev_alloc(void) { struct mlx5_lag *ldev; @@ -437,7 +534,6 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void) return NULL; INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); - ldev->allowed = mlx5_lag_check_prereq(ldev); return ldev; } @@ -462,7 +558,6 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, ldev->tracker.netdev_state[fn].link_up = 0; ldev->tracker.netdev_state[fn].tx_enabled = 0; - ldev->allowed = mlx5_lag_check_prereq(ldev); dev->priv.lag = ldev; mutex_unlock(&lag_mutex); @@ -484,7 +579,6 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); dev->priv.lag = NULL; - ldev->allowed = mlx5_lag_check_prereq(ldev); mutex_unlock(&lag_mutex); } @@ -532,7 +626,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) if (!ldev) return; - if (mlx5_lag_is_bonded(ldev)) + if (__mlx5_lag_is_active(ldev)) mlx5_deactivate_lag(ldev); mlx5_lag_dev_remove_pf(ldev, dev); @@ -549,56 +643,61 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) } } -bool mlx5_lag_is_active(struct mlx5_core_dev *dev) +bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; bool res; mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - res = ldev && mlx5_lag_is_bonded(ldev); + res = ldev && __mlx5_lag_is_roce(ldev); mutex_unlock(&lag_mutex); return res; } -EXPORT_SYMBOL(mlx5_lag_is_active); +EXPORT_SYMBOL(mlx5_lag_is_roce); -static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow) +bool mlx5_lag_is_active(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; - int ret = 0; - bool lag_active; - - mlx5_dev_list_lock(); + bool res; + mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (!ldev) { - ret = -ENODEV; - goto unlock; - } - lag_active = mlx5_lag_is_bonded(ldev); - if (!mlx5_lag_check_prereq(ldev) && allow) { - ret = -EINVAL; - goto unlock; - } - if (ldev->allowed == allow) - goto unlock; - ldev->allowed = allow; - if ((lag_active && !allow) || allow) - mlx5_do_bond(ldev); -unlock: - mlx5_dev_list_unlock(); - return ret; + res = ldev && __mlx5_lag_is_active(ldev); + mutex_unlock(&lag_mutex); + + return res; } +EXPORT_SYMBOL(mlx5_lag_is_active); -int mlx5_lag_forbid(struct mlx5_core_dev *dev) +bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) { - return mlx5_lag_set_state(dev, false); + struct mlx5_lag *ldev; + bool res; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + res = ldev && __mlx5_lag_is_sriov(ldev); + mutex_unlock(&lag_mutex); + + return res; } +EXPORT_SYMBOL(mlx5_lag_is_sriov); -int mlx5_lag_allow(struct mlx5_core_dev *dev) +void mlx5_lag_update(struct mlx5_core_dev *dev) { - return mlx5_lag_set_state(dev, true); + struct mlx5_lag *ldev; + + mlx5_dev_list_lock(); + ldev = mlx5_lag_dev_get(dev); + if (!ldev) + goto unlock; + + mlx5_do_bond(ldev); + +unlock: + mlx5_dev_list_unlock(); } struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) @@ -609,7 +708,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (!(ldev && mlx5_lag_is_bonded(ldev))) + if (!(ldev && __mlx5_lag_is_roce(ldev))) goto unlock; if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { @@ -638,7 +737,7 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) return true; ldev = mlx5_lag_dev_get(dev); - if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev) + if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev) return true; /* If bonded, we do not add an IB device for PF1. */ @@ -665,7 +764,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (ldev && mlx5_lag_is_bonded(ldev)) { + if (ldev && __mlx5_lag_is_roce(ldev)) { num_ports = MLX5_MAX_PORTS; mdev[0] = ldev->pf[0].dev; mdev[1] = ldev->pf[1].dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 0d90b1b4a3d3..ca0ee9916e9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -33,6 +33,7 @@ #include <linux/clocksource.h> #include <linux/highmem.h> #include <rdma/mlx5-abi.h> +#include "lib/eq.h" #include "en.h" #include "clock.h" @@ -71,7 +72,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc) struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); - return mlx5_read_internal_timer(mdev) & cc->mask; + return mlx5_read_internal_timer(mdev, NULL) & cc->mask; } static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) @@ -155,15 +156,19 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, return 0; } -static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) +static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, + struct ptp_system_timestamp *sts) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - u64 ns; + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); unsigned long flags; + u64 cycles, ns; write_seqlock_irqsave(&clock->lock, flags); - ns = timecounter_read(&clock->tc); + cycles = mlx5_read_internal_timer(mdev, sts); + ns = timecounter_cyc2time(&clock->tc, cycles); write_sequnlock_irqrestore(&clock->lock, flags); *ts = ns_to_timespec64(ns); @@ -306,7 +311,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, ts.tv_sec = rq->perout.start.sec; ts.tv_nsec = rq->perout.start.nsec; ns = timespec64_to_ns(&ts); - cycles_now = mlx5_read_internal_timer(mdev); + cycles_now = mlx5_read_internal_timer(mdev, NULL); write_seqlock_irqsave(&clock->lock, flags); nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); nsec_delta = ns - nsec_now; @@ -383,7 +388,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = { .pps = 0, .adjfreq = mlx5_ptp_adjfreq, .adjtime = mlx5_ptp_adjtime, - .gettime64 = mlx5_ptp_gettime, + .gettimex64 = mlx5_ptp_gettimex, .settime64 = mlx5_ptp_settime, .enable = NULL, .verify = NULL, @@ -439,16 +444,17 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); } -void mlx5_pps_event(struct mlx5_core_dev *mdev, - struct mlx5_eqe *eqe) +static int mlx5_pps_event(struct notifier_block *nb, + unsigned long type, void *data) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); + struct mlx5_core_dev *mdev = clock->mdev; struct ptp_clock_event ptp_event; - struct timespec64 ts; - u64 nsec_now, nsec_delta; u64 cycles_now, cycles_delta; + u64 nsec_now, nsec_delta, ns; + struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; - s64 ns; + struct timespec64 ts; unsigned long flags; switch (clock->ptp_info.pin_config[pin].func) { @@ -463,11 +469,12 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, } else { ptp_event.type = PTP_CLOCK_EXTTS; } + /* TODOL clock->ptp can be NULL if ptp_clock_register failes */ ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: - mlx5_ptp_gettime(&clock->ptp_info, &ts); - cycles_now = mlx5_read_internal_timer(mdev); + mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL); + cycles_now = mlx5_read_internal_timer(mdev, NULL); ts.tv_sec += 1; ts.tv_nsec = 0; ns = timespec64_to_ns(&ts); @@ -481,8 +488,11 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, write_sequnlock_irqrestore(&clock->lock, flags); break; default: - mlx5_core_err(mdev, " Unhandled event\n"); + mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", + clock->ptp_info.pin_config[pin].func); } + + return NOTIFY_OK; } void mlx5_init_clock(struct mlx5_core_dev *mdev) @@ -511,14 +521,14 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) ktime_to_ns(ktime_get_real())); /* Calculate period in seconds to call the overflow watchdog - to make - * sure counter is checked at least once every wrap around. + * sure counter is checked at least twice every wrap around. * The period is calculated as the minimum between max HW cycles count * (The clock source mask) and max amount of cycles that can be * multiplied by clock multiplier where the result doesn't exceed * 64bits. */ overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult); - overflow_cycles = min(overflow_cycles, clock->cycles.mask >> 1); + overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3)); ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, frac, &frac); @@ -567,6 +577,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) PTR_ERR(clock->ptp)); clock->ptp = NULL; } + + MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT); + mlx5_eq_notifier_register(mdev, &clock->pps_nb); } void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) @@ -576,6 +589,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) return; + mlx5_eq_notifier_unregister(mdev, &clock->pps_nb); if (clock->ptp) { ptp_clock_unregister(clock->ptp); clock->ptp = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index 263cb6e2aeee..31600924bdc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -36,7 +36,6 @@ #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) void mlx5_init_clock(struct mlx5_core_dev *mdev); void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); -void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { @@ -60,8 +59,6 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, #else static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} -static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {} - static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { return -1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c new file mode 100644 index 000000000000..bced2efe9bef --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include <linux/mlx5/vport.h> +#include "lib/devcom.h" + +static LIST_HEAD(devcom_list); + +#define devcom_for_each_component(priv, comp, iter) \ + for (iter = 0; \ + comp = &(priv)->components[iter], iter < MLX5_DEVCOM_NUM_COMPONENTS; \ + iter++) + +struct mlx5_devcom_component { + struct { + void *data; + } device[MLX5_MAX_PORTS]; + + mlx5_devcom_event_handler_t handler; + struct rw_semaphore sem; + bool paired; +}; + +struct mlx5_devcom_list { + struct list_head list; + + struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS]; + struct mlx5_core_dev *devs[MLX5_MAX_PORTS]; +}; + +struct mlx5_devcom { + struct mlx5_devcom_list *priv; + int idx; +}; + +static struct mlx5_devcom_list *mlx5_devcom_list_alloc(void) +{ + struct mlx5_devcom_component *comp; + struct mlx5_devcom_list *priv; + int i; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + devcom_for_each_component(priv, comp, i) + init_rwsem(&comp->sem); + + return priv; +} + +static struct mlx5_devcom *mlx5_devcom_alloc(struct mlx5_devcom_list *priv, + u8 idx) +{ + struct mlx5_devcom *devcom; + + devcom = kzalloc(sizeof(*devcom), GFP_KERNEL); + if (!devcom) + return NULL; + + devcom->priv = priv; + devcom->idx = idx; + return devcom; +} + +/* Must be called with intf_mutex held */ +struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_devcom_list *priv = NULL, *iter; + struct mlx5_devcom *devcom = NULL; + bool new_priv = false; + u64 sguid0, sguid1; + int idx, i; + + if (!mlx5_core_is_pf(dev)) + return NULL; + + sguid0 = mlx5_query_nic_system_image_guid(dev); + list_for_each_entry(iter, &devcom_list, list) { + struct mlx5_core_dev *tmp_dev = NULL; + + idx = -1; + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (iter->devs[i]) + tmp_dev = iter->devs[i]; + else + idx = i; + } + + if (idx == -1) + continue; + + sguid1 = mlx5_query_nic_system_image_guid(tmp_dev); + if (sguid0 != sguid1) + continue; + + priv = iter; + break; + } + + if (!priv) { + priv = mlx5_devcom_list_alloc(); + if (!priv) + return ERR_PTR(-ENOMEM); + + idx = 0; + new_priv = true; + } + + priv->devs[idx] = dev; + devcom = mlx5_devcom_alloc(priv, idx); + if (!devcom) { + kfree(priv); + return ERR_PTR(-ENOMEM); + } + + if (new_priv) + list_add(&priv->list, &devcom_list); + + return devcom; +} + +/* Must be called with intf_mutex held */ +void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom) +{ + struct mlx5_devcom_list *priv; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return; + + priv = devcom->priv; + priv->devs[devcom->idx] = NULL; + + kfree(devcom); + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (priv->devs[i]) + break; + + if (i != MLX5_MAX_PORTS) + return; + + list_del(&priv->list); + kfree(priv); +} + +void mlx5_devcom_register_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + mlx5_devcom_event_handler_t handler, + void *data) +{ + struct mlx5_devcom_component *comp; + + if (IS_ERR_OR_NULL(devcom)) + return; + + WARN_ON(!data); + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + comp->handler = handler; + comp->device[devcom->idx].data = data; + up_write(&comp->sem); +} + +void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + + if (IS_ERR_OR_NULL(devcom)) + return; + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + comp->device[devcom->idx].data = NULL; + up_write(&comp->sem); +} + +int mlx5_devcom_send_event(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int event, + void *event_data) +{ + struct mlx5_devcom_component *comp; + int err = -ENODEV, i; + + if (IS_ERR_OR_NULL(devcom)) + return err; + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (i != devcom->idx && comp->device[i].data) { + err = comp->handler(event, comp->device[i].data, + event_data); + break; + } + + up_write(&comp->sem); + return err; +} + +void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool paired) +{ + struct mlx5_devcom_component *comp; + + comp = &devcom->priv->components[id]; + WARN_ON(!rwsem_is_locked(&comp->sem)); + + comp->paired = paired; +} + +bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + if (IS_ERR_OR_NULL(devcom)) + return false; + + return devcom->priv->components[id].paired; +} + +void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return NULL; + + comp = &devcom->priv->components[id]; + down_read(&comp->sem); + if (!comp->paired) { + up_read(&comp->sem); + return NULL; + } + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (i != devcom->idx) + break; + + return comp->device[i].data; +} + +void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp = &devcom->priv->components[id]; + + up_read(&comp->sem); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h new file mode 100644 index 000000000000..939d5bf1581b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies */ + +#ifndef __LIB_MLX5_DEVCOM_H__ +#define __LIB_MLX5_DEVCOM_H__ + +#include <linux/mlx5/driver.h> + +enum mlx5_devcom_components { + MLX5_DEVCOM_ESW_OFFLOADS, + + MLX5_DEVCOM_NUM_COMPONENTS, +}; + +typedef int (*mlx5_devcom_event_handler_t)(int event, + void *my_data, + void *event_data); + +struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev); +void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom); + +void mlx5_devcom_register_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + mlx5_devcom_event_handler_t handler, + void *data); +void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +int mlx5_devcom_send_event(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int event, + void *event_data); + +void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool paired); +bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); +void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +#endif + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h new file mode 100644 index 000000000000..c0fb6d72b695 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies */ + +#ifndef __LIB_MLX5_EQ_H__ +#define __LIB_MLX5_EQ_H__ +#include <linux/mlx5/driver.h> +#include <linux/mlx5/eq.h> +#include <linux/mlx5/cq.h> + +#define MLX5_MAX_IRQ_NAME (32) +#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) + +struct mlx5_eq_tasklet { + struct list_head list; + struct list_head process_list; + struct tasklet_struct task; + spinlock_t lock; /* lock completion tasklet list */ +}; + +struct mlx5_cq_table { + spinlock_t lock; /* protect radix tree */ + struct radix_tree_root tree; +}; + +struct mlx5_eq { + struct mlx5_core_dev *dev; + struct mlx5_cq_table cq_table; + __be32 __iomem *doorbell; + u32 cons_index; + struct mlx5_frag_buf buf; + int size; + unsigned int vecidx; + unsigned int irqn; + u8 eqn; + int nent; + struct mlx5_rsc_debug *dbg; +}; + +struct mlx5_eq_comp { + struct mlx5_eq core; /* Must be first */ + struct mlx5_eq_tasklet tasklet_ctx; + struct list_head list; +}; + +static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) +{ + return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); +} + +static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); + + return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe; +} + +static inline void eq_update_ci(struct mlx5_eq *eq, int arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +int mlx5_eq_table_init(struct mlx5_core_dev *dev); +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_eq_table_create(struct mlx5_core_dev *dev); +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); + +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); +void mlx5_cq_tasklet_cb(unsigned long data); +struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); + +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev); +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); +#endif + +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); +int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 7550b1cc8c6a..397a2847867a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -33,6 +33,8 @@ #ifndef __LIB_MLX5_H__ #define __LIB_MLX5_H__ +#include "mlx5_core.h" + void mlx5_init_reserved_gids(struct mlx5_core_dev *dev); void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev); int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); @@ -40,4 +42,38 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +/* TODO move to lib/events.h */ + +#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF +#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF + +enum port_module_event_status_type { + MLX5_MODULE_STATUS_PLUGGED = 0x1, + MLX5_MODULE_STATUS_UNPLUGGED = 0x2, + MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_DISABLED = 0x4, + MLX5_MODULE_STATUS_NUM, +}; + +enum port_module_event_error_type { + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, + MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7, + MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc, + MLX5_MODULE_EVENT_ERROR_NUM, +}; + +struct mlx5_pme_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 28132c7dc05f..77896c11f6f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -43,7 +43,6 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cq.h> #include <linux/mlx5/qp.h> -#include <linux/mlx5/srq.h> #include <linux/debugfs.h> #include <linux/kmod.h> #include <linux/mlx5/mlx5_ifc.h> @@ -53,6 +52,7 @@ #endif #include <net/devlink.h> #include "mlx5_core.h" +#include "lib/eq.h" #include "fs_core.h" #include "lib/mpfs.h" #include "eswitch.h" @@ -63,6 +63,7 @@ #include "accel/tls.h" #include "lib/clock.h" #include "lib/vxlan.h" +#include "lib/devcom.h" #include "diag/fw_tracer.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); @@ -319,51 +320,6 @@ static void release_bar(struct pci_dev *pdev) pci_release_regions(pdev); } -static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = &priv->eq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? - MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); - int nvec; - int err; - - nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; - nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) - return -ENOMEM; - - priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); - if (!priv->irq_info) - return -ENOMEM; - - nvec = pci_alloc_irq_vectors(dev->pdev, - MLX5_EQ_VEC_COMP_BASE + 1, nvec, - PCI_IRQ_MSIX); - if (nvec < 0) { - err = nvec; - goto err_free_irq_info; - } - - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; - - return 0; - -err_free_irq_info: - kfree(priv->irq_info); - return err; -} - -static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - - pci_free_irq_vectors(dev->pdev); - kfree(priv->irq_info); -} - struct mlx5_reg_host_endianness { u8 he; u8 rsvd[15]; @@ -624,188 +580,24 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) +u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts) { u32 timer_h, timer_h1, timer_l; timer_h = ioread32be(&dev->iseg->internal_timer_h); + ptp_read_system_prets(sts); timer_l = ioread32be(&dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); timer_h1 = ioread32be(&dev->iseg->internal_timer_h); - if (timer_h != timer_h1) /* wrap around */ + if (timer_h != timer_h1) { + /* wrap around */ + ptp_read_system_prets(sts); timer_l = ioread32be(&dev->iseg->internal_timer_l); - - return (u64)timer_l | (u64)timer_h1 << 32; -} - -static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); - - if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; + ptp_read_system_postts(sts); } - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[i].mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[i].mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[i].mask); -} - -static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) -{ - int err; - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { - err = mlx5_irq_set_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - mlx5_irq_clear_affinity_hint(mdev, i); - - return err; -} - -static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) - mlx5_irq_clear_affinity_hint(mdev, i); -} - -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq, *n; - int err = -ENOENT; - - spin_lock(&table->lock); - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - if (eq->index == vector) { - *eqn = eq->eqn; - *irqn = eq->irqn; - err = 0; - break; - } - } - spin_unlock(&table->lock); - - return err; -} -EXPORT_SYMBOL(mlx5_vector2eqn); - -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq; - - spin_lock(&table->lock); - list_for_each_entry(eq, &table->comp_eqs_list, list) - if (eq->eqn == eqn) { - spin_unlock(&table->lock); - return eq; - } - - spin_unlock(&table->lock); - - return ERR_PTR(-ENOENT); -} - -static void free_comp_eqs(struct mlx5_core_dev *dev) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq, *n; - -#ifdef CONFIG_RFS_ACCEL - if (dev->rmap) { - free_irq_cpu_rmap(dev->rmap); - dev->rmap = NULL; - } -#endif - spin_lock(&table->lock); - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - list_del(&eq->list); - spin_unlock(&table->lock); - if (mlx5_destroy_unmap_eq(dev, eq)) - mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", - eq->eqn); - kfree(eq); - spin_lock(&table->lock); - } - spin_unlock(&table->lock); -} - -static int alloc_comp_eqs(struct mlx5_core_dev *dev) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; - struct mlx5_eq *eq; - int ncomp_vec; - int nent; - int err; - int i; - - INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; - nent = MLX5_COMP_EQ_SIZE; -#ifdef CONFIG_RFS_ACCEL - dev->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!dev->rmap) - return -ENOMEM; -#endif - for (i = 0; i < ncomp_vec; i++) { - eq = kzalloc(sizeof(*eq), GFP_KERNEL); - if (!eq) { - err = -ENOMEM; - goto clean; - } - -#ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev, - MLX5_EQ_VEC_COMP_BASE + i)); -#endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); - err = mlx5_create_map_eq(dev, eq, - i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - name, MLX5_EQ_TYPE_COMP); - if (err) { - kfree(eq); - goto clean; - } - mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); - eq->index = i; - spin_lock(&table->lock); - list_add_tail(&eq->list, &table->comp_eqs_list); - spin_unlock(&table->lock); - } - - return 0; - -clean: - free_comp_eqs(dev); - return err; + return (u64)timer_l | (u64)timer_h1 << 32; } static int mlx5_core_set_issi(struct mlx5_core_dev *dev) @@ -938,28 +730,37 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) struct pci_dev *pdev = dev->pdev; int err; + priv->devcom = mlx5_devcom_register_device(dev); + if (IS_ERR(priv->devcom)) + dev_err(&pdev->dev, "failed to register with devcom (0x%p)\n", + priv->devcom); + err = mlx5_query_board_id(dev); if (err) { dev_err(&pdev->dev, "query board id failed\n"); - goto out; + goto err_devcom; } - err = mlx5_eq_init(dev); + err = mlx5_eq_table_init(dev); if (err) { dev_err(&pdev->dev, "failed to initialize eq\n"); - goto out; + goto err_devcom; + } + + err = mlx5_events_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize events\n"); + goto err_eq_cleanup; } err = mlx5_cq_debugfs_init(dev); if (err) { dev_err(&pdev->dev, "failed to initialize cq debugfs\n"); - goto err_eq_cleanup; + goto err_events_cleanup; } mlx5_init_qp_table(dev); - mlx5_init_srq_table(dev); - mlx5_init_mkey_table(dev); mlx5_init_reserved_gids(dev); @@ -1013,14 +814,15 @@ err_rl_cleanup: err_tables_cleanup: mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); - +err_events_cleanup: + mlx5_events_cleanup(dev); err_eq_cleanup: - mlx5_eq_cleanup(dev); + mlx5_eq_table_cleanup(dev); +err_devcom: + mlx5_devcom_unregister_device(dev->priv.devcom); -out: return err; } @@ -1036,10 +838,11 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); - mlx5_eq_cleanup(dev); + mlx5_events_cleanup(dev); + mlx5_eq_table_cleanup(dev); + mlx5_devcom_unregister_device(dev->priv.devcom); } static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, @@ -1131,16 +934,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } - err = mlx5_pagealloc_start(dev); - if (err) { - dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n"); - goto reclaim_boot_pages; - } - err = mlx5_cmd_init_hca(dev, sw_owner_id); if (err) { dev_err(&pdev->dev, "init hca failed\n"); - goto err_pagealloc_stop; + goto reclaim_boot_pages; } mlx5_set_driver_version(dev); @@ -1161,23 +958,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, } } - err = mlx5_alloc_irq_vectors(dev); - if (err) { - dev_err(&pdev->dev, "alloc irq vectors failed\n"); - goto err_cleanup_once; - } - dev->priv.uar = mlx5_get_uars_page(dev); if (IS_ERR(dev->priv.uar)) { dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); err = PTR_ERR(dev->priv.uar); - goto err_disable_msix; + goto err_get_uars; } - err = mlx5_start_eqs(dev); + mlx5_events_start(dev); + mlx5_pagealloc_start(dev); + + err = mlx5_eq_table_create(dev); if (err) { - dev_err(&pdev->dev, "Failed to start pages and async EQs\n"); - goto err_put_uars; + dev_err(&pdev->dev, "Failed to create EQs\n"); + goto err_eq_table; } err = mlx5_fw_tracer_init(dev->tracer); @@ -1186,18 +980,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_fw_tracer; } - err = alloc_comp_eqs(dev); - if (err) { - dev_err(&pdev->dev, "Failed to alloc completion EQs\n"); - goto err_comp_eqs; - } - - err = mlx5_irq_set_affinity_hints(dev); - if (err) { - dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - goto err_affinity_hints; - } - err = mlx5_fpga_device_start(dev); if (err) { dev_err(&pdev->dev, "fpga device start failed %d\n", err); @@ -1266,24 +1048,17 @@ err_ipsec_start: mlx5_fpga_device_stop(dev); err_fpga_start: - mlx5_irq_clear_affinity_hints(dev); - -err_affinity_hints: - free_comp_eqs(dev); - -err_comp_eqs: mlx5_fw_tracer_cleanup(dev->tracer); err_fw_tracer: - mlx5_stop_eqs(dev); + mlx5_eq_table_destroy(dev); -err_put_uars: +err_eq_table: + mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); mlx5_put_uars_page(dev, priv->uar); -err_disable_msix: - mlx5_free_irq_vectors(dev); - -err_cleanup_once: +err_get_uars: if (boot) mlx5_cleanup_once(dev); @@ -1294,9 +1069,6 @@ err_stop_poll: goto out_err; } -err_pagealloc_stop: - mlx5_pagealloc_stop(dev); - reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); @@ -1340,21 +1112,20 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); - mlx5_irq_clear_affinity_hints(dev); - free_comp_eqs(dev); mlx5_fw_tracer_cleanup(dev->tracer); - mlx5_stop_eqs(dev); + mlx5_eq_table_destroy(dev); + mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); mlx5_put_uars_page(dev, priv->uar); - mlx5_free_irq_vectors(dev); if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev, cleanup); + err = mlx5_cmd_teardown_hca(dev); if (err) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); goto out; } - mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); mlx5_cmd_cleanup(dev); @@ -1364,12 +1135,6 @@ out: return err; } -struct mlx5_core_event_handler { - void (*event)(struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - void *data); -}; - static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, @@ -1403,7 +1168,6 @@ static int init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); dev->pdev = pdev; - dev->event = mlx5_core_event; dev->profile = &profile[prof_sel]; INIT_LIST_HEAD(&priv->ctx_list); @@ -1411,17 +1175,6 @@ static int init_one(struct pci_dev *pdev, mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); - INIT_LIST_HEAD(&priv->waiting_events_list); - priv->is_accum_events = false; - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - err = init_srcu_struct(&priv->pfault_srcu); - if (err) { - dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n", - err); - goto clean_dev; - } -#endif mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); INIT_LIST_HEAD(&priv->bfregs.reg_head.list); @@ -1430,7 +1183,7 @@ static int init_one(struct pci_dev *pdev, err = mlx5_pci_init(dev, priv); if (err) { dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); - goto clean_srcu; + goto clean_dev; } err = mlx5_health_init(dev); @@ -1439,12 +1192,14 @@ static int init_one(struct pci_dev *pdev, goto close_pci; } - mlx5_pagealloc_init(dev); + err = mlx5_pagealloc_init(dev); + if (err) + goto err_pagealloc_init; err = mlx5_load_one(dev, priv, true); if (err) { dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); - goto clean_health; + goto err_load_one; } request_module_nowait(MLX5_IB_MOD); @@ -1458,16 +1213,13 @@ static int init_one(struct pci_dev *pdev, clean_load: mlx5_unload_one(dev, priv, true); -clean_health: +err_load_one: mlx5_pagealloc_cleanup(dev); +err_pagealloc_init: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); -clean_srcu: -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&priv->pfault_srcu); clean_dev: -#endif devlink_free(devlink); return err; @@ -1491,9 +1243,6 @@ static void remove_one(struct pci_dev *pdev) mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&priv->pfault_srcu); -#endif devlink_free(devlink); } @@ -1637,7 +1386,6 @@ succeed: * kexec. There is no need to cleanup the mlx5_core software * contexts. */ - mlx5_irq_clear_affinity_hints(dev); mlx5_core_eq_free_irqs(dev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 0594d0961cb3..c68dcea5985b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -38,6 +38,7 @@ #include <linux/sched.h> #include <linux/if_link.h> #include <linux/firmware.h> +#include <linux/ptp_clock_kernel.h> #include <linux/mlx5/cq.h> #include <linux/mlx5/fs.h> @@ -78,6 +79,11 @@ do { \ __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +#define mlx5_core_warn_once(__dev, format, ...) \ + dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + #define mlx5_core_info(__dev, format, ...) \ dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) @@ -97,12 +103,6 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); - -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param); -void mlx5_core_page_fault(struct mlx5_core_dev *dev, - struct mlx5_pagefault *pfault); -void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); @@ -122,30 +122,10 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); - -int mlx5_eq_init(struct mlx5_core_dev *dev); -void mlx5_eq_cleanup(struct mlx5_core_dev *dev); -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type); -int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - u32 *out, int outlen); -int mlx5_start_eqs(struct mlx5_core_dev *dev); -void mlx5_stop_eqs(struct mlx5_core_dev *dev); -/* This function should only be called after mlx5_cmd_force_teardown_hca */ -void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); -void mlx5_cq_tasklet_cb(unsigned long data); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); -int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); -void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); +u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts); + +void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); @@ -159,6 +139,11 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_events_init(struct mlx5_core_dev *dev); +void mlx5_events_cleanup(struct mlx5_core_dev *dev); +void mlx5_events_start(struct mlx5_core_dev *dev); +void mlx5_events_stop(struct mlx5_core_dev *dev); + void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_attach_device(struct mlx5_core_dev *dev); @@ -202,10 +187,8 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) MLX5_CAP_GEN(dev, lag_master); } -int mlx5_lag_allow(struct mlx5_core_dev *dev); -int mlx5_lag_forbid(struct mlx5_core_dev *dev); - void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); +void mlx5_lag_update(struct mlx5_core_dev *dev); enum { MLX5_NIC_IFC_FULL = 0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index e36d3e3675f9..a83b517b0714 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -37,6 +37,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" +#include "lib/eq.h" enum { MLX5_PAGES_CANT_GIVE = 0, @@ -433,15 +434,28 @@ static void pages_work_handler(struct work_struct *work) kfree(req); } -void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages) +static int req_pages_handler(struct notifier_block *nb, + unsigned long type, void *data) { struct mlx5_pages_req *req; - + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + u16 func_id; + s32 npages; + + priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb); + dev = container_of(priv, struct mlx5_core_dev, priv); + eqe = data; + + func_id = be16_to_cpu(eqe->data.req_pages.func_id); + npages = be32_to_cpu(eqe->data.req_pages.num_pages); + mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", + func_id, npages); req = kzalloc(sizeof(*req), GFP_ATOMIC); if (!req) { mlx5_core_warn(dev, "failed to allocate pages request\n"); - return; + return NOTIFY_DONE; } req->dev = dev; @@ -449,6 +463,7 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, req->npages = npages; INIT_WORK(&req->work, pages_work_handler); queue_work(dev->priv.pg_wq, &req->work); + return NOTIFY_OK; } int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) @@ -524,29 +539,32 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) return 0; } -void mlx5_pagealloc_init(struct mlx5_core_dev *dev) +int mlx5_pagealloc_init(struct mlx5_core_dev *dev) { dev->priv.page_root = RB_ROOT; INIT_LIST_HEAD(&dev->priv.free_list); + dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); + if (!dev->priv.pg_wq) + return -ENOMEM; + + return 0; } void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) { - /* nothing */ + destroy_workqueue(dev->priv.pg_wq); } -int mlx5_pagealloc_start(struct mlx5_core_dev *dev) +void mlx5_pagealloc_start(struct mlx5_core_dev *dev) { - dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); - if (!dev->priv.pg_wq) - return -ENOMEM; - - return 0; + MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST); + mlx5_eq_notifier_register(dev, &dev->priv.pg_nb); } void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) { - destroy_workqueue(dev->priv.pg_wq); + mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb); + flush_workqueue(dev->priv.pg_wq); } int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 31a9cbd85689..2b82f35f4c35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -915,63 +915,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); } -static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = { - "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */ - "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */ - "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */ -}; - -static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = { - "Power budget exceeded", - "Long Range for non MLNX cable", - "Bus stuck(I2C or data shorted)", - "No EEPROM/retry timeout", - "Enforce part number list", - "Unknown identifier", - "High Temperature", - "Bad or shorted cable/module", - "Unknown status", -}; - -void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) -{ - enum port_module_event_status_type module_status; - enum port_module_event_error_type error_type; - struct mlx5_eqe_port_module *module_event_eqe; - struct mlx5_priv *priv = &dev->priv; - u8 module_num; - - module_event_eqe = &eqe->data.port_module; - module_num = module_event_eqe->module; - module_status = module_event_eqe->module_status & - PORT_MODULE_EVENT_MODULE_STATUS_MASK; - error_type = module_event_eqe->error_type & - PORT_MODULE_EVENT_ERROR_TYPE_MASK; - - if (module_status < MLX5_MODULE_STATUS_ERROR) { - priv->pme_stats.status_counters[module_status - 1]++; - } else if (module_status == MLX5_MODULE_STATUS_ERROR) { - if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN) - /* Unknown error type */ - error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN; - priv->pme_stats.error_counters[error_type]++; - } - - if (!printk_ratelimit()) - return; - - if (module_status < MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, - "Port module event: module %u, %s\n", - module_num, mlx5_pme_status[module_status - 1]); - - else if (module_status == MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, - "Port module event[error]: module %u, %s, %s\n", - module_num, mlx5_pme_status[module_status - 1], - mlx5_pme_error[error_type]); -} - int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) { u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 91b8139a388d..388f205a497f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -38,11 +38,11 @@ #include <linux/mlx5/transobj.h> #include "mlx5_core.h" +#include "lib/eq.h" -static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev, - u32 rsn) +static struct mlx5_core_rsc_common * +mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) { - struct mlx5_qp_table *table = &dev->priv.qp_table; struct mlx5_core_rsc_common *common; spin_lock(&table->lock); @@ -53,11 +53,6 @@ static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev, spin_unlock(&table->lock); - if (!common) { - mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", - rsn); - return NULL; - } return common; } @@ -120,19 +115,57 @@ static bool is_event_type_allowed(int rsc_type, int event_type) } } -void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) +static int rsc_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) { - struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn); + struct mlx5_core_rsc_common *common; + struct mlx5_qp_table *table; + struct mlx5_core_dev *dev; struct mlx5_core_dct *dct; + u8 event_type = (u8)type; struct mlx5_core_qp *qp; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + u32 rsn; + + switch (event_type) { + case MLX5_EVENT_TYPE_DCT_DRAINED: + eqe = data; + rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; + rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); + break; + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + eqe = data; + rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); + break; + default: + return NOTIFY_DONE; + } + + table = container_of(nb, struct mlx5_qp_table, nb); + priv = container_of(table, struct mlx5_priv, qp_table); + dev = container_of(priv, struct mlx5_core_dev, priv); + + mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn); - if (!common) - return; + common = mlx5_get_rsc(table, rsn); + if (!common) { + mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn); + return NOTIFY_OK; + } if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) { mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n", event_type, rsn); - return; + goto out; } switch (common->res) { @@ -150,8 +183,10 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) default: mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn); } - +out: mlx5_core_put_rsc(common); + + return NOTIFY_OK; } static int create_resource_common(struct mlx5_core_dev *dev, @@ -487,10 +522,16 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev) spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); mlx5_qp_debugfs_init(dev); + + table->nb.notifier_call = rsc_event_notifier; + mlx5_notifier_register(dev, &table->nb); } void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev) { + struct mlx5_qp_table *table = &dev->priv.qp_table; + + mlx5_notifier_unregister(dev, &table->nb); mlx5_qp_debugfs_cleanup(dev); } @@ -670,3 +711,20 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); + +struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev, + int res_num, + enum mlx5_res_type res_type) +{ + u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN); + struct mlx5_qp_table *table = &dev->priv.qp_table; + + return mlx5_get_rsc(table, rsn); +} +EXPORT_SYMBOL_GPL(mlx5_core_res_hold); + +void mlx5_core_res_put(struct mlx5_core_rsc_common *res) +{ + mlx5_core_put_rsc(res); +} +EXPORT_SYMBOL_GPL(mlx5_core_res_put); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a0674962f02c..6e178030d8fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -216,20 +216,10 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!mlx5_core_is_pf(dev)) return -EPERM; - if (num_vfs) { - int ret; - - ret = mlx5_lag_forbid(dev); - if (ret && (ret != -ENODEV)) - return ret; - } - - if (num_vfs) { + if (num_vfs) err = mlx5_sriov_enable(pdev, num_vfs); - } else { + else mlx5_sriov_disable(pdev); - mlx5_lag_allow(dev); - } return err ? err : num_vfs; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c deleted file mode 100644 index 6a6fc9be01e6..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ /dev/null @@ -1,716 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/cmd.h> -#include <linux/mlx5/srq.h> -#include <rdma/ib_verbs.h> -#include "mlx5_core.h" -#include <linux/mlx5/transobj.h> - -void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *srq; - - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); - if (srq) - atomic_inc(&srq->refcount); - - spin_unlock(&table->lock); - - if (!srq) { - mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn); - return; - } - - srq->event(srq, event_type); - - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); -} - -static int get_pas_size(struct mlx5_srq_attr *in) -{ - u32 log_page_size = in->log_page_size + 12; - u32 log_srq_size = in->log_size; - u32 log_rq_stride = in->wqe_shift; - u32 page_offset = in->page_offset; - u32 po_quanta = 1 << (log_page_size - 6); - u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); - u32 page_size = 1 << log_page_size; - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size); - - return rq_num_pas * sizeof(u64); -} - -static void set_wq(void *wq, struct mlx5_srq_attr *in) -{ - MLX5_SET(wq, wq, wq_signature, !!(in->flags - & MLX5_SRQ_FLAG_WQ_SIG)); - MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); - MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); - MLX5_SET(wq, wq, log_wq_sz, in->log_size); - MLX5_SET(wq, wq, page_offset, in->page_offset); - MLX5_SET(wq, wq, lwm, in->lwm); - MLX5_SET(wq, wq, pd, in->pd); - MLX5_SET64(wq, wq, dbr_addr, in->db_record); -} - -static void set_srqc(void *srqc, struct mlx5_srq_attr *in) -{ - MLX5_SET(srqc, srqc, wq_signature, !!(in->flags - & MLX5_SRQ_FLAG_WQ_SIG)); - MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); - MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); - MLX5_SET(srqc, srqc, log_srq_size, in->log_size); - MLX5_SET(srqc, srqc, page_offset, in->page_offset); - MLX5_SET(srqc, srqc, lwm, in->lwm); - MLX5_SET(srqc, srqc, pd, in->pd); - MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); - MLX5_SET(srqc, srqc, xrcd, in->xrcd); - MLX5_SET(srqc, srqc, cqn, in->cqn); -} - -static void get_wq(void *wq, struct mlx5_srq_attr *in) -{ - if (MLX5_GET(wq, wq, wq_signature)) - in->flags &= MLX5_SRQ_FLAG_WQ_SIG; - in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); - in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; - in->log_size = MLX5_GET(wq, wq, log_wq_sz); - in->page_offset = MLX5_GET(wq, wq, page_offset); - in->lwm = MLX5_GET(wq, wq, lwm); - in->pd = MLX5_GET(wq, wq, pd); - in->db_record = MLX5_GET64(wq, wq, dbr_addr); -} - -static void get_srqc(void *srqc, struct mlx5_srq_attr *in) -{ - if (MLX5_GET(srqc, srqc, wq_signature)) - in->flags &= MLX5_SRQ_FLAG_WQ_SIG; - in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); - in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); - in->log_size = MLX5_GET(srqc, srqc, log_srq_size); - in->page_offset = MLX5_GET(srqc, srqc, page_offset); - in->lwm = MLX5_GET(srqc, srqc, lwm); - in->pd = MLX5_GET(srqc, srqc, pd); - in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); -} - -struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *srq; - - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); - if (srq) - atomic_inc(&srq->refcount); - - spin_unlock(&table->lock); - - return srq; -} -EXPORT_SYMBOL(mlx5_core_get_srq); - -static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; - void *create_in; - void *srqc; - void *pas; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - MLX5_SET(create_srq_in, create_in, uid, in->uid); - srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); - pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); - - set_srqc(srqc, in); - memcpy(pas, in->pas, pas_size); - - MLX5_SET(create_srq_in, create_in, opcode, - MLX5_CMD_OP_CREATE_SRQ); - - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - kvfree(create_in); - if (!err) { - srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); - srq->uid = in->uid; - } - - return err; -} - -static int destroy_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; - - MLX5_SET(destroy_srq_in, srq_in, opcode, - MLX5_CMD_OP_DESTROY_SRQ); - MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); -} - -static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq) -{ - u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - - MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); - MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, srq_in, lwm, lwm); - MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); -} - -static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; - u32 *srq_out; - void *srqc; - int err; - - srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); - if (!srq_out) - return -ENOMEM; - - MLX5_SET(query_srq_in, srq_in, opcode, - MLX5_CMD_OP_QUERY_SRQ); - MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); - err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, MLX5_ST_SZ_BYTES(query_srq_out)); - if (err) - goto out; - - srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); - get_srqc(srqc, out); - if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; -out: - kvfree(srq_out); - return err; -} - -static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; - void *create_in; - void *xrc_srqc; - void *pas; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); - xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, - xrc_srq_context_entry); - pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); - - set_srqc(xrc_srqc, in); - MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); - memcpy(pas, in->pas, pas_size); - MLX5_SET(create_xrc_srq_in, create_in, opcode, - MLX5_CMD_OP_CREATE_XRC_SRQ); - - memset(create_out, 0, sizeof(create_out)); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - if (err) - goto out; - - srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); - srq->uid = in->uid; -out: - kvfree(create_in); - return err; -} - -static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; - - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); -} - -static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, u16 lwm) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); -} - -static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; - u32 *xrcsrq_out; - void *xrc_srqc; - int err; - - xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); - if (!xrcsrq_out) - return -ENOMEM; - memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); - - MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_QUERY_XRC_SRQ); - MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - - err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, - MLX5_ST_SZ_BYTES(query_xrc_srq_out)); - if (err) - goto out; - - xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, - xrc_srq_context_entry); - get_srqc(xrc_srqc, out); - if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; - -out: - kvfree(xrcsrq_out); - return err; -} - -static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - void *create_in; - void *rmpc; - void *wq; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - MLX5_SET(create_rmp_in, create_in, uid, in->uid); - set_wq(wq, in); - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); - - err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); - if (!err) - srq->uid = in->uid; - - kvfree(create_in); - return err; -} - -static int destroy_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; - - MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); - MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); - MLX5_SET(destroy_rmp_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int arm_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - u16 lwm) -{ - void *in; - void *rmpc; - void *wq; - void *bitmask; - int err; - - in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); - bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); - MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); - MLX5_SET(modify_rmp_in, in, uid, srq->uid); - MLX5_SET(wq, wq, lwm, lwm); - MLX5_SET(rmp_bitmask, bitmask, lwm, 1); - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - - err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); - - kvfree(in); - return err; -} - -static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 *rmp_out; - void *rmpc; - int err; - - rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL); - if (!rmp_out) - return -ENOMEM; - - err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); - if (err) - goto out; - - rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); - get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); - if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) - out->flags |= MLX5_SRQ_FLAG_ERR; - -out: - kvfree(rmp_out); - return err; -} - -static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; - void *create_in; - void *xrqc; - void *wq; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); - wq = MLX5_ADDR_OF(xrqc, xrqc, wq); - - set_wq(wq, in); - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); - - if (in->type == IB_SRQT_TM) { - MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); - if (in->flags & MLX5_SRQ_FLAG_RNDV) - MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); - MLX5_SET(xrqc, xrqc, - tag_matching_topology_context.log_matching_list_sz, - in->tm_log_list_size); - } - MLX5_SET(xrqc, xrqc, user_index, in->user_index); - MLX5_SET(xrqc, xrqc, cqn, in->cqn); - MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); - MLX5_SET(create_xrq_in, create_in, uid, in->uid); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - kvfree(create_in); - if (!err) { - srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); - srq->uid = in->uid; - } - - return err; -} - -static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) -{ - u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; - - MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); - MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); - MLX5_SET(destroy_xrq_in, in, uid, srq->uid); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int arm_xrq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - u16 lwm) -{ - u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - - MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); - MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, in, lwm, lwm); - MLX5_SET(arm_rq_in, in, uid, srq->uid); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; - u32 *xrq_out; - int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); - void *xrqc; - int err; - - xrq_out = kvzalloc(outlen, GFP_KERNEL); - if (!xrq_out) - return -ENOMEM; - - MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); - MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); - - err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen); - if (err) - goto out; - - xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); - get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); - if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; - out->tm_next_tag = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.append_next_index); - out->tm_hw_phase_cnt = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.hw_phase_cnt); - out->tm_sw_phase_cnt = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.sw_phase_cnt); - -out: - kvfree(xrq_out); - return err; -} - -static int create_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - if (!dev->issi) - return create_srq_cmd(dev, srq, in); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return create_xrc_srq_cmd(dev, srq, in); - case MLX5_RES_XRQ: - return create_xrq_cmd(dev, srq, in); - default: - return create_rmp_cmd(dev, srq, in); - } -} - -static int destroy_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - if (!dev->issi) - return destroy_srq_cmd(dev, srq); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return destroy_xrc_srq_cmd(dev, srq); - case MLX5_RES_XRQ: - return destroy_xrq_cmd(dev, srq); - default: - return destroy_rmp_cmd(dev, srq); - } -} - -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - int err; - struct mlx5_srq_table *table = &dev->priv.srq_table; - - switch (in->type) { - case IB_SRQT_XRC: - srq->common.res = MLX5_RES_XSRQ; - break; - case IB_SRQT_TM: - srq->common.res = MLX5_RES_XRQ; - break; - default: - srq->common.res = MLX5_RES_SRQ; - } - - err = create_srq_split(dev, srq, in); - if (err) - return err; - - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); - - spin_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, srq->srqn, srq); - spin_unlock_irq(&table->lock); - if (err) { - mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn); - goto err_destroy_srq_split; - } - - return 0; - -err_destroy_srq_split: - destroy_srq_split(dev, srq); - - return err; -} -EXPORT_SYMBOL(mlx5_core_create_srq); - -int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *tmp; - int err; - - spin_lock_irq(&table->lock); - tmp = radix_tree_delete(&table->tree, srq->srqn); - spin_unlock_irq(&table->lock); - if (!tmp) { - mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn); - return -EINVAL; - } - if (tmp != srq) { - mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn); - return -EINVAL; - } - - err = destroy_srq_split(dev, srq); - if (err) - return err; - - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); - wait_for_completion(&srq->free); - - return 0; -} -EXPORT_SYMBOL(mlx5_core_destroy_srq); - -int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - if (!dev->issi) - return query_srq_cmd(dev, srq, out); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return query_xrc_srq_cmd(dev, srq, out); - case MLX5_RES_XRQ: - return query_xrq_cmd(dev, srq, out); - default: - return query_rmp_cmd(dev, srq, out); - } -} -EXPORT_SYMBOL(mlx5_core_query_srq); - -int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq) -{ - if (!dev->issi) - return arm_srq_cmd(dev, srq, lwm, is_srq); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return arm_xrc_srq_cmd(dev, srq, lwm); - case MLX5_RES_XRQ: - return arm_xrq_cmd(dev, srq, lwm); - default: - return arm_rmp_cmd(dev, srq, lwm); - } -} -EXPORT_SYMBOL(mlx5_core_arm_srq); - -void mlx5_init_srq_table(struct mlx5_core_dev *dev) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - - memset(table, 0, sizeof(*table)); - spin_lock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); -} - -void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) -{ - /* nothing */ -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index a1ee9a8a769e..c4d4b76096dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -258,115 +258,6 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) } EXPORT_SYMBOL(mlx5_core_destroy_tis); -int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rmpn) -{ - u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0}; - int err; - - MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - if (!err) - *rmpn = MLX5_GET(create_rmp_out, out, rmpn); - - return err; -} - -int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) -{ - u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; - - MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); -} - -int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn) -{ - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0}; - - MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); - MLX5_SET(destroy_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, - sizeof(out)); -} - -int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) -{ - u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; - int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); - - MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); - MLX5_SET(query_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - -int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) -{ - void *in; - void *rmpc; - void *wq; - void *bitmask; - int err; - - in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); - bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); - MLX5_SET(modify_rmp_in, in, rmpn, rmpn); - MLX5_SET(wq, wq, lwm, lwm); - MLX5_SET(rmp_bitmask, bitmask, lwm, 1); - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - - err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); - - kvfree(in); - - return err; -} - -int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *xsrqn) -{ - u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0}; - int err; - - MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - if (!err) - *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn); - - return err; -} - -int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) -{ - u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; - - MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) -{ - u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - - MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn); - MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, in, op_mod, - MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index cfbea66b4879..9b150ce9d315 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1204,9 +1204,19 @@ EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) { - if (!mdev->sys_image_guid) - mlx5_query_nic_vport_system_image_guid(mdev, &mdev->sys_image_guid); + int port_type_cap = MLX5_CAP_GEN(mdev, port_type); + u64 tmp = 0; - return mdev->sys_image_guid; + if (mdev->sys_image_guid) + return mdev->sys_image_guid; + + if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH) + mlx5_query_nic_vport_system_image_guid(mdev, &tmp); + else + mlx5_query_hca_vport_system_image_guid(mdev, &tmp); + + mdev->sys_image_guid = tmp; + + return tmp; } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 2dcbf1ebfd6a..953cc8efba69 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -155,7 +155,8 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_wq_ctrl *wq_ctrl) { - u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6; + /* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */ + u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 6 : 7; u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size); int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index b1293d153a58..ea934a48c90a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -177,9 +177,14 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq) return mlx5_cqwq_ctr2ix(wq, wq->cc); } -static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) +static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) { - return mlx5_frag_buf_get_wqe(&wq->fbc, ix); + struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix); + + /* For 128B CQEs the data is in the last 64B */ + cqe += wq->fbc.log_stride == 7; + + return cqe; } static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr) |